19 files changed, 825 insertions, 294 deletions
diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl
index 453e00fce3..fa919ca862 100644
--- a/lib/compiler/src/beam_asm.erl
+++ b/lib/compiler/src/beam_asm.erl
@@ -407,7 +407,17 @@ encode_arg({atom, Atom}, Dict0) when is_atom(Atom) ->
     {Index, Dict} = beam_dict:atom(Atom, Dict0),
     {encode(?tag_a, Index), Dict};
 encode_arg({integer, N}, Dict) ->
-    {encode(?tag_i, N), Dict};
+    %% Conservatively assume that all integers whose absolute
+    %% value is at least 1 bsl 128 will be bignums in
+    %% the runtime system.
+    if
+        N >= 1 bsl 128 ->
+            encode_arg({literal, N}, Dict);
+        N =< -(1 bsl 128) ->
+            encode_arg({literal, N}, Dict);
+        true ->
+            {encode(?tag_i, N), Dict}
+    end;
 encode_arg(nil, Dict) ->
     {encode(?tag_a, 0), Dict};
 encode_arg({f, W}, Dict) ->
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 39ae8d5347..d0536e0669 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -43,12 +43,13 @@ function({function,Name,Arity,CLabel,Is0}, Blockify) ->
                   false ->
                       Is0
               end,
-        Is3 = beam_utils:anno_defs(Is2),
-        Is4 = move_allocates(Is3),
-        Is5 = beam_utils:live_opt(Is4),
-        Is6 = opt_blocks(Is5),
-        Is7 = beam_utils:delete_annos(Is6),
-        Is = opt_allocs(Is7),
+        Is3 = local_cse(Is2),
+        Is4 = beam_utils:anno_defs(Is3),
+        Is5 = move_allocates(Is4),
+        Is6 = beam_utils:live_opt(Is5),
+        Is7 = opt_blocks(Is6),
+        Is8 = beam_utils:delete_annos(Is7),
+        Is = opt_allocs(Is8),
 
         %% Done.
         {function,Name,Arity,CLabel,Is}
@@ -231,7 +232,7 @@ alloc_may_pass({set,_,_,_}) -> true.
 %%  Optimize the instruction stream inside a basic block.
 
 opt([{set,[X],[X],move}|Is]) -> opt(Is);
-opt([{set,[X],_,move},{set,[X],_,move}=I|Is]) ->
+opt([{set,[Dst],_,move},{set,[Dst],[Src],move}=I|Is]) when Dst =/= Src ->
     opt([I|Is]);
 opt([{set,[{x,0}],[S1],move}=I1,{set,[D2],[{x,0}],move}|Is]) ->
     opt([I1,{set,[D2],[S1],move}|Is]);
@@ -289,7 +290,7 @@ opt_move(Dest, Is) ->
 opt_move_1(R, [{set,[D],[R],move}|Is0], Acc) ->
     %% Provided that the source register is killed by instructions
     %% that follow, the optimization is safe.
-    case eliminate_use_of_from_reg(Is0, R, D, []) of
+    case eliminate_use_of_from_reg(Is0, R, D) of
 	{yes,Is} -> opt_move_rev(D, Acc, Is);
 	no -> not_possible
     end;
@@ -347,7 +348,7 @@ opt_tuple_element_1([{set,_,_,{alloc,_,_}}|_], _, _, _) ->
 opt_tuple_element_1([{set,_,_,{try_catch,_,_}}|_], _, _, _) ->
     no;
 opt_tuple_element_1([{set,[D],[S],move}|Is0], I0, {_,S}, Acc) ->
-    case eliminate_use_of_from_reg(Is0, S, D, []) of
+    case eliminate_use_of_from_reg(Is0, S, D) of
 	no ->
 	    no;
 	{yes,Is} ->
@@ -389,6 +390,14 @@ is_killed_or_used(R, {set,Ss,Ds,_}) ->
 %%  that FromRegister is still used and that the optimization is not
 %%  possible.
 
+eliminate_use_of_from_reg(Is, From, To) ->
+    try
+        eliminate_use_of_from_reg(Is, From, To, [])
+    catch
+        throw:not_possible ->
+            no
+    end.
+
 eliminate_use_of_from_reg([{set,_,_,{alloc,Live,_}}|_]=Is0, {x,X}, _, Acc) ->
     if
 	X < Live ->
@@ -397,21 +406,32 @@ eliminate_use_of_from_reg([{set,_,_,{alloc,Live,_}}|_]=Is0, {x,X}, _, Acc) ->
 	    {yes,reverse(Acc, Is0)}
     end;
 eliminate_use_of_from_reg([{set,Ds,Ss0,Op}=I0|Is], From, To, Acc) ->
+    ensure_safe_tuple(I0, To),
     I = case member(From, Ss0) of
-	    true ->
-		Ss = [case S of
-			  From -> To;
-			  _ -> S
-		      end || S <- Ss0],
-		{set,Ds,Ss,Op};
-	    false ->
-		I0
-	end,
+            true ->
+                Ss = [case S of
+                          From -> To;
+                          _ -> S
+                      end || S <- Ss0],
+                {set,Ds,Ss,Op};
+            false ->
+                I0
+        end,
     case member(From, Ds) of
-	true ->
-	    {yes,reverse(Acc, [I|Is])};
-	false ->
-	    eliminate_use_of_from_reg(Is, From, To, [I|Acc])
+        true ->
+            {yes,reverse(Acc, [I|Is])};
+        false ->
+            case member(To, Ds) of
+                true ->
+                    case beam_utils:is_killed_block(From, Is) of
+                        true ->
+                            {yes,reverse(Acc, [I|Is])};
+                        false ->
+                            no
+                    end;
+                false ->
+                    eliminate_use_of_from_reg(Is, From, To, [I|Acc])
+            end
     end;
 eliminate_use_of_from_reg([I]=Is, From, _To, Acc) ->
     case beam_utils:is_killed_block(From, [I]) of
@@ -421,6 +441,10 @@ eliminate_use_of_from_reg([I]=Is, From, _To, Acc) ->
 	    no
     end.
 
+ensure_safe_tuple({set,[To],[],{put_tuple,_}}, To) ->
+    throw(not_possible);
+ensure_safe_tuple(_, _) -> ok.
+
 %% opt_allocs(Instructions) -> Instructions.  Optimize allocate
 %%  instructions inside blocks. If safe, replace an allocate_zero
 %%  instruction with the slightly cheaper allocate instruction.
@@ -541,3 +565,103 @@ defined_regs([{set,Ds,_,{alloc,Live,_}}|_], Regs) ->
     x_live(Ds, Regs bor ((1 bsl Live) - 1));
 defined_regs([{set,Ds,_,_}|Is], Regs) ->
     defined_regs(Is, x_live(Ds, Regs)).
+
+%%%
+%%% Do local common subexpression elimination (CSE) in each block.
+%%%
+
+local_cse([{block,Bl0}|Is]) ->
+    Bl = cse_block(Bl0, orddict:new(), []),
+    [{block,Bl}|local_cse(Is)];
+local_cse([I|Is]) ->
+    [I|local_cse(Is)];
+local_cse([]) -> [].
+
+cse_block([I|Is], Es0, Acc0) ->
+    Es1 = cse_clear(I, Es0),
+    case cse_expr(I) of
+        none ->
+            %% Instruction is not suitable for CSE.
+            cse_block(Is, Es1, [I|Acc0]);
+        {ok,D,Expr} ->
+            %% Suitable instruction. First update the dictionary of
+            %% suitable expressions for the next iteration.
+            Es = cse_add(D, Expr, Es1),
+
+            %% Search for a previous identical expression.
+            case cse_find(Expr, Es0) of
+                error ->
+                    %% Nothing found
+                    cse_block(Is, Es, [I|Acc0]);
+                Src ->
+                    %% Use the previously calculated result.
+                    %% Also eliminate any line instruction.
+                    Move = {set,[D],[Src],move},
+                    case Acc0 of
+                        [{set,_,_,{line,_}}|Acc] ->
+                            cse_block(Is, Es, [Move|Acc]);
+                        [_|_] ->
+                            cse_block(Is, Es, [Move|Acc0])
+                    end
+            end
+    end;
+cse_block([], _, Acc) ->
+    reverse(Acc).
+
+%% cse_find(Expr, Expressions) -> error | Register.
+%%  Find a previously evaluated expression whose result can be reused,
+%%  or return 'error' if no such expression is found.
+
+cse_find(Expr, Es) ->
+    case orddict:find(Expr, Es) of
+        {ok,{Src,_}} -> Src;
+        error -> error
+    end.
+
+cse_expr({set,[D],Ss,{bif,N,_}}) ->
+    {ok,D,{{bif,N},Ss}};
+cse_expr({set,[D],Ss,{alloc,_,{gc_bif,N,_}}}) ->
+    {ok,D,{{gc_bif,N},Ss}};
+cse_expr({set,[D],Ss,put_list}) ->
+    {ok,D,{put_list,Ss}};
+cse_expr(_) -> none.
+
+%% cse_clear(Instr, Expressions0) -> Expressions.
+%%  Remove all previous expressions that will become
+%%  invalid when this instruction is executed. Basically,
+%%  an expression is no longer safe to reuse when the
+%%  register it has been stored to has been modified, killed,
+%%  or if any of the source operands have changed.
+
+cse_clear({set,Ds,_,{alloc,Live,_}}, Es) ->
+    cse_clear_1(Es, Live, Ds);
+cse_clear({set,Ds,_,_}, Es) ->
+    cse_clear_1(Es, all, Ds).
+
+cse_clear_1(Es, Live, Ds0) ->
+    Ds = ordsets:from_list(Ds0),
+    [E || E <- Es, cse_is_safe(E, Live, Ds)].
+
+cse_is_safe({_,{Dst,Interfering}}, Live, Ds) ->
+    ordsets:is_disjoint(Interfering, Ds) andalso
+        case Dst of
+            {x,X} ->
+                X < Live;
+            _ ->
+                true
+        end.
+
+%% cse_add(Dest, Expr, Expressions0) -> Expressions.
+%%  Provided that it is safe, add a new expression to the dictionary
+%%  of already evaluated expressions.
+
+cse_add(D, {_,Ss}=Expr, Es) ->
+    case member(D, Ss) of
+        false ->
+            Interfering = ordsets:from_list([D|Ss]),
+            orddict:store(Expr, {D,Interfering}, Es);
+        true ->
+            %% Unsafe because the instruction overwrites one of
+            %% its source operands.
+            Es
+    end.
diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl
index da944f3ce6..dbbaae05eb 100644
--- a/lib/compiler/src/beam_dead.erl
+++ b/lib/compiler/src/beam_dead.erl
@@ -294,24 +294,25 @@ backward([{jump,{f,To}}=J|[{gc_bif,_,{f,To},_,_,_Dst}|Is]], D, Acc) ->
     %% register is initialized, and it is therefore no need to test
     %% for liveness of the destination register at label To.
     backward([J|Is], D, Acc);
-backward([{test,bs_start_match2,F,Live,[R,_]=Args,Ctxt}|Is], D,
-	 [{test,bs_match_string,F,[Ctxt,Bs]},
-	  {test,bs_test_tail2,F,[Ctxt,0]}|Acc0]=Acc) ->
+backward([{test,bs_start_match2,F,Live,[Src,_]=Args,Ctxt}|Is], D, Acc0) ->
     {f,To0} = F,
-    case beam_utils:is_killed(Ctxt, Acc0, D) of
-	true ->
-	    To = shortcut_bs_context_to_binary(To0, R, D),
-	    Eq = {test,is_eq_exact,{f,To},[R,{literal,Bs}]},
-	    backward(Is, D, [Eq|Acc0]);
-	false ->
-	    To = shortcut_bs_start_match(To0, R, D),
-	    I = {test,bs_start_match2,{f,To},Live,Args,Ctxt},
-	    backward(Is, D, [I|Acc])
+    case test_bs_literal(F, Ctxt, D, Acc0) of
+        {none,Acc} ->
+            %% Ctxt killed immediately after bs_start_match2.
+            To = shortcut_bs_context_to_binary(To0, Src, D),
+            I = {test,is_bitstr,{f,To},[Src]},
+            backward(Is, D, [I|Acc]);
+        {Literal,Acc} ->
+            %% Ctxt killed after matching a literal.
+            To = shortcut_bs_context_to_binary(To0, Src, D),
+            Eq = {test,is_eq_exact,{f,To},[Src,{literal,Literal}]},
+            backward(Is, D, [Eq|Acc]);
+        not_killed ->
+            %% Ctxt not killed. Not much to do.
+            To = shortcut_bs_start_match(To0, Src, D),
+            I = {test,bs_start_match2,{f,To},Live,Args,Ctxt},
+            backward(Is, D, [I|Acc0])
     end;
-backward([{test,bs_start_match2,{f,To0},Live,[Src|_]=Info,Dst}|Is], D, Acc) ->
-    To = shortcut_bs_start_match(To0, Src, D),
-    I = {test,bs_start_match2,{f,To},Live,Info,Dst},
-    backward(Is, D, [I|Acc]);
 backward([{test,Op,{f,To0},Ops0}|Is], D, Acc) ->
     To1 = shortcut_bs_test(To0, Is, D),
     To2 = shortcut_label(To1, D),
@@ -511,6 +512,22 @@ remove_from_list(Lit, [Val,{f,_}=Fail|T]) ->
     [Val,Fail|remove_from_list(Lit, T)];
 remove_from_list(_, []) -> [].
 
+
+test_bs_literal(F, Ctxt, D,
+                [{test,bs_match_string,F,[Ctxt,Bs]},
+                 {test,bs_test_tail2,F,[Ctxt,0]}|Acc]) ->
+    test_bs_literal_1(Ctxt, Acc, D, Bs);
+test_bs_literal(F, Ctxt, D, [{test,bs_test_tail2,F,[Ctxt,0]}|Acc]) ->
+    test_bs_literal_1(Ctxt, Acc, D, <<>>);
+test_bs_literal(_, Ctxt, D, Acc) ->
+    test_bs_literal_1(Ctxt, Acc, D, none).
+
+test_bs_literal_1(Ctxt, Is, D, Literal) ->
+    case beam_utils:is_killed(Ctxt, Is, D) of
+        true -> {Literal,Is};
+        false -> not_killed
+    end.
+
 %% shortcut_bs_test(TargetLabel, ReversedInstructions, D) -> TargetLabel'
 %%  Try to shortcut the failure label for bit syntax matching.
 
diff --git a/lib/compiler/src/beam_disasm.erl b/lib/compiler/src/beam_disasm.erl
index 22ba86fa38..50b76d7f29 100644
--- a/lib/compiler/src/beam_disasm.erl
+++ b/lib/compiler/src/beam_disasm.erl
@@ -1088,6 +1088,8 @@ resolve_inst({get_map_elements,Args0},_,_,_) ->
 
 resolve_inst({build_stacktrace,[]},_,_,_) ->
     build_stacktrace;
+resolve_inst({raw_raise,[]},_,_,_) ->
+    raw_raise;
 
 %%
 %% Catches instructions that are not yet handled.
diff --git a/lib/compiler/src/beam_disasm.hrl b/lib/compiler/src/beam_disasm.hrl
index 8cc0bcf99b..c3326c15a0 100644
--- a/lib/compiler/src/beam_disasm.hrl
+++ b/lib/compiler/src/beam_disasm.hrl
@@ -27,7 +27,7 @@
 %%      PROPER TYPES FOR THE SET OF BEAM INSTRUCTIONS.
 %%
 -type beam_instr() :: 'bs_init_writable' | 'build_stacktrace'
-                    | 'fclearerror' | 'if_end'
+                    | 'fclearerror' | 'if_end' | 'raw_raise'
                     | 'remove_message' | 'return' | 'send' | 'timeout'
                     | tuple().  %% XXX: Very underspecified - FIX THIS
 
diff --git a/lib/compiler/src/beam_type.erl b/lib/compiler/src/beam_type.erl
index 3b6bf49961..b2fabed2c5 100644
--- a/lib/compiler/src/beam_type.erl
+++ b/lib/compiler/src/beam_type.erl
@@ -80,96 +80,99 @@ simplify(Is0, TypeDb0) ->
 %%  Basic simplification, mostly tuples, no floating point optimizations.
 
 simplify_basic(Is, Ts) ->
-    simplify_basic_1(Is, Ts, []).
-    
-simplify_basic_1([{set,[D],[{integer,Index},Reg],{bif,element,_}}=I0|Is], Ts0, Acc) ->
-    I = case max_tuple_size(Reg, Ts0) of
-	    Sz when 0 < Index, Index =< Sz ->
-		{set,[D],[Reg],{get_tuple_element,Index-1}};
-	    _Other -> I0
-    end,
-    Ts = update(I, Ts0),
-    simplify_basic_1(Is, Ts, [I|Acc]);
-simplify_basic_1([{set,[D],[TupleReg],{get_tuple_element,0}}=I|Is0], Ts0, Acc) ->
-    case tdb_find(TupleReg, Ts0) of
-	{tuple,_,_,[Contents]} ->
-	    simplify_basic_1([{set,[D],[Contents],move}|Is0], Ts0, Acc);
-	_ ->
-	    Ts = update(I, Ts0),
-	    simplify_basic_1(Is0, Ts, [I|Acc])
+    simplify_basic(Is, Ts, []).
+
+simplify_basic([I0|Is], Ts0, Acc) ->
+    case simplify_instr(I0, Ts0) of
+        [] ->
+            simplify_basic(Is, Ts0, Acc);
+        [I] ->
+            Ts = update(I, Ts0),
+            simplify_basic(Is, Ts, [I|Acc])
+    end;
+simplify_basic([], Ts, Acc) ->
+    {reverse(Acc),Ts}.
+
+simplify_instr({set,[D],[{integer,Index},Reg],{bif,element,_}}=I, Ts) ->
+    case max_tuple_size(Reg, Ts) of
+        Sz when 0 < Index, Index =< Sz ->
+            [{set,[D],[Reg],{get_tuple_element,Index-1}}];
+        _ -> [I]
+    end;
+simplify_instr({test,is_atom,_,[R]}=I, Ts) ->
+    case tdb_find(R, Ts) of
+        boolean -> [];
+        _ -> [I]
     end;
-simplify_basic_1([{set,_,_,{try_catch,_,_}}=I|Is], _Ts, Acc) ->
-    simplify_basic_1(Is, tdb_new(), [I|Acc]);
-simplify_basic_1([{test,is_atom,_,[R]}=I|Is], Ts, Acc) ->
+simplify_instr({test,is_integer,_,[R]}=I, Ts) ->
+    case tdb_find(R, Ts) of
+        integer -> [];
+        {integer,_} -> [];
+	_ -> [I]
+    end;
+simplify_instr({set,[D],[TupleReg],{get_tuple_element,0}}=I, Ts) ->
+    case tdb_find(TupleReg, Ts) of
+        {tuple,_,_,[Contents]} ->
+            [{set,[D],[Contents],move}];
+        _ ->
+            [I]
+    end;
+simplify_instr({test,is_tuple,_,[R]}=I, Ts) ->
     case tdb_find(R, Ts) of
-	boolean -> simplify_basic_1(Is, Ts, Acc);
-	_ -> simplify_basic_1(Is, Ts, [I|Acc])
+        {tuple,_,_,_} -> [];
+        _ -> [I]
     end;
-simplify_basic_1([{test,is_integer,_,[R]}=I|Is], Ts, Acc) ->
+simplify_instr({test,test_arity,_,[R,Arity]}=I, Ts) ->
     case tdb_find(R, Ts) of
-	integer -> simplify_basic_1(Is, Ts, Acc);
-	{integer,_} -> simplify_basic_1(Is, Ts, Acc);
-	_ -> simplify_basic_1(Is, Ts, [I|Acc])
+        {tuple,exact_size,Arity,_} -> [];
+        _ -> [I]
     end;
-simplify_basic_1([{test,is_tuple,_,[R]}=I|Is], Ts, Acc) ->
+simplify_instr({test,is_map,_,[R]}=I, Ts) ->
     case tdb_find(R, Ts) of
-	{tuple,_,_,_} -> simplify_basic_1(Is, Ts, Acc);
-	_ -> simplify_basic_1(Is, Ts, [I|Acc])
+        map -> [];
+        _ -> [I]
     end;
-simplify_basic_1([{test,test_arity,_,[R,Arity]}=I|Is], Ts0, Acc) ->
-    case tdb_find(R, Ts0) of
-	{tuple,exact_size,Arity,_} ->
-	    simplify_basic_1(Is, Ts0, Acc);
-	_Other ->
-	    Ts = update(I, Ts0),
-	    simplify_basic_1(Is, Ts, [I|Acc])
+simplify_instr({test,is_nonempty_list,_,[R]}=I, Ts) ->
+    case tdb_find(R, Ts) of
+        nonempty_list -> [];
+        _ -> [I]
     end;
-simplify_basic_1([{test,is_map,_,[R]}=I|Is], Ts0, Acc) ->
-    case tdb_find(R, Ts0) of
-	map -> simplify_basic_1(Is, Ts0, Acc);
-	_Other ->
-	    Ts = update(I, Ts0),
-	    simplify_basic_1(Is, Ts, [I|Acc])
+simplify_instr({test,is_eq_exact,Fail,[R,{atom,_}=Atom]}=I, Ts) ->
+    case tdb_find(R, Ts) of
+        {atom,_}=Atom -> [];
+        {atom,_} -> [{jump,Fail}];
+        _ -> [I]
     end;
-simplify_basic_1([{test,is_nonempty_list,_,[R]}=I|Is], Ts0, Acc) ->
-    case tdb_find(R, Ts0) of
-	nonempty_list -> simplify_basic_1(Is, Ts0, Acc);
-	_Other ->
-	    Ts = update(I, Ts0),
-	    simplify_basic_1(Is, Ts, [I|Acc])
-    end;
-simplify_basic_1([{test,is_eq_exact,Fail,[R,{atom,_}=Atom]}=I|Is0], Ts0, Acc0) ->
-    Acc = case tdb_find(R, Ts0) of
-	      {atom,_}=Atom -> Acc0;
-	      {atom,_} -> [{jump,Fail}|Acc0];
-	      _ -> [I|Acc0]
-	  end,
-    Ts = update(I, Ts0),
-    simplify_basic_1(Is0, Ts, Acc);
-simplify_basic_1([{test,is_record,_,[R,{atom,_}=Tag,{integer,Arity}]}=I|Is], Ts0, Acc) ->
-    case tdb_find(R, Ts0) of
-	{tuple,exact_size,Arity,[Tag]} ->
-	    simplify_basic_1(Is, Ts0, Acc);
-	_Other ->
-	    Ts = update(I, Ts0),
-	    simplify_basic_1(Is, Ts, [I|Acc])
-    end;
-simplify_basic_1([{select,select_val,Reg,_,_}=I0|Is], Ts, Acc) ->
-    I = case tdb_find(Reg, Ts) of
-	    {integer,Range} ->
-		simplify_select_val_int(I0, Range);
-	    boolean ->
-		simplify_select_val_bool(I0);
-	    _ ->
-		I0
-	end,
-    simplify_basic_1(Is, tdb_new(), [I|Acc]);
-simplify_basic_1([I|Is], Ts0, Acc) ->
-    Ts = update(I, Ts0),
-    simplify_basic_1(Is, Ts, [I|Acc]);
-simplify_basic_1([], Ts, Acc) ->
-    Is = reverse(Acc),
-    {Is,Ts}.
+simplify_instr({test,is_record,_,[R,{atom,_}=Tag,{integer,Arity}]}=I, Ts) ->
+    case tdb_find(R, Ts) of
+        {tuple,exact_size,Arity,[Tag]} -> [];
+        _ -> [I]
+    end;
+simplify_instr({select,select_val,Reg,_,_}=I, Ts) ->
+    [case tdb_find(Reg, Ts) of
+         {integer,Range} ->
+             simplify_select_val_int(I, Range);
+         boolean ->
+             simplify_select_val_bool(I);
+         _ ->
+             I
+     end];
+simplify_instr({test,bs_test_unit,_,[Src,Unit]}=I, Ts) ->
+    case tdb_find(Src, Ts) of
+        {binary,U} when U rem Unit =:= 0 -> [];
+        _ -> [I]
+    end;
+simplify_instr({test,is_binary,_,[Src]}=I, Ts) ->
+    case tdb_find(Src, Ts) of
+        {binary,U} when U rem 8 =:= 0 -> [];
+        _ -> [I]
+    end;
+simplify_instr({test,is_bitstr,_,[Src]}=I, Ts) ->
+    case tdb_find(Src, Ts) of
+        {binary,_} -> [];
+        _ -> [I]
+    end;
+simplify_instr(I, _) -> [I].
 
 simplify_select_val_int({select,select_val,R,_,L0}=I, {Min,Max}) ->
     Vs = sort([V || {integer,V} <- L0]),
@@ -504,8 +507,12 @@ update({test,is_eq_exact,_,[Reg,{atom,_}=Atom]}, Ts) ->
 update({test,is_record,_Fail,[Src,Tag,{integer,Arity}]}, Ts) ->
     tdb_update([{Src,{tuple,exact_size,Arity,[Tag]}}], Ts);
 
-%% Binary matching
+%% Binaries and binary matching.
 
+update({test,is_binary,_Fail,[Src]}, Ts0) ->
+    tdb_update([{Src,{binary,8}}], Ts0);
+update({test,is_bitstr,_Fail,[Src]}, Ts0) ->
+    tdb_update([{Src,{binary,1}}], Ts0);
 update({test,bs_get_integer2,_,_,Args,Dst}, Ts) ->
     tdb_update([{Dst,get_bs_integer_type(Args)}], Ts);
 update({test,bs_get_utf8,_,_,_,Dst}, Ts) ->
@@ -514,8 +521,10 @@ update({test,bs_get_utf16,_,_,_,Dst}, Ts) ->
     tdb_update([{Dst,?UNICODE_INT}], Ts);
 update({test,bs_get_utf32,_,_,_,Dst}, Ts) ->
     tdb_update([{Dst,?UNICODE_INT}], Ts);
+update({bs_init,_,{bs_init2,_,_},_,_,Dst}, Ts) ->
+    tdb_update([{Dst,{binary,8}}], Ts);
 update({bs_init,_,_,_,_,Dst}, Ts) ->
-    tdb_update([{Dst,kill}], Ts);
+    tdb_update([{Dst,{binary,1}}], Ts);
 update({bs_put,_,_,_}, Ts) ->
     Ts;
 update({bs_save2,_,_}, Ts) ->
@@ -524,12 +533,19 @@ update({bs_restore2,_,_}, Ts) ->
     Ts;
 update({bs_context_to_binary,Dst}, Ts) ->
     tdb_update([{Dst,kill}], Ts);
-update({test,bs_start_match2,_,_,_,Dst}, Ts) ->
-    tdb_update([{Dst,kill}], Ts);
-update({test,bs_get_binary2,_,_,_,Dst}, Ts) ->
-    tdb_update([{Dst,kill}], Ts);
+update({test,bs_start_match2,_,_,[Src,_],Dst}, Ts) ->
+    Type = case tdb_find(Src, Ts) of
+               {binary,_}=Type0 -> Type0;
+               _ -> {binary,1}
+           end,
+    tdb_update([{Dst,Type}], Ts);
+update({test,bs_get_binary2,_,_,[_,_,Unit,_],Dst}, Ts) ->
+    true = is_integer(Unit),                    %Assertion.
+    tdb_update([{Dst,{binary,Unit}}], Ts);
 update({test,bs_get_float2,_,_,_,Dst}, Ts) ->
     tdb_update([{Dst,float}], Ts);
+update({test,bs_test_unit,_,[Src,Unit]}, Ts) ->
+    tdb_update([{Src,{binary,Unit}}], Ts);
 
 update({test,_Test,_Fail,_Other}, Ts) ->
     Ts;
@@ -566,6 +582,7 @@ update({call_fun, _}, Ts) -> tdb_kill_xregs(Ts);
 update({apply, _}, Ts) -> tdb_kill_xregs(Ts);
 
 update({line,_}, Ts) -> Ts;
+update({'%',_}, Ts) -> Ts;
 
 %% The instruction is unknown.  Kill all information.
 update(_I, _Ts) -> tdb_new().
@@ -804,6 +821,9 @@ checkerror_2(OrigIs) -> [{set,[],[],fcheckerror}|OrigIs].
 %%%
 %%% 'integer' or {integer,{Min,Max}} that the register contains an
 %%% integer.
+%%%
+%%% {binary,Unit} means that the register contains a binary/bitstring aligned
+%%% to unit Unit.
 
 %% tdb_new() -> EmptyDataBase
 %%  Creates a new, empty type database.
@@ -929,11 +949,14 @@ merge_type_info({integer,_}=Int, integer) ->
     Int;
 merge_type_info({integer,{Min1,Max1}}, {integer,{Min2,Max2}}) ->
     {integer,{max(Min1, Min2),min(Max1, Max2)}};
+merge_type_info({binary,U1}, {binary,U2}) ->
+    {binary,max(U1, U2)};
 merge_type_info(NewType, _) ->
     verify_type(NewType),
     NewType.
 
 verify_type({atom,_}) -> ok;
+verify_type({binary,U}) when is_integer(U) -> ok;
 verify_type(boolean) -> ok;
 verify_type(integer) -> ok;
 verify_type({integer,{Min,Max}})
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index 901588ee3b..5333925589 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -871,6 +871,8 @@ live_opt([{block,Bl0}|Is], Regs0, D, Acc) ->
     live_opt(Is, Regs, D, [{block,[Live|Bl]}|Acc]);
 live_opt([build_stacktrace=I|Is], _, D, Acc) ->
     live_opt(Is, live_call(1), D, [I|Acc]);
+live_opt([raw_raise=I|Is], _, D, Acc) ->
+    live_opt(Is, live_call(3), D, [I|Acc]);
 live_opt([{label,L}=I|Is], Regs, D0, Acc) ->
     D = gb_trees:insert(L, Regs, D0),
     live_opt(Is, Regs, D, [I|Acc]);
@@ -1142,6 +1144,8 @@ defs([{move,_,Dst}=I|Is], Regs0, D) ->
 defs([{put_map,{f,Fail},_,_,Dst,_,_}=I|Is], Regs0, D) ->
     Regs = def_regs([Dst], Regs0),
     [I|defs(Is, Regs, update_regs(Fail, Regs0, D))];
+defs([raw_raise=I|Is], _Regs, D) ->
+    [I|defs(Is, 1, D)];
 defs([return=I|Is], _Regs, D) ->
     [I|defs_unreachable(Is, D)];
 defs([{select,_,_Src,Fail,List}=I|Is], Regs, D0) ->
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index 4feb26c513..f8bf935132 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -85,8 +85,6 @@ format_error(Error) ->
 %%% Things currently not checked. XXX
 %%%
 %%% - Heap allocation for binaries.
-%%% - That put_tuple is followed by the correct number of
-%%%   put instructions.
 %%%
 
 %% validate(Module, [Function]) -> [] | [Error]
@@ -148,7 +146,8 @@ validate_0(Module, [{function,Name,Ar,Entry,Code}|Fs], Ft) ->
 	 hf=0,				%Available heap size for floats.
 	 fls=undefined,			%Floating point state.
 	 ct=[],				%List of hot catch/try labels
-	 setelem=false			%Previous instruction was setelement/3.
+         setelem=false,                 %Previous instruction was setelement/3.
+         puts_left=none                 %put/1 instructions left.
 	}).
 
 -type label()        :: integer().
@@ -340,11 +339,25 @@ valfun_1({put_list,A,B,Dst}, Vst0) ->
     Vst = eat_heap(2, Vst0),
     set_type_reg(cons, Dst, Vst);
 valfun_1({put_tuple,Sz,Dst}, Vst0) when is_integer(Sz) ->
+    Vst1 = eat_heap(1, Vst0),
+    Vst = set_type_reg(tuple_in_progress, Dst, Vst1),
+    #vst{current=St0} = Vst,
+    St = St0#st{puts_left={Sz,{Dst,{tuple,Sz}}}},
+    Vst#vst{current=St};
+valfun_1({put,Src}, Vst0) ->
+    assert_term(Src, Vst0),
     Vst = eat_heap(1, Vst0),
-    set_type_reg({tuple,Sz}, Dst, Vst);
-valfun_1({put,Src}, Vst) ->
-    assert_term(Src, Vst),
-    eat_heap(1, Vst);
+    #vst{current=St0} = Vst,
+    case St0 of
+        #st{puts_left=none} ->
+            error(not_building_a_tuple);
+        #st{puts_left={1,{Dst,Type}}} ->
+            St = St0#st{puts_left=none},
+            set_type_reg(Type, Dst, Vst#vst{current=St});
+        #st{puts_left={PutsLeft,Info}} when is_integer(PutsLeft) ->
+            St = St0#st{puts_left={PutsLeft-1,Info}},
+            Vst#vst{current=St}
+    end;
 %% Instructions for optimization of selective receives.
 valfun_1({recv_mark,{f,Fail}}, Vst) when is_integer(Fail) ->
     Vst;
@@ -524,6 +537,8 @@ valfun_4({bif,element,{f,Fail},[Pos,Tuple],Dst}, Vst0) ->
 valfun_4({bif,raise,{f,0},Src,_Dst}, Vst) ->
     validate_src(Src, Vst),
     kill_state(Vst);
+valfun_4(raw_raise=I, Vst) ->
+    call(I, 3, Vst);
 valfun_4({bif,Op,{f,Fail},Src,Dst}, Vst0) ->
     validate_src(Src, Vst0),
     Vst = branch_state(Fail, Vst0),
@@ -1272,6 +1287,7 @@ get_move_term_type(Src, Vst) ->
 	initialized -> error({unassigned,Src});
 	{catchtag,_} -> error({catchtag,Src});
 	{trytag,_} -> error({trytag,Src});
+        tuple_in_progress -> error({tuple_in_progress,Src});
 	Type -> Type
     end.
 
@@ -1280,10 +1296,7 @@ get_move_term_type(Src, Vst) ->
 %%  a standard Erlang type (no catch/try tags or match contexts).
 
 get_term_type(Src, Vst) ->
-    case get_term_type_1(Src, Vst) of
-	initialized -> error({unassigned,Src});
-	{catchtag,_} -> error({catchtag,Src});
-	{trytag,_} -> error({trytag,Src});
+    case get_move_term_type(Src, Vst) of
 	#ms{} -> error({match_context,Src});
 	Type -> Type
     end.
diff --git a/lib/compiler/src/genop.tab b/lib/compiler/src/genop.tab
index 397e478e1e..d59bb241a8 100755
--- a/lib/compiler/src/genop.tab
+++ b/lib/compiler/src/genop.tab
@@ -554,3 +554,13 @@ BEAM_FORMAT_NUMBER=0
 ##       Do a garbage collection if necessary to allocate space on the heap
 ##       for the result.
 160: build_stacktrace/0
+
+## @spec raw_raise
+## @doc  This instruction works like the erlang:raise/3 BIF, except that the
+##       stacktrace in x(2) must be a raw stacktrace.
+##       x(0) is the class of the exception (error, exit, or throw),
+##       x(1) is the exception term, and x(2) is the raw stacktrace.
+##       If x(0) is not a valid class, the instruction will not throw an
+##       exception, but store the atom 'badarg' in x(0) and execute the
+##       next instruction.
+161: raw_raise/0
diff --git a/lib/compiler/src/sys_core_bsm.erl b/lib/compiler/src/sys_core_bsm.erl
index 37e071fafa..65580f79e3 100644
--- a/lib/compiler/src/sys_core_bsm.erl
+++ b/lib/compiler/src/sys_core_bsm.erl
@@ -24,7 +24,7 @@
 -export([module/2,format_error/1]).
 
 -include("core_parse.hrl").
--import(lists, [member/2,nth/2,reverse/1,usort/1]).
+-import(lists, [member/2,reverse/1,usort/1]).
 
 -spec module(cerl:c_module(), [compile:option()]) -> {'ok', cerl:c_module()}.
 
@@ -59,13 +59,6 @@ format_error(bin_opt_alias) ->
 format_error(bin_partition) ->
     "INFO: matching non-variables after a previous clause matching a variable "
 	"will prevent delayed sub binary optimization";
-format_error(bin_left_var_used_in_guard) ->
-    "INFO: a variable to the left of the binary pattern is used in a guard; "
-	"will prevent delayed sub binary optimization";
-format_error(bin_argument_order) ->
-    "INFO: matching anything else but a plain variable to the left of "
-	"binary pattern will prevent delayed sub binary optimization; "
-	"SUGGEST changing argument order";
 format_error(bin_var_used) ->
     "INFO: using a matched out sub binary will prevent "
 	"delayed sub binary optimization";
@@ -96,46 +89,41 @@ bsm_an(#c_case{arg=#c_values{es=Es}}=Case) ->
 bsm_an(Other) ->
     {ok,Other}.
 
-bsm_an_1(Vs, #c_case{clauses=Cs}=Case) ->
-    case bsm_leftmost(Cs) of
-	none -> {ok,Case};
-	Pos -> bsm_an_2(Vs, Cs, Case, Pos)
-    end.
-
-bsm_an_2(Vs, Cs, Case, Pos) ->
-    case bsm_nonempty(Cs, Pos) of
-	true -> bsm_an_3(Vs, Cs, Case, Pos);
-	false -> {ok,Case}
+bsm_an_1(Vs0, #c_case{clauses=Cs0}=Case) ->
+    case bsm_leftmost(Cs0) of
+	none ->
+            {ok,Case};
+        1 ->
+            bsm_an_2(Vs0, Cs0, Case);
+        Pos ->
+            Vs = move_from_col(Pos, Vs0),
+            Cs = [C#c_clause{pats=move_from_col(Pos, Ps)} ||
+                     #c_clause{pats=Ps}=C <- Cs0],
+            bsm_an_2(Vs, Cs, Case)
     end.
 
-bsm_an_3(Vs, Cs, Case, Pos) ->
+bsm_an_2(Vs, Cs, Case) ->
     try
-	bsm_ensure_no_partition(Cs, Pos),
-	{ok,bsm_do_an(Vs, Pos, Cs, Case)}
+        bsm_ensure_no_partition(Cs),
+        {ok,bsm_do_an(Vs, Cs, Case)}
     catch
-	throw:{problem,Where,What} ->
-	    {ok,Case,{Where,What}}
+        throw:{problem,Where,What} ->
+            {ok,Case,{Where,What}}
     end.
 
-bsm_do_an(Vs0, Pos, Cs0, Case) ->
-    case nth(Pos, Vs0) of
-	#c_var{name=Vname}=V0 ->
-	    Cs = bsm_do_an_var(Vname, Pos, Cs0, []),
-	    V = bsm_annotate_for_reuse(V0),
-	    Bef = lists:sublist(Vs0, Pos-1),
-	    Aft = lists:nthtail(Pos, Vs0),
-	    case Bef ++ [V|Aft] of
-		[_] ->
-		    Case#c_case{arg=V,clauses=Cs};
-		Vs ->
-		    Case#c_case{arg=#c_values{es=Vs},clauses=Cs}
-	    end;
-	_ ->
-	    Case
-    end.
+move_from_col(Pos, L) ->
+    {First,[Col|Rest]} = lists:split(Pos - 1, L),
+    [Col|First] ++ Rest.
 
-bsm_do_an_var(V, S, [#c_clause{pats=Ps,guard=G,body=B0}=C0|Cs], Acc) ->
-    case nth(S, Ps) of
+bsm_do_an([#c_var{name=Vname}=V0|Vs0], Cs0, Case) ->
+    Cs = bsm_do_an_var(Vname, Cs0),
+    V = bsm_annotate_for_reuse(V0),
+    Vs = core_lib:make_values([V|Vs0]),
+    Case#c_case{arg=Vs,clauses=Cs};
+bsm_do_an(_Vs, _Cs, Case) -> Case.
+
+bsm_do_an_var(V, [#c_clause{pats=[P|_],guard=G,body=B0}=C0|Cs]) ->
+    case P of
 	#c_var{name=VarName} ->
 	    case core_lib:is_var_used(V, G) of
 		true -> bsm_problem(C0, orig_bin_var_used_in_guard);
@@ -148,23 +136,23 @@ bsm_do_an_var(V, S, [#c_clause{pats=Ps,guard=G,body=B0}=C0|Cs], Acc) ->
 	    B1 = bsm_maybe_ctx_to_binary(VarName, B0),
 	    B = bsm_maybe_ctx_to_binary(V, B1),
 	    C = C0#c_clause{body=B},
-	    bsm_do_an_var(V, S, Cs, [C|Acc]);
-	#c_alias{}=P ->
+            [C|bsm_do_an_var(V, Cs)];
+        #c_alias{} ->
 	    case bsm_could_match_binary(P) of
 		false ->
-		    bsm_do_an_var(V, S, Cs, [C0|Acc]);
+		    [C0|bsm_do_an_var(V, Cs)];
 		true ->
 		    bsm_problem(C0, bin_opt_alias)
 	    end;
-	P ->
+        _ ->
 	    case bsm_could_match_binary(P) andalso bsm_is_var_used(V, G, B0) of
 		false ->
-		    bsm_do_an_var(V, S, Cs, [C0|Acc]);
+		    [C0|bsm_do_an_var(V, Cs)];
 		true ->
 		    bsm_problem(C0, bin_var_used)
 	    end
     end;
-bsm_do_an_var(_, _, [], Acc) -> reverse(Acc).
+bsm_do_an_var(_, []) -> [].
 
 bsm_annotate_for_reuse(#c_var{anno=Anno}=Var) ->
     Var#c_var{anno=[reuse_for_context|Anno]}.
@@ -192,131 +180,82 @@ previous_ctx_to_binary(V, Core) ->
     end.
 
 %% bsm_leftmost(Cs) -> none | ArgumentNumber
-%%  Find the leftmost argument that does binary matching. Return
-%%  the number of the argument (1-N).
+%%  Find the leftmost argument that matches a nonempty binary.
+%%  Return either 'none' or the argument number (1-N).
 
 bsm_leftmost(Cs) ->
     bsm_leftmost_1(Cs, none).
 
+bsm_leftmost_1([_|_], 1) ->
+    1;
 bsm_leftmost_1([#c_clause{pats=Ps}|Cs], Pos) ->
     bsm_leftmost_2(Ps, Cs, 1, Pos);
 bsm_leftmost_1([], Pos) -> Pos.
 
 bsm_leftmost_2(_, Cs, Pos, Pos) ->
     bsm_leftmost_1(Cs, Pos);
-bsm_leftmost_2([#c_binary{}|_], Cs, N, _) ->
+bsm_leftmost_2([#c_binary{segments=[_|_]}|_], Cs, N, _) ->
     bsm_leftmost_1(Cs, N);
 bsm_leftmost_2([_|Ps], Cs, N, Pos) ->
     bsm_leftmost_2(Ps, Cs, N+1, Pos);
 bsm_leftmost_2([], Cs, _, Pos) ->
     bsm_leftmost_1(Cs, Pos).
 
-%% bsm_nonempty(Cs, Pos) -> true|false
-%%  Check if at least one of the clauses matches a non-empty
-%%  binary in the given argument position.
+%% bsm_ensure_no_partition(Cs) -> ok     (exception if problem)
+%%  There must only be a single bs_start_match2 instruction if we
+%%  are to reuse the binary variable for the match context.
+%%
+%%  To make sure that there is only a single bs_start_match2
+%%  instruction, we will check for partitions such as:
 %%
-bsm_nonempty([#c_clause{pats=Ps}|Cs], Pos) ->
-    case nth(Pos, Ps) of
-	#c_binary{segments=[_|_]} ->
-	    true;
-	_ ->
-	    bsm_nonempty(Cs, Pos)
-    end;
-bsm_nonempty([], _ ) -> false.
-
-%% bsm_ensure_no_partition(Cs, Pos) -> ok     (exception if problem)
-%%  We must make sure that matching is not partitioned between
-%%  variables like this:
 %%             foo(<<...>>) -> ...
 %%             foo(<Variable>) when ... -> ...
-%%             foo(<Any non-variable pattern>) ->
-%%  If there is such partition, we are not allowed to reuse the binary variable
-%%  for the match context.
+%%             foo(<Non-variable pattern>) ->
 %%
-%%  Also, arguments to the left of the argument that is matched
-%%  against a binary, are only allowed to be simple variables, not
-%%  used in guards. The reason is that we must know that the binary is
-%%  only matched in one place (i.e. there must be only one bs_start_match2
-%%  instruction emitted).
+%%  If there is such a partition, we reject the optimization.
 
-bsm_ensure_no_partition(Cs, Pos) ->
-    bsm_ensure_no_partition_1(Cs, Pos, before).
+bsm_ensure_no_partition(Cs) ->
+    bsm_ensure_no_partition_1(Cs, before).
 
 %% Loop through each clause.
-bsm_ensure_no_partition_1([#c_clause{pats=Ps,guard=G}|Cs], Pos, State0) ->
-    State = bsm_ensure_no_partition_2(Ps, Pos, G, simple_vars, State0),
+bsm_ensure_no_partition_1([#c_clause{pats=Ps,guard=G}|Cs], State0) ->
+    State = bsm_ensure_no_partition_2(Ps, G, State0),
     case State of
 	'after' ->
-	    bsm_ensure_no_partition_after(Cs, Pos);
+	    bsm_ensure_no_partition_after(Cs);
 	_ ->
 	    ok
     end,
-    bsm_ensure_no_partition_1(Cs, Pos, State);
-bsm_ensure_no_partition_1([], _, _) -> ok.
+    bsm_ensure_no_partition_1(Cs, State);
+bsm_ensure_no_partition_1([], _) -> ok.
 
-%% Loop through each pattern for this clause.
-bsm_ensure_no_partition_2([#c_binary{}=Where|_], 1, _, Vstate, State) ->
-    case State of
-	before when Vstate =:= simple_vars -> within;
-	before -> bsm_problem(Where, Vstate);
-	within when Vstate =:= simple_vars -> within;
-	within -> bsm_problem(Where, Vstate)
-    end;
-bsm_ensure_no_partition_2([#c_alias{}=Alias|_], 1, N, Vstate, State) ->
+bsm_ensure_no_partition_2([#c_binary{}|_], _, _State) ->
+    within;
+bsm_ensure_no_partition_2([#c_alias{}=Alias|_], N, State) ->
     %% Retrieve the real pattern that the alias refers to and check that.
     P = bsm_real_pattern(Alias),
-    bsm_ensure_no_partition_2([P], 1, N, Vstate, State);
-bsm_ensure_no_partition_2([_|_], 1, _, _Vstate, before=State) ->
+    bsm_ensure_no_partition_2([P], N, State);
+bsm_ensure_no_partition_2([_|_], _, before=State) ->
     %% No binary matching yet - therefore no partition.
     State;
-bsm_ensure_no_partition_2([P|_], 1, _, Vstate, State) ->
+bsm_ensure_no_partition_2([P|_], _, State) ->
     case bsm_could_match_binary(P) of
 	false ->
-	    %% If clauses can be freely arranged (Vstate =:= simple_vars),
-	    %% a clause that cannot match a binary will not partition the clause.
-	    %% Example:
-	    %%
-	    %% a(Var, <<>>) -> ...
-	    %% a(Var, []) -> ...
-	    %% a(Var, <<B>>) -> ...
-	    %%
-	    %% But if the clauses can't be freely rearranged, as in
-	    %%
-	    %% b(Var, <<X>>) -> ...
-	    %% b(1, 2) -> ...
-	    %%
-	    %% we do have a problem.
-	    %%
-	    case Vstate of
-		simple_vars -> State;
-		_ -> bsm_problem(P, Vstate)
-	    end;
+            State;
 	true ->
 	    %% The pattern P *may* match a binary, so we must update the state.
 	    %% (P must be a variable.)
-	    case State of
-		within -> 'after';
-		'after' -> 'after'
-	    end
-    end;
-bsm_ensure_no_partition_2([#c_var{name=V}|Ps], N, G, Vstate, S) ->
-    case core_lib:is_var_used(V, G) of
-	false ->
-	    bsm_ensure_no_partition_2(Ps, N-1, G, Vstate, S);
-	true ->
-	    bsm_ensure_no_partition_2(Ps, N-1, G, bin_left_var_used_in_guard, S)
-    end;
-bsm_ensure_no_partition_2([_|Ps], N, G, _, S) ->
-    bsm_ensure_no_partition_2(Ps, N-1, G, bin_argument_order, S).
+            'after'
+    end.
 
-bsm_ensure_no_partition_after([#c_clause{pats=Ps}=C|Cs], Pos) ->
-    case nth(Pos, Ps) of
-	#c_var{} ->
-	    bsm_ensure_no_partition_after(Cs, Pos);
-	_ ->
-	    bsm_problem(C, bin_partition)
+bsm_ensure_no_partition_after([#c_clause{pats=Ps}=C|Cs]) ->
+    case Ps of
+        [#c_var{}|_] ->
+            bsm_ensure_no_partition_after(Cs);
+        _ ->
+            bsm_problem(C, bin_partition)
     end;
-bsm_ensure_no_partition_after([], _) -> ok.
+bsm_ensure_no_partition_after([]) -> ok.
 
 bsm_could_match_binary(#c_alias{pat=P}) -> bsm_could_match_binary(P);
 bsm_could_match_binary(#c_cons{}) -> false;
diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl
index 46816fe24a..a9bd363ee1 100644
--- a/lib/compiler/src/sys_core_fold.erl
+++ b/lib/compiler/src/sys_core_fold.erl
@@ -2507,6 +2507,72 @@ are_all_failing_clauses(Cs) ->
 is_failing_clause(#c_clause{body=B}) ->
     will_fail(B).
 
+%% opt_build_stacktrace(Let) -> Core.
+%%  If the stacktrace is *only* used in a call to erlang:raise/3,
+%%  there is no need to build a cooked stacktrace using build_stacktrace/1.
+
+opt_build_stacktrace(#c_let{vars=[#c_var{name=Cooked}],
+                            arg=#c_primop{name=#c_literal{val=build_stacktrace},
+                                          args=[RawStk]},
+                            body=Body}=Let) ->
+    case Body of
+        #c_call{module=#c_literal{val=erlang},
+                name=#c_literal{val=raise},
+                args=[Class,Exp,#c_var{name=Cooked}]} ->
+            %% The stacktrace is only used in a call to erlang:raise/3.
+            %% There is no need to build the stacktrace. Replace the
+            %% call to erlang:raise/3 with the raw_raise/3 instruction,
+            %% which will use a raw stacktrace.
+            #c_primop{name=#c_literal{val=raw_raise},
+                      args=[Class,Exp,RawStk]};
+        #c_let{vars=[#c_var{name=V}],arg=Arg,body=B0} when V =/= Cooked ->
+            case core_lib:is_var_used(Cooked, Arg) of
+                false ->
+                    %% The built stacktrace is not used in the argument,
+                    %% so we can sink the building of the stacktrace into
+                    %% the body of the let.
+                    B = opt_build_stacktrace(Let#c_let{body=B0}),
+                    Body#c_let{body=B};
+                true ->
+                    Let
+            end;
+        #c_seq{arg=Arg,body=B0} ->
+            case core_lib:is_var_used(Cooked, Arg) of
+                false ->
+                    %% The built stacktrace is not used in the argument,
+                    %% so we can sink the building of the stacktrace into
+                    %% the body of the sequence.
+                    B = opt_build_stacktrace(Let#c_let{body=B0}),
+                    Body#c_seq{body=B};
+                true ->
+                    Let
+            end;
+        #c_case{arg=Arg,clauses=Cs0} ->
+            case core_lib:is_var_used(Cooked, Arg) orelse
+                is_used_in_any_guard(Cooked, Cs0) of
+                false ->
+                    %% The built stacktrace is used neither in the argument
+                    %% nor in any clause guard, so we can sink the building
+                    %% of the stacktrace into each arm of the case.
+                    Cs = [begin
+                              B = opt_build_stacktrace(Let#c_let{body=B0}),
+                              C#c_clause{body=B}
+                          end || #c_clause{body=B0}=C <- Cs0],
+                    Body#c_case{clauses=Cs};
+                true ->
+                    Let
+            end;
+        _ ->
+            Let
+    end;
+opt_build_stacktrace(Expr) ->
+    Expr.
+
+is_used_in_any_guard(V, Cs) ->
+    any(fun(#c_clause{guard=G}) ->
+                core_lib:is_var_used(V, G)
+        end, Cs).
+
 %% opt_case_in_let(Let) -> Let'
 %%  Try to avoid building tuples that are immediately matched.
 %%  A common pattern is:
@@ -2712,8 +2778,9 @@ opt_simple_let_2(Let0, Vs0, Arg0, Body, PrevBody, Sub) ->
 %%  Note that the substitutions and scope in Sub have been cleared
 %%  and should not be used.
 
-post_opt_let(Let, Sub) ->
-    opt_bool_case_in_let(Let, Sub).
+post_opt_let(Let0, Sub) ->
+    Let1 = opt_bool_case_in_let(Let0, Sub),
+    opt_build_stacktrace(Let1).
 
 
 %% remove_first_value(Core0, Sub) -> Core.
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index 8f3399d133..a96d58a903 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -1855,7 +1855,12 @@ internal_cg(guard_error, [ExitCall], _Rs, Le, Vdb, Bef, St) ->
     {Ms,_} = cg_call_args(As, Bef, Le#l.i, Vdb),
     Call = {call_ext,Arity,{extfunc,Mod,Name,Arity}},
     Is = Ms++[line(Le),Call],
-    {Is,Bef,St}.
+    {Is,Bef,St};
+internal_cg(raw_raise=I, As, Rs, Le, Vdb, Bef, St) ->
+    %% This behaves like a function call.
+    {Sis,Int} = cg_setup_call(As, Bef, Le#l.i, Vdb),
+    Reg = load_vars(Rs, clear_regs(Int#sr.reg)),
+    {Sis++[I],clear_dead(Int#sr{reg=Reg}, Le#l.i, Vdb),St}.
 
 %% bif_cg(Bif, [Arg], [Ret], Le, Vdb, StackReg, State) ->
 %%      {[Ainstr],StackReg,State}.
diff --git a/lib/compiler/test/beam_block_SUITE.erl b/lib/compiler/test/beam_block_SUITE.erl
index 55d5f2dbe8..fac18789e0 100644
--- a/lib/compiler/test/beam_block_SUITE.erl
+++ b/lib/compiler/test/beam_block_SUITE.erl
@@ -22,7 +22,7 @@
 -export([all/0,suite/0,groups/0,init_per_suite/1,end_per_suite/1,
 	 init_per_group/2,end_per_group/2,
 	 get_map_elements/1,otp_7345/1,move_opt_across_gc_bif/1,
-	 erl_202/1,repro/1]).
+	 erl_202/1,repro/1,local_cse/1]).
 
 %% The only test for the following functions is that
 %% the code compiles and is accepted by beam_validator.
@@ -40,7 +40,8 @@ groups() ->
        otp_7345,
        move_opt_across_gc_bif,
        erl_202,
-       repro
+       repro,
+       local_cse
       ]}].
 
 init_per_suite(Config) ->
@@ -237,6 +238,63 @@ find_operands(Cfg,XsiGraph,ActiveList,Count) ->
     [Count+1, length(NewActiveList), length(digraph:vertices(XsiGraph))],
     find_operands(NewCfg,XsiGraph,NewActiveList,Count+1).
 
+%% Some tests of local common subexpression elimination (CSE).
+
+local_cse(_Config) ->
+    {Self,{ok,Self}} = local_cse_1(),
+
+    local_cse_2([]),
+    local_cse_2(lists:seq(1, 512)),
+    local_cse_2(?MODULE:module_info()),
+
+    {[b],[a,b]} = local_cse_3(a, b),
+
+    {2000,Self,{Self,write_cache}} = local_cse_4(),
+
+    ok.
+
+local_cse_1() ->
+    %% Cover handling of unsafe tuple construction in
+    %% eliminate_use_of_from_reg/4. It became necessary to handle
+    %% unsafe tuples when local CSE was introduced.
+
+    {self(),{ok,self()}}.
+
+local_cse_2(Term) ->
+    case cse_make_binary(Term) of
+        <<Size:8,BinTerm:Size/binary>> ->
+            Term = binary_to_term(BinTerm);
+        <<Size:8,SizeTerm:Size/binary,BinTerm/binary>> ->
+            {'$size',TermSize} = binary_to_term(SizeTerm),
+            TermSize = byte_size(BinTerm),
+            Term = binary_to_term(BinTerm)
+    end.
+
+%% Copy of observer_backend:ttb_make_binary/1. During development of
+%% the local CSE optimization this function was incorrectly optimized.
+
+cse_make_binary(Term) ->
+    B = term_to_binary(Term),
+    SizeB = byte_size(B),
+    if SizeB > 255 ->
+            SB = term_to_binary({'$size',SizeB}),
+            <<(byte_size(SB)):8, SB/binary, B/binary>>;
+       true ->
+            <<SizeB:8, B/binary>>
+    end.
+
+local_cse_3(X, Y) ->
+    %% The following expression was incorrectly transformed to {[X,Y],[X,Y]}
+    %% during development of the local CSE optimization.
+
+    {[Y],[X,Y]}.
+
+local_cse_4() ->
+    do_local_cse_4(2000, self(), {self(), write_cache}).
+
+do_local_cse_4(X, Y, Z) ->
+    {X,Y,Z}.
+
 %%%
 %%% Common functions.
 %%%
diff --git a/lib/compiler/test/beam_type_SUITE.erl b/lib/compiler/test/beam_type_SUITE.erl
index fe856b12b6..dfbf2aa4a0 100644
--- a/lib/compiler/test/beam_type_SUITE.erl
+++ b/lib/compiler/test/beam_type_SUITE.erl
@@ -23,7 +23,7 @@
 	 init_per_group/2,end_per_group/2,
 	 integers/1,coverage/1,booleans/1,setelement/1,cons/1,
 	 tuple/1,record_float/1,binary_float/1,float_compare/1,
-	 arity_checks/1]).
+	 arity_checks/1,elixir_binaries/1]).
 
 suite() -> [{ct_hooks,[ts_install_cth]}].
 
@@ -42,7 +42,8 @@ groups() ->
        record_float,
        binary_float,
        float_compare,
-       arity_checks
+       arity_checks,
+       elixir_binaries
       ]}].
 
 init_per_suite(Config) ->
@@ -199,5 +200,42 @@ do_tuple_arity_check(RGB) when is_tuple(RGB),
         _ -> ok
     end.
 
+elixir_binaries(_Config) ->
+    <<"foo blitzky baz">> = elixir_binary_1(<<"blitzky">>),
+    <<"foo * baz">> = elixir_binary_2($*),
+    <<7:4,755:10>> = elixir_bitstring_3(<<755:10>>),
+    ok.
+
+elixir_binary_1(Bar) when is_binary(Bar) ->
+    <<"foo ",
+      case Bar of
+          Rewrite when is_binary(Rewrite) ->
+              Rewrite;
+          Rewrite ->
+              list_to_binary(Rewrite)
+      end/binary,
+      " baz">>.
+
+elixir_binary_2(Arg) ->
+    Bin = <<Arg>>,
+    <<"foo ",
+      case Bin of
+          Rewrite when is_binary(Rewrite) ->
+              Rewrite;
+          Rewrite ->
+              list_to_binary:to_string(Rewrite)
+      end/binary,
+      " baz">>.
+
+elixir_bitstring_3(Bar) when is_bitstring(Bar) ->
+    <<7:4,
+      case Bar of
+          Rewrite when is_bitstring(Rewrite) ->
+              Rewrite;
+          Rewrite ->
+              list_to_bitstring(Rewrite)
+      end/bitstring>>.
+
+
 id(I) ->
     I.
diff --git a/lib/compiler/test/beam_validator_SUITE.erl b/lib/compiler/test/beam_validator_SUITE.erl
index 685eb2a72e..63a13281a8 100644
--- a/lib/compiler/test/beam_validator_SUITE.erl
+++ b/lib/compiler/test/beam_validator_SUITE.erl
@@ -33,8 +33,8 @@
 	 state_after_fault_in_catch/1,no_exception_in_catch/1,
 	 undef_label/1,illegal_instruction/1,failing_gc_guard_bif/1,
 	 map_field_lists/1,cover_bin_opt/1,
-	 val_dsetel/1]).
-	 
+	 val_dsetel/1,bad_tuples/1]).
+
 -include_lib("common_test/include/ct.hrl").
 
 init_per_testcase(Case, Config) when is_atom(Case), is_list(Config) ->
@@ -61,7 +61,8 @@ groups() ->
        freg_state,bad_bin_match,bad_dsetel,
        state_after_fault_in_catch,no_exception_in_catch,
        undef_label,illegal_instruction,failing_gc_guard_bif,
-       map_field_lists,cover_bin_opt,val_dsetel]}].
+       map_field_lists,cover_bin_opt,val_dsetel,
+       bad_tuples]}].
 
 init_per_suite(Config) ->
     Config.
@@ -509,6 +510,19 @@ destroy_reg({Tag,N}) ->
 	    {y,N+1}
     end.
 
+bad_tuples(Config) ->
+    Errors = do_val(bad_tuples, Config),
+    [{{bad_tuples,heap_overflow,1},
+      {{put,{x,0}},8,{heap_overflow,{left,0},{wanted,1}}}},
+     {{bad_tuples,long,2},
+      {{put,{atom,too_long}},8,not_building_a_tuple}},
+     {{bad_tuples,self_referential,1},
+      {{put,{x,1}},7,{tuple_in_progress,{x,1}}}},
+     {{bad_tuples,short,1},
+      {{move,{x,1},{x,0}},7,{tuple_in_progress,{x,1}}}}] = Errors,
+
+    ok.
+
 %%%-------------------------------------------------------------------------
 
 transform_remove(Remove, Module) ->
diff --git a/lib/compiler/test/beam_validator_SUITE_data/bad_tuples.S b/lib/compiler/test/beam_validator_SUITE_data/bad_tuples.S
new file mode 100644
index 0000000000..7980241c37
--- /dev/null
+++ b/lib/compiler/test/beam_validator_SUITE_data/bad_tuples.S
@@ -0,0 +1,88 @@
+{module, bad_tuples}.  %% version = 0
+
+{exports, [{heap_overflow,1},
+           {long,2},
+           {module_info,0},
+           {module_info,1},
+           {self_referential,1},
+           {short,1}]}.
+
+{attributes, []}.
+
+{labels, 13}.
+
+
+{function, short, 1, 2}.
+  {label,1}.
+    {line,[{location,"bad_tuples.erl",4}]}.
+    {func_info,{atom,bad_tuples},{atom,short},1}.
+  {label,2}.
+    {test_heap,3,1}.
+    {put_tuple,2,{x,1}}.
+    {put,{atom,ok}}.
+    {move,{x,1},{x,0}}.
+    return.
+
+
+{function, long, 2, 4}.
+  {label,3}.
+    {line,[{location,"bad_tuples.erl",7}]}.
+    {func_info,{atom,bad_tuples},{atom,long},2}.
+  {label,4}.
+    {test_heap,6,2}.
+    {put_tuple,2,{x,2}}.
+    {put,{x,0}}.
+    {put,{x,1}}.
+    {put,{atom,too_long}}.
+    {put_tuple,2,{x,0}}.
+    {put,{atom,ok}}.
+    {put,{x,2}}.
+    return.
+
+
+{function, heap_overflow, 1, 6}.
+  {label,5}.
+    {line,[{location,"bad_tuples.erl",10}]}.
+    {func_info,{atom,bad_tuples},{atom,heap_overflow},1}.
+  {label,6}.
+    {test_heap,3,1}.
+    {put_tuple,2,{x,1}}.
+    {put,{atom,ok}}.
+    {put,{x,0}}.
+    {put,{x,0}}.
+    {move,{x,1},{x,0}}.
+    return.
+
+
+{function, self_referential, 1, 8}.
+  {label,7}.
+    {line,[{location,"bad_tuples.erl",13}]}.
+    {func_info,{atom,bad_tuples},{atom,self_referential},1}.
+  {label,8}.
+    {test_heap,3,1}.
+    {put_tuple,2,{x,1}}.
+    {put,{atom,ok}}.
+    {put,{x,1}}.
+    {move,{x,1},{x,0}}.
+    return.
+
+
+{function, module_info, 0, 10}.
+  {label,9}.
+    {line,[]}.
+    {func_info,{atom,bad_tuples},{atom,module_info},0}.
+  {label,10}.
+    {move,{atom,bad_tuples},{x,0}}.
+    {line,[]}.
+    {call_ext_only,1,{extfunc,erlang,get_module_info,1}}.
+
+
+{function, module_info, 1, 12}.
+  {label,11}.
+    {line,[]}.
+    {func_info,{atom,bad_tuples},{atom,module_info},1}.
+  {label,12}.
+    {move,{x,0},{x,1}}.
+    {move,{atom,bad_tuples},{x,0}}.
+    {line,[]}.
+    {call_ext_only,2,{extfunc,erlang,get_module_info,2}}.
diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl
index 7557d6d57b..235956a714 100644
--- a/lib/compiler/test/bs_match_SUITE.erl
+++ b/lib/compiler/test/bs_match_SUITE.erl
@@ -40,7 +40,7 @@
 	 map_and_binary/1,unsafe_branch_caching/1,
 	 bad_literals/1,good_literals/1,constant_propagation/1,
 	 parse_xml/1,get_payload/1,escape/1,num_slots_different/1,
-         beam_bsm/1,guard/1,is_ascii/1]).
+         beam_bsm/1,guard/1,is_ascii/1,non_opt_eq/1]).
 
 -export([coverage_id/1,coverage_external_ignore/2]).
 
@@ -73,7 +73,7 @@ groups() ->
        map_and_binary,unsafe_branch_caching,
        bad_literals,good_literals,constant_propagation,parse_xml,
        get_payload,escape,num_slots_different,
-       beam_bsm,guard,is_ascii]}].
+       beam_bsm,guard,is_ascii,non_opt_eq]}].
 
 
 init_per_suite(Config) ->
@@ -678,6 +678,10 @@ coverage(Config) when is_list(Config) ->
     <<>> = coverage_per_key(<<4:32>>),
     <<$a,$b,$c>> = coverage_per_key(<<7:32,"abc">>),
 
+    binary = coverage_bitstring(<<>>),
+    binary = coverage_bitstring(<<7>>),
+    bitstring = coverage_bitstring(<<7:4>>),
+    other = coverage_bitstring([a]),
     ok.
 
 coverage_fold(Fun, Acc, <<H,T/binary>>) ->
@@ -768,6 +772,10 @@ coverage_per_key(<<BinSize:32,Bin/binary>> = B) ->
     true = (byte_size(B) =:= BinSize),
     Bin.
 
+coverage_bitstring(Bin) when is_binary(Bin) -> binary;
+coverage_bitstring(<<_/bitstring>>) -> bitstring;
+coverage_bitstring(_) -> other.
+
 multiple_uses(Config) when is_list(Config) ->
     {344,62879,345,<<245,159,1,89>>} = multiple_uses_1(<<1,88,245,159,1,89>>),
     true = multiple_uses_2(<<0,0,197,18>>),
@@ -1654,6 +1662,21 @@ do_is_ascii(<<C,_/binary>>) when C >= 16#80 ->
 do_is_ascii(<<_, T/binary>>) ->
     do_is_ascii(T).
 
+non_opt_eq(_Config) ->
+    true = non_opt_eq([], <<>>),
+    true = non_opt_eq([$a], <<$a>>),
+    false = non_opt_eq([$a], <<$b>>),
+    ok.
+
+%% An example from the Efficiency Guide. It could not be optimized
+%% before, but now it can be.
+
+non_opt_eq([H|T1], <<H,T2/binary>>) ->
+    non_opt_eq(T1, T2);
+non_opt_eq([_|_], <<_,_/binary>>) ->
+    false;
+non_opt_eq([], <<>>) ->
+    true.
 
 check(F, R) ->
     R = F().
diff --git a/lib/compiler/test/misc_SUITE.erl b/lib/compiler/test/misc_SUITE.erl
index d93c5dda1e..4e39f4663e 100644
--- a/lib/compiler/test/misc_SUITE.erl
+++ b/lib/compiler/test/misc_SUITE.erl
@@ -359,9 +359,7 @@ integer_encoding_1(Config) ->
     io:put_chars(Src, "t(Last) ->[\n"),
     io:put_chars(Data, "[\n"),
 
-    do_integer_encoding(-(id(1) bsl 10000), Src, Data),
-    do_integer_encoding(id(1) bsl 10000, Src, Data),
-    do_integer_encoding(1024, 0, Src, Data),
+    do_integer_encoding(137, 0, Src, Data),
     _ = [begin
 	     B = 1 bsl I,
 	     do_integer_encoding(-B-1, Src, Data),
@@ -370,7 +368,7 @@ integer_encoding_1(Config) ->
 	     do_integer_encoding(B-1, Src, Data),
 	     do_integer_encoding(B, Src, Data),
 	     do_integer_encoding(B+1, Src, Data)
-	 end || I <- lists:seq(1, 128)],
+	 end || I <- lists:seq(1, 130)],
     io:put_chars(Src, "Last].\n\n"),
     ok = file:close(Src),
     io:put_chars(Data, "0].\n\n"),
@@ -384,8 +382,6 @@ integer_encoding_1(Config) ->
     %% Compare lists.
     List = Mod:t(0),
     {ok,[List]} = file:consult(DataFile),
-    OneBsl10000 = id(1) bsl 10000,
-    [-(1 bsl 10000),OneBsl10000|_] = List,
 
     %% Cleanup.
     file:delete(SrcFile),
@@ -404,7 +400,3 @@ do_integer_encoding(I, Src, Data) ->
     Str = integer_to_list(I),
     io:put_chars(Src, [Str,",\n"]),
     io:put_chars(Data, [Str,",\n"]).
-
-    
-id(I) -> I.
-    
diff --git a/lib/compiler/test/trycatch_SUITE.erl b/lib/compiler/test/trycatch_SUITE.erl
index 8cf7928cc4..d5a1dc642f 100644
--- a/lib/compiler/test/trycatch_SUITE.erl
+++ b/lib/compiler/test/trycatch_SUITE.erl
@@ -27,7 +27,7 @@
 	 nested_horrid/1,last_call_optimization/1,bool/1,
 	 plain_catch_coverage/1,andalso_orelse/1,get_in_try/1,
 	 hockey/1,handle_info/1,catch_in_catch/1,grab_bag/1,
-         stacktrace/1,nested_stacktrace/1]).
+         stacktrace/1,nested_stacktrace/1,raise/1]).
 
 -include_lib("common_test/include/ct.hrl").
 
@@ -44,7 +44,7 @@ groups() ->
        nested_after,nested_horrid,last_call_optimization,
        bool,plain_catch_coverage,andalso_orelse,get_in_try,
        hockey,handle_info,catch_in_catch,grab_bag,
-       stacktrace,nested_stacktrace]}].
+       stacktrace,nested_stacktrace,raise]}].
 
 
 init_per_suite(Config) ->
@@ -117,6 +117,16 @@ basic(Conf) when is_list(Conf) ->
 	 catch nisse -> erro
 	 end,
 
+    %% Unmatchable clauses.
+    try
+        throw(thrown)
+    catch
+        {a,b}={a,b,c} ->                        %Intentionally no match.
+            ok;
+        thrown ->
+            ok
+    end,
+
     ok.
 
 after_call() ->
@@ -1159,5 +1169,99 @@ nested_stacktrace_1({X1,C1,V1}, {X2,C2,V2}) ->
             {caught1,S1,T2}
     end.
 
+raise(_Config) ->
+    test_raise(fun() -> exit({exit,tuple}) end),
+    test_raise(fun() -> abs(id(x)) end),
+    test_raise(fun() -> throw({was,thrown}) end),
+
+    badarg = bad_raise(fun() -> abs(id(x)) end),
+
+    ok.
+
+bad_raise(Expr) ->
+    try
+        Expr()
+    catch
+        _:E:Stk ->
+            erlang:raise(bad_class, E, Stk)
+    end.
+
+test_raise(Expr) ->
+    test_raise_1(Expr),
+    test_raise_2(Expr),
+    test_raise_3(Expr).
+
+test_raise_1(Expr) ->
+    erase(exception),
+    try
+        do_test_raise_1(Expr)
+    catch
+        C:E:Stk ->
+            {C,E,Stk} = erase(exception)
+    end.
+
+do_test_raise_1(Expr) ->
+    try
+        Expr()
+    catch
+        C:E:Stk ->
+            %% Here the stacktrace must be built.
+            put(exception, {C,E,Stk}),
+            erlang:raise(C, E, Stk)
+    end.
+
+test_raise_2(Expr) ->
+    erase(exception),
+    try
+        do_test_raise_2(Expr)
+    catch
+        C:E:Stk ->
+            {C,E} = erase(exception),
+            try
+                Expr()
+            catch
+                _:_:S ->
+                    [StkTop|_] = S,
+                    [StkTop|_] = Stk
+            end
+    end.
+
+do_test_raise_2(Expr) ->
+    try
+        Expr()
+    catch
+        C:E:Stk ->
+            %% Here it is possible to replace erlang:raise/3 with
+            %% the raw_raise/3 instruction since the stacktrace is
+            %% not actually used.
+            put(exception, {C,E}),
+            erlang:raise(C, E, Stk)
+    end.
+
+test_raise_3(Expr) ->
+    try
+        do_test_raise_3(Expr)
+    catch
+        exit:{exception,C,E}:Stk ->
+            try
+                Expr()
+            catch
+                C:E:S ->
+                    [StkTop|_] = S,
+                    [StkTop|_] = Stk
+            end
+    end.
+
+do_test_raise_3(Expr) ->
+    try
+        Expr()
+    catch
+        C:E:Stk ->
+            %% Here it is possible to replace erlang:raise/3 with
+            %% the raw_raise/3 instruction since the stacktrace is
+            %% not actually used.
+            erlang:raise(exit, {exception,C,E}, Stk)
+    end.
+
 
 id(I) -> I.