45 files changed, 1036 insertions, 757 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index 958d3501c7..cbcbf79839 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -45,6 +45,7 @@ RELSYSDIR = $(RELEASE_PATH)/lib/compiler-$(VSN)
 # Target Specs
 # ----------------------------------------------------
 MODULES =  \
+	beam_a \
 	beam_asm \
 	beam_block \
 	beam_bool \
@@ -65,6 +66,7 @@ MODULES =  \
 	beam_type \
 	beam_utils \
 	beam_validator \
+        beam_z \
 	cerl \
 	cerl_clauses \
 	cerl_inline \
diff --git a/lib/compiler/src/beam_a.erl b/lib/compiler/src/beam_a.erl
new file mode 100644
index 0000000000..1c51226314
--- /dev/null
+++ b/lib/compiler/src/beam_a.erl
@@ -0,0 +1,97 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2012. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Purpose: Run directly after code generation to do any normalization
+%%          or preparation to simplify the optimization passes.
+%%          (Mandatory.)
+
+-module(beam_a).
+
+-export([module/2]).
+
+module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
+    Fs = [function(F) || F <- Fs0],
+    {ok,{Mod,Exp,Attr,Fs,Lc}}.
+
+function({function,Name,Arity,CLabel,Is0}) ->
+    try
+	%% Rename certain operations to simplify the optimization passes.
+	Is1 = rename_instrs(Is0),
+
+	%% Remove unusued labels for cleanliness and to help
+	%% optimization passes and HiPE.
+	Is = beam_jump:remove_unused_labels(Is1),
+	{function,Name,Arity,CLabel,Is}
+    catch
+	Class:Error ->
+	    Stack = erlang:get_stacktrace(),
+	    io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+	    erlang:raise(Class, Error, Stack)
+    end.
+
+rename_instrs([{apply_last,A,N}|Is]) ->
+    [{apply,A},{deallocate,N},return|rename_instrs(Is)];
+rename_instrs([{call_last,A,F,N}|Is]) ->
+    [{call,A,F},{deallocate,N},return|rename_instrs(Is)];
+rename_instrs([{call_ext_last,A,F,N}|Is]) ->
+    [{call_ext,A,F},{deallocate,N},return|rename_instrs(Is)];
+rename_instrs([{call_only,A,F}|Is]) ->
+    [{call,A,F},return|rename_instrs(Is)];
+rename_instrs([{call_ext_only,A,F}|Is]) ->
+    [{call_ext,A,F},return|rename_instrs(Is)];
+rename_instrs([I|Is]) ->
+    [rename_instr(I)|rename_instrs(Is)];
+rename_instrs([]) -> [].
+
+rename_instr({bs_put_binary=I,F,Sz,U,Fl,Src}) ->
+    {bs_put,F,{I,U,Fl},[Sz,Src]};
+rename_instr({bs_put_float=I,F,Sz,U,Fl,Src}) ->
+    {bs_put,F,{I,U,Fl},[Sz,Src]};
+rename_instr({bs_put_integer=I,F,Sz,U,Fl,Src}) ->
+    {bs_put,F,{I,U,Fl},[Sz,Src]};
+rename_instr({bs_put_utf8=I,F,Fl,Src}) ->
+    {bs_put,F,{I,Fl},[Src]};
+rename_instr({bs_put_utf16=I,F,Fl,Src}) ->
+    {bs_put,F,{I,Fl},[Src]};
+rename_instr({bs_put_utf32=I,F,Fl,Src}) ->
+    {bs_put,F,{I,Fl},[Src]};
+%% rename_instr({bs_put_string,_,_}=I) ->
+%%     {bs_put,{f,0},I,[]};
+rename_instr({bs_add=I,F,[Src1,Src2,U],Dst}) when is_integer(U) ->
+    {bif,I,F,[Src1,Src2,{integer,U}],Dst};
+rename_instr({bs_utf8_size=I,F,Src,Dst}) ->
+    {bif,I,F,[Src],Dst};
+rename_instr({bs_utf16_size=I,F,Src,Dst}) ->
+    {bif,I,F,[Src],Dst};
+rename_instr({bs_init2=I,F,Sz,Extra,Live,Flags,Dst}) ->
+    {bs_init,F,{I,Extra,Flags},Live,[Sz],Dst};
+rename_instr({bs_init_bits=I,F,Sz,Extra,Live,Flags,Dst}) ->
+    {bs_init,F,{I,Extra,Flags},Live,[Sz],Dst};
+rename_instr({bs_append=I,F,Sz,Extra,Live,U,Src,Flags,Dst}) ->
+    {bs_init,F,{I,Extra,U,Flags},Live,[Sz,Src],Dst};
+rename_instr({bs_private_append=I,F,Sz,U,Src,Flags,Dst}) ->
+    {bs_init,F,{I,U,Flags},none,[Sz,Src],Dst};
+rename_instr(bs_init_writable=I) ->
+    {bs_init,{f,0},I,1,[{x,0}],{x,0}};
+rename_instr({select_val=I,Reg,Fail,{list,List}}) ->
+    {select,I,Reg,Fail,List};
+rename_instr({select_tuple_arity=I,Reg,Fail,{list,List}}) ->
+    {select,I,Reg,Fail,List};
+rename_instr(send) ->
+    {call_ext,2,send};
+rename_instr(I) -> I.
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index cd568097fa..3e0050382c 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -31,19 +31,16 @@ module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) ->
 
 function({function,Name,Arity,CLabel,Is0}, Lc0) ->
     try
-	%% Extra labels may thwart optimizations.
-	Is1 = beam_jump:remove_unused_labels(Is0),
-
 	%% Collect basic blocks and optimize them.
-	Is2 = blockify(Is1),
-	Is3 = embed_lines(Is2),
-	Is4 = move_allocates(Is3),
-	Is5 = beam_utils:live_opt(Is4),
-	Is6 = opt_blocks(Is5),
-	Is7 = beam_utils:delete_live_annos(Is6),
+	Is1 = blockify(Is0),
+	Is2 = embed_lines(Is1),
+	Is3 = move_allocates(Is2),
+	Is4 = beam_utils:live_opt(Is3),
+	Is5 = opt_blocks(Is4),
+	Is6 = beam_utils:delete_live_annos(Is5),
 
 	%% Optimize bit syntax.
-	{Is,Lc} = bsm_opt(Is7, Lc0),
+	{Is,Lc} = bsm_opt(Is6, Lc0),
 
 	%% Done.
 	{{function,Name,Arity,CLabel,Is},Lc}
@@ -74,9 +71,9 @@ blockify([{bs_save2,R,Point}=I,{test,is_eq_exact,_,_}=Test,
 
 %% Do other peep-hole optimizations.
 blockify([{test,is_atom,{f,Fail},[Reg]}=I|
-	  [{select_val,Reg,{f,Fail},
-	    {list,[{atom,false},{f,_}=BrFalse,
-		   {atom,true}=AtomTrue,{f,_}=BrTrue]}}|Is]=Is0],
+	  [{select,select_val,Reg,{f,Fail},
+	    [{atom,false},{f,_}=BrFalse,
+	     {atom,true}=AtomTrue,{f,_}=BrTrue]}|Is]=Is0],
 	 [{block,Bl}|_]=Acc) ->
     case is_last_bool(Bl, Reg) of
 	false ->
@@ -89,9 +86,9 @@ blockify([{test,is_atom,{f,Fail},[Reg]}=I|
 			  {test,is_eq_exact,BrFalse,[Reg,AtomTrue]}|Acc])
     end;
 blockify([{test,is_atom,{f,Fail},[Reg]}=I|
-	  [{select_val,Reg,{f,Fail},
-	    {list,[{atom,true}=AtomTrue,{f,_}=BrTrue,
-		   {atom,false},{f,_}=BrFalse]}}|Is]=Is0],
+	  [{select,select_val,Reg,{f,Fail},
+	    [{atom,true}=AtomTrue,{f,_}=BrTrue,
+	     {atom,false},{f,_}=BrFalse]}|Is]=Is0],
 	 [{block,Bl}|_]=Acc) ->
     case is_last_bool(Bl, Reg) of
 	false ->
@@ -423,8 +420,8 @@ inverse_comp_op(_) -> none.
 %%% Evaluation of constant bit fields.
 %%%
 
-is_bs_put({bs_put_integer,_,_,_,_,_}) -> true;
-is_bs_put({bs_put_float,_,_,_,_,_}) -> true;
+is_bs_put({bs_put,_,{bs_put_integer,_,_},_}) -> true;
+is_bs_put({bs_put,_,{bs_put_float,_,_},_}) -> true;
 is_bs_put(_) -> false.
 
 collect_bs_puts(Is) ->
@@ -439,20 +436,24 @@ collect_bs_puts_1([I|Is]=Is0, Acc) ->
 opt_bs_puts(Is) ->
     opt_bs_1(Is, []).
 
-opt_bs_1([{bs_put_float,Fail,{integer,Sz},1,Flags0,Src}=I0|Is], Acc) ->
+opt_bs_1([{bs_put,Fail,
+	   {bs_put_float,1,Flags0},[{integer,Sz},Src]}=I0|Is], Acc) ->
     try eval_put_float(Src, Sz, Flags0) of
 	<<Int:Sz>> ->
 	    Flags = force_big(Flags0),
-	    I = {bs_put_integer,Fail,{integer,Sz},1,Flags,{integer,Int}},
+	    I = {bs_put,Fail,{bs_put_integer,1,Flags},
+		 [{integer,Sz},{integer,Int}]},
 	    opt_bs_1([I|Is], Acc)
     catch
 	error:_ ->
 	    opt_bs_1(Is, [I0|Acc])
     end;
-opt_bs_1([{bs_put_integer,_,{integer,8},1,_,{integer,_}}|_]=IsAll, Acc0) ->
+opt_bs_1([{bs_put,_,{bs_put_integer,1,_},[{integer,8},{integer,_}]}|_]=IsAll,
+	 Acc0) ->
     {Is,Acc} = bs_collect_string(IsAll, Acc0),
     opt_bs_1(Is, Acc);
-opt_bs_1([{bs_put_integer,Fail,{integer,Sz},1,F,{integer,N}}=I|Is0], Acc) when Sz > 8 ->
+opt_bs_1([{bs_put,Fail,{bs_put_integer,1,F},[{integer,Sz},{integer,N}]}=I|Is0],
+	 Acc) when Sz > 8 ->
     case field_endian(F) of
 	big ->
 	    %% We can do this optimization for any field size without risk
@@ -466,14 +467,14 @@ opt_bs_1([{bs_put_integer,Fail,{integer,Sz},1,F,{integer,N}}=I|Is0], Acc) when S
 	    %% an explosion in code size.
 	    <<Int:Sz>> = <<N:Sz/little>>,
 	    Flags = force_big(F),
-	    Is = [{bs_put_integer,Fail,{integer,Sz},1,
-		   Flags,{integer,Int}}|Is0],
+	    Is = [{bs_put,Fail,{bs_put_integer,1,Flags},
+		   [{integer,Sz},{integer,Int}]}|Is0],
 	    opt_bs_1(Is, Acc);
 	_ -> 					%native or too wide little field
 	    opt_bs_1(Is0, [I|Acc])
     end;
-opt_bs_1([{Op,Fail,{integer,Sz},U,F,Src}|Is], Acc) when U > 1 ->
-    opt_bs_1([{Op,Fail,{integer,U*Sz},1,F,Src}|Is], Acc);
+opt_bs_1([{bs_put,Fail,{Op,U,F},[{integer,Sz},Src]}|Is], Acc) when U > 1 ->
+    opt_bs_1([{bs_put,Fail,{Op,1,F},[{integer,U*Sz},Src]}|Is], Acc);
 opt_bs_1([I|Is], Acc) ->
     opt_bs_1(Is, [I|Acc]);
 opt_bs_1([], Acc) -> reverse(Acc).
@@ -489,17 +490,17 @@ eval_put_float(Src, Sz, Flags) when Sz =< 256 -> %Only evaluate if Sz is reasona
 value({integer,I}) -> I;
 value({float,F}) -> F.
 
-bs_collect_string(Is, [{bs_put_string,Len,{string,Str}}|Acc]) ->
+bs_collect_string(Is, [{bs_put,_,{bs_put_string,Len,{string,Str}},[]}|Acc]) ->
     bs_coll_str_1(Is, Len, reverse(Str), Acc);
 bs_collect_string(Is, Acc) ->
     bs_coll_str_1(Is, 0, [], Acc).
     
-bs_coll_str_1([{bs_put_integer,_,{integer,Sz},U,_,{integer,V}}|Is],
+bs_coll_str_1([{bs_put,_,{bs_put_integer,U,_},[{integer,Sz},{integer,V}]}|Is],
 	      Len, StrAcc, IsAcc) when U*Sz =:= 8 ->
     Byte = V band 16#FF,
     bs_coll_str_1(Is, Len+1, [Byte|StrAcc], IsAcc);
 bs_coll_str_1(Is, Len, StrAcc, IsAcc) ->
-    {Is,[{bs_put_string,Len,{string,reverse(StrAcc)}}|IsAcc]}.
+    {Is,[{bs_put,{f,0},{bs_put_string,Len,{string,reverse(StrAcc)}},[]}|IsAcc]}.
 
 field_endian({field_flags,F}) -> field_endian_1(F).
 
@@ -531,15 +532,17 @@ bs_split_int(N, Sz, Fail, Acc) ->
     bs_split_int_1(N, FirstByteSz, Sz, Fail, Acc).
 
 bs_split_int_1(-1, _, Sz, Fail, Acc) when Sz > 64 ->
-    I = {bs_put_integer,Fail,{integer,Sz},1,{field_flags,[big]},{integer,-1}},
+    I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}},
+	 [{integer,Sz},{integer,-1}]},
     [I|Acc];
 bs_split_int_1(0, _, Sz, Fail, Acc) when Sz > 64 ->
-    I = {bs_put_integer,Fail,{integer,Sz},1,{field_flags,[big]},{integer,0}},
+    I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}},
+	 [{integer,Sz},{integer,0}]},
     [I|Acc];
 bs_split_int_1(N, ByteSz, Sz, Fail, Acc) when Sz > 0 ->
     Mask = (1 bsl ByteSz) - 1,
-    I = {bs_put_integer,Fail,{integer,ByteSz},1,
-	 {field_flags,[big]},{integer,N band Mask}},
+    I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}},
+	 [{integer,ByteSz},{integer,N band Mask}]},
     bs_split_int_1(N bsr ByteSz, 8, Sz-ByteSz, Fail, [I|Acc]);
 bs_split_int_1(_, _, _, _, Acc) -> Acc.
 
@@ -577,9 +580,9 @@ bsm_reroute([{bs_restore2,Reg,Save}=I|Is], D, _, Acc) ->
     bsm_reroute(Is, D, {Reg,Save}, [I|Acc]);
 bsm_reroute([{label,_}=I|Is], D, S, Acc) ->
     bsm_reroute(Is, D, S, [I|Acc]);
-bsm_reroute([{select_val,Reg,F0,{list,Lbls0}}|Is], D, {_,Save}=S, Acc0) ->
+bsm_reroute([{select,select_val,Reg,F0,Lbls0}|Is], D, {_,Save}=S, Acc0) ->
     [F|Lbls] = bsm_subst_labels([F0|Lbls0], Save, D),
-    Acc = [{select_val,Reg,F,{list,Lbls}}|Acc0],
+    Acc = [{select,select_val,Reg,F,Lbls}|Acc0],
     bsm_reroute(Is, D, S, Acc);
 bsm_reroute([{test,TestOp,F0,TestArgs}=I|Is], D, {_,Save}=S, Acc0) ->
     F = bsm_subst_label(F0, Save, D),
@@ -615,10 +618,6 @@ bsm_opt_2([{test,bs_skip_bits2,F,[Ctx,{integer,I1},Unit1,_]}|Is],
 	  [{test,bs_skip_bits2,F,[Ctx,{integer,I2},Unit2,Flags]}|Acc]) ->
     bsm_opt_2(Is, [{test,bs_skip_bits2,F,
 		    [Ctx,{integer,I1*Unit1+I2*Unit2},1,Flags]}|Acc]);
-bsm_opt_2([{test,bs_match_string,F,[Ctx,Bin1]},
-	   {test,bs_match_string,F,[Ctx,Bin2]}|Is], Acc) ->
-    I = {test,bs_match_string,F,[Ctx,<<Bin1/bitstring,Bin2/bitstring>>]},
-    bsm_opt_2([I|Is], Acc);
 bsm_opt_2([I|Is], Acc) ->
     bsm_opt_2(Is, [I|Acc]);
 bsm_opt_2([], Acc) -> reverse(Acc).
diff --git a/lib/compiler/src/beam_bool.erl b/lib/compiler/src/beam_bool.erl
index d9ea6f5a70..81be262d6d 100644
--- a/lib/compiler/src/beam_bool.erl
+++ b/lib/compiler/src/beam_bool.erl
@@ -168,18 +168,18 @@ bopt_block(Reg, Fail, OldIs, [{block,Bl0}|Acc0], St0) ->
     end.
 
 %% ensure_opt_safe(OriginalCode, OptCode, FollowingCode, Fail,
-%%             ReversedPreceedingCode, State) -> ok
+%%             ReversedPrecedingCode, State) -> ok
 %%  Comparing the original code to the optimized code, determine
 %%  whether the optimized code is guaranteed to work in the same
 %%  way as the original code.
 %%
 %%  Throw an exception if the optimization is not safe.
 %%
-ensure_opt_safe(Bl, NewCode, OldIs, Fail, PreceedingCode, St) ->
+ensure_opt_safe(Bl, NewCode, OldIs, Fail, PrecedingCode, St) ->
     %% Here are the conditions that must be true for the
     %% optimization to be safe.
     %%
-    %% 1. If a register is INITIALIZED by PreceedingCode,
+    %% 1. If a register is INITIALIZED by PrecedingCode,
     %%    then if that register assigned a value in the original
     %%    code, but not in the optimized code, it must be UNUSED or KILLED
     %%    in the code that follows.
@@ -190,29 +190,50 @@ ensure_opt_safe(Bl, NewCode, OldIs, Fail, PreceedingCode, St) ->
     %%    by the code that follows.
     %%
     %% 3. Any register that is assigned a value in the optimized
-    %%    code must be UNUSED or KILLED in the following code
-    %%    (because the register might be assigned the wrong value,
-    %%    and even if the value is right it might no longer be
-    %%    assigned on *all* paths leading to its use).
+    %%    code must be UNUSED or KILLED in the following code,
+    %%    unless we can be sure that it is always assigned the same
+    %%    value.
 
-    InitInPreceeding = initialized_regs(PreceedingCode),
+    InitInPreceding = initialized_regs(PrecedingCode),
 
     PrevDst = dst_regs(Bl),
     NewDst = dst_regs(NewCode),
     NotSet = ordsets:subtract(PrevDst, NewDst),
-    MustBeKilled = ordsets:subtract(NotSet, InitInPreceeding),
-    MustBeUnused = ordsets:subtract(ordsets:union(NotSet, NewDst), MustBeKilled),
+    MustBeKilled = ordsets:subtract(NotSet, InitInPreceding),
 
     case all_killed(MustBeKilled, OldIs, Fail, St) of
 	false -> throw(all_registers_not_killed);
 	true -> ok
     end,
+    Same = assigned_same_value(Bl, NewCode),
+    MustBeUnused = ordsets:subtract(ordsets:union(NotSet, NewDst),
+				    ordsets:union(MustBeKilled, Same)),
     case none_used(MustBeUnused, OldIs, Fail, St) of
 	false -> throw(registers_used);
 	true -> ok
     end,
     ok.
 
+%% assigned_same_value(OldCode, NewCodeReversed) -> [DestinationRegs]
+%%  Return an ordset with a list of all y registers that are always
+%%  assigned the same value in the old and new code. Currently, we
+%%  are very conservative in that we only consider identical move
+%%  instructions in the same order.
+%%
+assigned_same_value(Old, New) ->
+    case reverse(New) of
+	[{block,Bl}|_] ->
+	    assigned_same_value(Old, Bl, []);
+	_ ->
+	    ordsets:new()
+    end.
+
+assigned_same_value([{set,[{y,_}=D],[S],move}|T1],
+		    [{set,[{y,_}=D],[S],move}|T2], Acc) ->
+    assigned_same_value(T1, T2, [D|Acc]);
+assigned_same_value(_, _, Acc) ->
+    ordsets:from_list(Acc).
+
 update_fail_label([{set,_,_,move}=I|Is], Fail, Acc) ->
     update_fail_label(Is, Fail, [I|Acc]);
 update_fail_label([{set,Ds,As,{bif,N,{f,_}}}|Is], Fail, Acc) ->
diff --git a/lib/compiler/src/beam_bsm.erl b/lib/compiler/src/beam_bsm.erl
index 1217f7f777..02794a8e18 100644
--- a/lib/compiler/src/beam_bsm.erl
+++ b/lib/compiler/src/beam_bsm.erl
@@ -204,16 +204,6 @@ btb_reaches_match_1(Is, Regs, D) ->
 btb_reaches_match_2([{block,Bl}|Is], Regs0, D) ->
     Regs = btb_reaches_match_block(Bl, Regs0),
     btb_reaches_match_1(Is, Regs, D);
-btb_reaches_match_2([{call_only,Arity,{f,Lbl}}|_], Regs0, D) ->
-    Regs = btb_kill_not_live(Arity, Regs0),
-    btb_tail_call(Lbl, Regs, D);
-btb_reaches_match_2([{call_ext_only,Arity,Func}|_], Regs0, D) ->
-    Regs = btb_kill_not_live(Arity, Regs0),
-    btb_tail_call(Func, Regs, D);
-btb_reaches_match_2([{call_last,Arity,{f,Lbl},_}|_], Regs0, D) ->
-    Regs1 = btb_kill_not_live(Arity, Regs0),
-    Regs = btb_kill_yregs(Regs1),
-    btb_tail_call(Lbl, Regs, D);
 btb_reaches_match_2([{call,Arity,{f,Lbl}}|Is], Regs, D) ->
     btb_call(Arity, Lbl, Regs, Is, D);
 btb_reaches_match_2([{apply,Arity}|Is], Regs, D) ->
@@ -222,19 +212,16 @@ btb_reaches_match_2([{call_fun,Live}=I|Is], Regs, D) ->
     btb_call(Live, I, Regs, Is, D);
 btb_reaches_match_2([{make_fun2,_,_,_,Live}|Is], Regs, D) ->
     btb_call(Live, make_fun2, Regs, Is, D);
-btb_reaches_match_2([{call_ext,Arity,{extfunc,Mod,Name,Arity}=Func}|Is], Regs0, D) ->
+btb_reaches_match_2([{call_ext,Arity,Func}=I|Is], Regs0, D) ->
     %% Allow us scanning beyond the call in case the match
     %% context is saved on the stack.
-    case erl_bifs:is_exit_bif(Mod, Name, Arity) of
+    case beam_jump:is_exit_instruction(I) of
 	false ->
 	    btb_call(Arity, Func, Regs0, Is, D);
 	true ->
 	    Regs = btb_kill_not_live(Arity, Regs0),
 	    btb_tail_call(Func, Regs, D)
     end;
-btb_reaches_match_2([{call_ext_last,Arity,_,_}=I|_], Regs, D) ->
-    btb_ensure_not_used(btb_regs_from_arity(Arity), I, Regs),
-    D;
 btb_reaches_match_2([{kill,Y}|Is], Regs, D) ->
     btb_reaches_match_1(Is, btb_kill([Y], Regs), D);
 btb_reaches_match_2([{deallocate,_}|Is], Regs0, D) ->
@@ -254,21 +241,45 @@ btb_reaches_match_2([{bif,_,{f,F},Ss,Dst}=I|Is], Regs0, D0) ->
     Regs = btb_kill([Dst], Regs0),
     D = btb_follow_branch(F, Regs, D0),
     btb_reaches_match_1(Is, Regs, D);
-btb_reaches_match_2([{test,bs_start_match2,_,_,[Ctx,_],Ctx}|Is], Regs, D) ->
-    case btb_context_regs(Regs) of
-	[Ctx] ->
-	    D;
-	CtxRegs ->
-	    case member(Ctx, CtxRegs) of
-		false -> btb_reaches_match_2(Is, Regs, D);
-		true -> btb_error(unsuitable_bs_start_match)
+btb_reaches_match_2([{test,bs_start_match2,{f,F},Live,[Ctx,_],Ctx}=I|Is],
+		    Regs0, D0) ->
+    CtxRegs = btb_context_regs(Regs0),
+    case member(Ctx, CtxRegs) of
+	false ->
+	    %% This bs_start_match2 instruction does not use "our"
+	    %% match state. Therefore we can continue the search
+	    %% for another bs_start_match2 instruction.
+	    D = btb_follow_branch(F, Regs0, D0),
+	    Regs = btb_kill_not_live(Live, Regs0),
+	    btb_reaches_match_2(Is, Regs, D);
+	true ->
+	    %% OK. This instruction will use "our" match state,
+	    %% but we must make sure that all other copies of the
+	    %% match state are killed in the code that follows
+	    %% the instruction. (We know that the fail branch cannot
+	    %% be taken in this case.)
+	    OtherCtxRegs = CtxRegs -- [Ctx],
+	    case btb_are_all_unused(OtherCtxRegs, Is, D0) of
+		false -> btb_error({OtherCtxRegs,not_all_unused_after,I});
+		true -> D0
 	    end
     end;
-btb_reaches_match_2([{test,bs_start_match2,_,_,[Bin,_],Ctx}|Is], Regs, D) ->
-    CtxRegs = btb_context_regs(Regs),
+btb_reaches_match_2([{test,bs_start_match2,{f,F},Live,[Bin,_],Ctx}|Is],
+		    Regs0, D0) ->
+    CtxRegs = btb_context_regs(Regs0),
     case member(Bin, CtxRegs) orelse member(Ctx, CtxRegs) of
-	false -> btb_reaches_match_2(Is, Regs, D);
-	true -> btb_error(unsuitable_bs_start_match)
+	false ->
+	    %% This bs_start_match2 does not reference any copy of the
+	    %% match state. Therefore it can safely be passed on the
+	    %% way to another (perhaps more suitable) bs_start_match2
+	    %% instruction.
+	    D = btb_follow_branch(F, Regs0, D0),
+	    Regs = btb_kill_not_live(Live, Regs0),
+	    btb_reaches_match_2(Is, Regs, D);
+	true ->
+	    %% This variant of the bs_start_match2 instruction does
+	    %% not accept a match state as source.
+	    btb_error(unsuitable_bs_start_match)
     end;
 btb_reaches_match_2([{test,_,{f,F},Ss}=I|Is], Regs, D0) ->
     btb_ensure_not_used(Ss, I, Regs),
@@ -278,12 +289,7 @@ btb_reaches_match_2([{test,_,{f,F},_,Ss,_}=I|Is], Regs, D0) ->
     btb_ensure_not_used(Ss, I, Regs),
     D = btb_follow_branch(F, Regs, D0),
     btb_reaches_match_1(Is, Regs, D);
-btb_reaches_match_2([{select_val,Src,{f,F},{list,Conds}}=I|Is], Regs, D0) ->
-    btb_ensure_not_used([Src], I, Regs),
-    D1 = btb_follow_branch(F, Regs, D0),
-    D = btb_follow_branches(Conds, Regs, D1),
-    btb_reaches_match_1(Is, Regs, D);
-btb_reaches_match_2([{select_tuple_arity,Src,{f,F},{list,Conds}}=I|Is], Regs, D0) ->
+btb_reaches_match_2([{select,_,Src,{f,F},Conds}=I|Is], Regs, D0) ->
     btb_ensure_not_used([Src], I, Regs),
     D1 = btb_follow_branch(F, Regs, D0),
     D = btb_follow_branches(Conds, Regs, D1),
@@ -293,46 +299,11 @@ btb_reaches_match_2([{jump,{f,Lbl}}|_], Regs, #btb{index=Li}=D) ->
     btb_reaches_match_2(Is, Regs, D);
 btb_reaches_match_2([{label,_}|Is], Regs, D) ->
     btb_reaches_match_2(Is, Regs, D);
-btb_reaches_match_2([{bs_add,{f,0},_,Dst}|Is], Regs, D) ->
-    btb_reaches_match_1(Is, btb_kill([Dst], Regs), D);
-btb_reaches_match_2([bs_init_writable|Is], Regs0, D) ->
-    Regs = btb_kill_not_live(0, Regs0),
-    btb_reaches_match_1(Is, Regs, D);
-btb_reaches_match_2([{bs_init2,{f,0},_,_,_,_,Dst}|Is], Regs, D) ->
-    btb_reaches_match_1(Is, btb_kill([Dst], Regs), D);
-btb_reaches_match_2([{bs_init_bits,{f,0},_,_,_,_,Dst}|Is], Regs, D) ->
-    btb_reaches_match_1(Is, btb_kill([Dst], Regs), D);
-btb_reaches_match_2([{bs_append,{f,0},_,_,_,_,Src,_,Dst}=I|Is], Regs, D) ->
-    btb_ensure_not_used([Src], I, Regs),
-    btb_reaches_match_1(Is, btb_kill([Dst], Regs), D);
-btb_reaches_match_2([{bs_private_append,{f,0},_,_,Src,_,Dst}=I|Is], Regs, D) ->
-    btb_ensure_not_used([Src], I, Regs),
-    btb_reaches_match_1(Is, btb_kill([Dst], Regs), D);
-btb_reaches_match_2([{bs_put_integer,{f,0},_,_,_,Src}=I|Is], Regs, D) ->
-    btb_ensure_not_used([Src], I, Regs),
-    btb_reaches_match_1(Is, Regs, D);
-btb_reaches_match_2([{bs_put_float,{f,0},_,_,_,Src}=I|Is], Regs, D) ->
-    btb_ensure_not_used([Src], I, Regs),
-    btb_reaches_match_1(Is, Regs, D);
-btb_reaches_match_2([{bs_put_binary,{f,0},_,_,_,Src}=I|Is], Regs, D) ->
-    btb_ensure_not_used([Src], I, Regs),
-    btb_reaches_match_1(Is, Regs, D);
-btb_reaches_match_2([{bs_put_string,_,_}|Is], Regs, D) ->
-    btb_reaches_match_1(Is, Regs, D);
-btb_reaches_match_2([{bs_utf8_size,_,Src,Dst}=I|Is], Regs, D) ->
-    btb_ensure_not_used([Src], I, Regs),
-    btb_reaches_match_1(Is, btb_kill([Dst], Regs), D);
-btb_reaches_match_2([{bs_utf16_size,_,Src,Dst}=I|Is], Regs, D) ->
-    btb_ensure_not_used([Src], I, Regs),
+btb_reaches_match_2([{bs_init,{f,0},_,_,Ss,Dst}=I|Is], Regs, D) ->
+    btb_ensure_not_used(Ss, I, Regs),
     btb_reaches_match_1(Is, btb_kill([Dst], Regs), D);
-btb_reaches_match_2([{bs_put_utf8,_,_,Src}=I|Is], Regs, D) ->
-    btb_ensure_not_used([Src], I, Regs),
-    btb_reaches_match_1(Is, Regs, D);
-btb_reaches_match_2([{bs_put_utf16,_,_,Src}=I|Is], Regs, D) ->
-    btb_ensure_not_used([Src], I, Regs),
-    btb_reaches_match_1(Is, Regs, D);
-btb_reaches_match_2([{bs_put_utf32,_,_,Src}=I|Is], Regs, D) ->
-    btb_ensure_not_used([Src], I, Regs),
+btb_reaches_match_2([{bs_put,{f,0},_,Ss}=I|Is], Regs, D) ->
+    btb_ensure_not_used(Ss, I, Regs),
     btb_reaches_match_1(Is, Regs, D);
 btb_reaches_match_2([{bs_restore2,Src,_}=I|Is], Regs0, D) ->
     case btb_contains_context(Src, Regs0) of
@@ -340,11 +311,11 @@ btb_reaches_match_2([{bs_restore2,Src,_}=I|Is], Regs0, D) ->
 	    btb_reaches_match_1(Is, Regs0, D);
 	true ->
 	    %% Check that all other copies of the context registers
-	    %% are killed by the following instructions.
+	    %% are unused by the following instructions.
 	    Regs = btb_kill([Src], Regs0),
 	    CtxRegs = btb_context_regs(Regs),
-	    case btb_are_all_killed(CtxRegs, Is, D) of
-		false -> btb_error({CtxRegs,not_all_killed_after,I});
+	    case btb_are_all_unused(CtxRegs, Is, D) of
+		false -> btb_error({CtxRegs,not_all_unused_after,I});
 		true -> D#btb{must_not_save=true}
 	    end
     end;
@@ -354,11 +325,11 @@ btb_reaches_match_2([{bs_context_to_binary,Src}=I|Is], Regs0, D) ->
 	    btb_reaches_match_1(Is, Regs0, D);
 	true ->
 	    %% Check that all other copies of the context registers
-	    %% are killed by the following instructions.
+	    %% are unused by the following instructions.
 	    Regs = btb_kill([Src], Regs0),
 	    CtxRegs = btb_context_regs(Regs),
-	    case btb_are_all_killed(CtxRegs, Is, D) of
-		false -> btb_error({CtxRegs,not_all_killed_after,I});
+	    case btb_are_all_unused(CtxRegs, Is, D) of
+		false -> btb_error({CtxRegs,not_all_unused_after,I});
 		true -> D#btb{must_not_save=true}
 	    end
     end;
@@ -389,13 +360,16 @@ btb_call(Arity, Lbl, Regs0, Is, D0) ->
 	    %% First handle the call as if it were a tail call.
 	    D = btb_tail_call(Lbl, Regs, D0),
 
-	    %% No problem so far, but now we must make sure that
-	    %% we don't have any copies of the match context
-	    %% tucked away in an y register.
+	    %% No problem so far (the called function can handle a
+	    %% match context). Now we must make sure that the rest
+	    %% of this function following the call does not attempt
+	    %% to use the match context in case there is a copy
+	    %% tucked away in a y register.
 	    RegList = btb_context_regs(Regs),
-	    case [R || {y,_}=R <- RegList] of
-		[] -> D;
-		[_|_] -> btb_error({multiple_uses,RegList})
+	    YRegs = [R || {y,_}=R <- RegList],
+	    case btb_are_all_unused(YRegs, Is, D) of
+		true -> D;
+		false -> btb_error({multiple_uses,RegList})
 	    end;
 	true ->
 	    %% No match context in any x register. It could have been
@@ -475,21 +449,12 @@ btb_reaches_match_block([{set,Ds,Ss,_}=I|Is], Regs0) ->
 btb_reaches_match_block([], Regs) ->
     Regs.
 
-%% btb_regs_from_arity(Arity) -> [Register])
-%%  Create a list of x registers from a function arity.
-
-btb_regs_from_arity(Arity) ->
-    btb_regs_from_arity_1(Arity, []).
-
-btb_regs_from_arity_1(0, Acc) -> Acc;
-btb_regs_from_arity_1(N, Acc) -> btb_regs_from_arity_1(N-1, [{x,N-1}|Acc]).
-
 %% btb_are_all_killed([Register], [Instruction], D) -> true|false
-%%  Test whether all of the register are killed in the instruction stream.
+%%  Test whether all of the register are unused in the instruction stream.
 
-btb_are_all_killed(RegList, Is, #btb{index=Li}) ->
+btb_are_all_unused(RegList, Is, #btb{index=Li}) ->
     all(fun(R) ->
-		beam_utils:is_killed(R, Is, Li)
+		beam_utils:is_not_used(R, Is, Li)
 	end, RegList).
 
 %% btp_regs_from_list([Register]) -> RegisterSet.
diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl
index a7994ab3b3..26ba93b91c 100644
--- a/lib/compiler/src/beam_clean.erl
+++ b/lib/compiler/src/beam_clean.erl
@@ -74,10 +74,6 @@ find_all_used([], _All, Used) -> Used.
 
 update_work_list([{call,_,{f,L}}|Is], Sets) ->
     update_work_list(Is, add_to_work_list(L, Sets));
-update_work_list([{call_last,_,{f,L},_}|Is], Sets) ->
-    update_work_list(Is, add_to_work_list(L, Sets));
-update_work_list([{call_only,_,{f,L}}|Is], Sets) ->
-    update_work_list(Is, add_to_work_list(L, Sets));
 update_work_list([{make_fun2,{f,L},_,_,_}|Is], Sets) ->
     update_work_list(Is, add_to_work_list(L, Sets));
 update_work_list([_|Is], Sets) ->
@@ -200,7 +196,7 @@ replace([{test,Test,{f,Lbl},Ops}|Is], Acc, D) ->
     replace(Is, [{test,Test,{f,label(Lbl, D)},Ops}|Acc], D);
 replace([{test,Test,{f,Lbl},Live,Ops,Dst}|Is], Acc, D) ->
     replace(Is, [{test,Test,{f,label(Lbl, D)},Live,Ops,Dst}|Acc], D);
-replace([{select_val,R,{f,Fail0},{list,Vls0}}|Is], Acc, D) ->
+replace([{select,I,R,{f,Fail0},Vls0}|Is], Acc, D) ->
     Vls1 = map(fun ({f,L}) -> {f,label(L, D)};
 		   (Other) -> Other end, Vls0),
     Fail = label(Fail0, D),
@@ -210,12 +206,8 @@ replace([{select_val,R,{f,Fail0},{list,Vls0}}|Is], Acc, D) ->
 	    %% Convert to a plain jump.
 	    replace(Is, [{jump,{f,Fail}}|Acc], D);
 	Vls ->
-	    replace(Is, [{select_val,R,{f,Fail},{list,Vls}}|Acc], D)
+	    replace(Is, [{select,I,R,{f,Fail},Vls}|Acc], D)
     end;
-replace([{select_tuple_arity,R,{f,Fail},{list,Vls0}}|Is], Acc, D) ->
-    Vls = map(fun ({f,L}) -> {f,label(L, D)};
-		  (Other) -> Other end, Vls0),
-    replace(Is, [{select_tuple_arity,R,{f,label(Fail, D)},{list,Vls}}|Acc], D);
 replace([{'try',R,{f,Lbl}}|Is], Acc, D) ->
     replace(Is, [{'try',R,{f,label(Lbl, D)}}|Acc], D);
 replace([{'catch',R,{f,Lbl}}|Is], Acc, D) ->
@@ -236,37 +228,12 @@ replace([{gc_bif,Name,{f,Lbl},Live,As,R}|Is], Acc, D) when Lbl =/= 0 ->
     replace(Is, [{gc_bif,Name,{f,label(Lbl, D)},Live,As,R}|Acc], D);
 replace([{call,Ar,{f,Lbl}}|Is], Acc, D) ->
     replace(Is, [{call,Ar,{f,label(Lbl,D)}}|Acc], D);
-replace([{call_last,Ar,{f,Lbl},N}|Is], Acc, D) ->
-    replace(Is, [{call_last,Ar,{f,label(Lbl,D)},N}|Acc], D);
-replace([{call_only,Ar,{f,Lbl}}|Is], Acc, D) ->
-    replace(Is, [{call_only,Ar,{f,label(Lbl, D)}}|Acc], D);
 replace([{make_fun2,{f,Lbl},U1,U2,U3}|Is], Acc, D) ->
     replace(Is, [{make_fun2,{f,label(Lbl, D)},U1,U2,U3}|Acc], D);
-replace([{bs_init2,{f,Lbl},Sz,Words,R,F,Dst}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{bs_init2,{f,label(Lbl, D)},Sz,Words,R,F,Dst}|Acc], D);
-replace([{bs_init_bits,{f,Lbl},Sz,Words,R,F,Dst}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{bs_init_bits,{f,label(Lbl, D)},Sz,Words,R,F,Dst}|Acc], D);
-replace([{bs_put_integer,{f,Lbl},Bits,Unit,Fl,Val}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{bs_put_integer,{f,label(Lbl, D)},Bits,Unit,Fl,Val}|Acc], D);
-replace([{bs_put_utf8=I,{f,Lbl},Fl,Val}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{I,{f,label(Lbl, D)},Fl,Val}|Acc], D);
-replace([{bs_put_utf16=I,{f,Lbl},Fl,Val}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{I,{f,label(Lbl, D)},Fl,Val}|Acc], D);
-replace([{bs_put_utf32=I,{f,Lbl},Fl,Val}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{I,{f,label(Lbl, D)},Fl,Val}|Acc], D);
-replace([{bs_put_binary,{f,Lbl},Bits,Unit,Fl,Val}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{bs_put_binary,{f,label(Lbl, D)},Bits,Unit,Fl,Val}|Acc], D);
-replace([{bs_put_float,{f,Lbl},Bits,Unit,Fl,Val}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{bs_put_float,{f,label(Lbl, D)},Bits,Unit,Fl,Val}|Acc], D);
-replace([{bs_add,{f,Lbl},Src,Dst}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{bs_add,{f,label(Lbl, D)},Src,Dst}|Acc], D);
-replace([{bs_append,{f,Lbl},_,_,_,_,_,_,_}=I0|Is], Acc, D) when Lbl =/= 0 ->
-    I = setelement(2, I0, {f,label(Lbl, D)}),
-    replace(Is, [I|Acc], D);
-replace([{bs_utf8_size=I,{f,Lbl},Src,Dst}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{I,{f,label(Lbl, D)},Src,Dst}|Acc], D);
-replace([{bs_utf16_size=I,{f,Lbl},Src,Dst}|Is], Acc, D) when Lbl =/= 0 ->
-    replace(Is, [{I,{f,label(Lbl, D)},Src,Dst}|Acc], D);
+replace([{bs_init,{f,Lbl},Info,Live,Ss,Dst}|Is], Acc, D) when Lbl =/= 0 ->
+    replace(Is, [{bs_init,{f,label(Lbl, D)},Info,Live,Ss,Dst}|Acc], D);
+replace([{bs_put,{f,Lbl},Info,Ss}|Is], Acc, D) when Lbl =/= 0 ->
+    replace(Is, [{bs_put,{f,label(Lbl, D)},Info,Ss}|Acc], D);
 replace([I|Is], Acc, D) ->
     replace(Is, [I|Acc], D);
 replace([], Acc, _) -> Acc.
diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl
index 5f12a98f09..92d8e5acb3 100644
--- a/lib/compiler/src/beam_dead.erl
+++ b/lib/compiler/src/beam_dead.erl
@@ -182,7 +182,7 @@ forward(Is, Lc) ->
 forward([{block,[]}|Is], D, Lc, Acc) ->
     %% Empty blocks can prevent optimizations.
     forward(Is, D, Lc, Acc);
-forward([{select_val,Reg,_,{list,List}}=I|Is], D0, Lc, Acc) ->
+forward([{select,select_val,Reg,_,List}=I|Is], D0, Lc, Acc) ->
     D = update_value_dict(List, Reg, D0),
     forward(Is, D, Lc, [I|Acc]);
 forward([{label,Lbl}=LblI,{block,[{set,[Dst],[Lit],move}|BlkIs]}=Blk|Is], D, Lc, Acc) ->
@@ -271,11 +271,11 @@ backward([{test,is_eq_exact,Fail,[Dst,{integer,Arity}]}=I|
     end;
 backward([{label,Lbl}=L|Is], D, Acc) ->
     backward(Is, beam_utils:index_label(Lbl, Acc, D), [L|Acc]);
-backward([{select_val,Reg,{f,Fail0},{list,List0}}|Is], D, Acc) ->
+backward([{select,select_val,Reg,{f,Fail0},List0}|Is], D, Acc) ->
     List = shortcut_select_list(List0, Reg, D, []),
     Fail1 = shortcut_label(Fail0, D),
     Fail = shortcut_bs_test(Fail1, Is, D),
-    Sel = {select_val,Reg,{f,Fail},{list,List}},
+    Sel = {select,select_val,Reg,{f,Fail},List},
     backward(Is, D, [Sel|Acc]);
 backward([{jump,{f,To0}},{move,Src,Reg}=Move0|Is], D, Acc) ->
     {To,Move} = case Src of
@@ -382,7 +382,7 @@ shortcut_select_label(To0, Reg, Val, D) ->
     case beam_utils:code_at(To0, D) of
  	[{jump,{f,To}}|_] ->
  	    shortcut_select_label(To, Reg, Val, D);
-	[{test,is_atom,_,[Reg]},{select_val,Reg,{f,Fail},{list,Map}}|_] ->
+	[{test,is_atom,_,[Reg]},{select,select_val,Reg,{f,Fail},Map}|_] ->
 	    To = find_select_val(Map, Val, Fail),
 	    shortcut_select_label(To, Reg, Val, D);
   	[{test,is_eq_exact,{f,_},[Reg,{atom,Val}]},{label,To}|_] when is_atom(Val) ->
@@ -472,10 +472,10 @@ combine_eqs(To, [Reg,{Type,_}=Lit1]=Ops, D, [{label,L1}|_])
     case beam_utils:code_at(To, D) of
 	[{test,is_eq_exact,{f,F2},[Reg,{Type,_}=Lit2]},
 	 {label,L2}|_] when Lit1 =/= Lit2 ->
-	    {select_val,Reg,{f,F2},{list,[Lit1,{f,L1},Lit2,{f,L2}]}};
-	[{select_val,Reg,{f,F2},{list,[{Type,_}|_]=List0}}|_] ->
+	    {select,select_val,Reg,{f,F2},[Lit1,{f,L1},Lit2,{f,L2}]};
+	[{select,select_val,Reg,{f,F2},[{Type,_}|_]=List0}|_] ->
 	    List = remove_from_list(Lit1, List0),
-	    {select_val,Reg,{f,F2},{list,[Lit1,{f,L1}|List]}};
+	    {select,select_val,Reg,{f,F2},[Lit1,{f,L1}|List]};
 	_Is ->
 	    {test,is_eq_exact,{f,To},Ops}
 	end;
@@ -527,6 +527,8 @@ count_bits_matched([{test,_,_,_,[_,Sz,U,{field_flags,_}],_}|Is], SavePoint, Bits
 	{integer,N} -> count_bits_matched(Is, SavePoint, Bits+N*U);
 	_ -> count_bits_matched(Is, SavePoint, Bits)
     end;
+count_bits_matched([{test,bs_match_string,_,[_,Bits,_]}|Is], SavePoint, Bits0) ->
+    count_bits_matched(Is, SavePoint, Bits0+Bits);
 count_bits_matched([{test,_,_,_}|Is], SavePoint, Bits) ->
     count_bits_matched(Is, SavePoint, Bits);
 count_bits_matched([{bs_save2,Reg,SavePoint}|_], {Reg,SavePoint}, Bits) ->
diff --git a/lib/compiler/src/beam_except.erl b/lib/compiler/src/beam_except.erl
index fb1a43cd9e..14e9943f88 100644
--- a/lib/compiler/src/beam_except.erl
+++ b/lib/compiler/src/beam_except.erl
@@ -65,10 +65,6 @@ function_1(Is0) ->
 
 translate([{call_ext,Ar,{extfunc,erlang,error,Ar}}=I|Is], St, Acc) ->
     translate_1(Ar, I, Is, St, Acc);
-translate([{call_ext_only,Ar,{extfunc,erlang,error,Ar}}=I|Is], St, Acc) ->
-    translate_1(Ar, I, Is, St, Acc);
-translate([{call_ext_last,Ar,{extfunc,erlang,error,Ar},_}=I|Is], St, Acc) ->
-    translate_1(Ar, I, Is, St, Acc);
 translate([I|Is], St, Acc) ->
     translate(Is, St, [I|Acc]);
 translate([], _, Acc) ->
diff --git a/lib/compiler/src/beam_flatten.erl b/lib/compiler/src/beam_flatten.erl
index 6c7cb849aa..04232d8fd2 100644
--- a/lib/compiler/src/beam_flatten.erl
+++ b/lib/compiler/src/beam_flatten.erl
@@ -79,49 +79,28 @@ norm_allocate({nozero,Ns,Nh,Inits}, Regs) ->
 
 %% insert_alloc_in_bs_init(ReverseInstructionStream, AllocationInfo) ->
 %%                                  impossible | ReverseInstructionStream'
-%%   A bs_init2/6 instruction should not be followed by a test heap instruction.
+%%   A bs_init/6 instruction should not be followed by a test heap instruction.
 %%   Given the AllocationInfo from a test heap instruction, merge the
-%%   allocation amounts into the previous bs_init2/6 instruction (if any).
+%%   allocation amounts into the previous bs_init/6 instruction (if any).
 %%
-insert_alloc_in_bs_init([I|_]=Is, Alloc) ->
-    case is_bs_constructor(I) of
-	false -> impossible;
-	true -> insert_alloc_1(Is, Alloc, [])
-    end.
-
-insert_alloc_1([{bs_init2=Op,Fail,Bs,Ws1,Regs,F,Dst}|Is], {_,nostack,Ws2,[]}, Acc) ->
-    Al = beam_utils:combine_heap_needs(Ws1, Ws2),
-    I = {Op,Fail,Bs,Al,Regs,F,Dst},
-    reverse(Acc, [I|Is]);
-insert_alloc_1([{bs_init_bits=Op,Fail,Bs,Ws1,Regs,F,Dst}|Is], {_,nostack,Ws2,[]}, Acc) ->
-    Al = beam_utils:combine_heap_needs(Ws1, Ws2),
-    I = {Op,Fail,Bs,Al,Regs,F,Dst},
-    reverse(Acc, [I|Is]);
-insert_alloc_1([{bs_append,Fail,Sz,Ws1,Regs,U,Bin,Fl,Dst}|Is],
-	       {_,nostack,Ws2,[]}, Acc) ->
+insert_alloc_in_bs_init([{bs_put,_,_,_}=I|Is], Alloc) ->
+    %% The instruction sequence ends with an bs_put/4 instruction.
+    %% We'll need to search backwards for the bs_init/6 instruction.
+    insert_alloc_1(Is, Alloc, [I]);
+insert_alloc_in_bs_init(_, _) -> impossible.
+
+insert_alloc_1([{bs_init=Op,Fail,Info0,Live,Ss,Dst}|Is],
+	       {_,nostack,Ws2,[]}, Acc) when is_integer(Live) ->
+    %% The number of extra heap words is always in the second position
+    %% in the Info tuple.
+    Ws1 = element(2, Info0),
     Al = beam_utils:combine_heap_needs(Ws1, Ws2),
-    I = {bs_append,Fail,Sz,Al,Regs,U,Bin,Fl,Dst},
+    Info = setelement(2, Info0, Al),
+    I = {Op,Fail,Info,Live,Ss,Dst},
     reverse(Acc, [I|Is]);
-insert_alloc_1([I|Is], Alloc, Acc) ->
+insert_alloc_1([{bs_put,_,_,_}=I|Is], Alloc, Acc) ->
     insert_alloc_1(Is, Alloc, [I|Acc]).
 
-
-%% is_bs_constructor(Instruction) -> true|false.
-%%  Test whether the instruction is a bit syntax construction
-%%  instruction that can occur at the end of a bit syntax
-%%  construction. (Since an empty binary would be expressed
-%%  as a literal, the bs_init2/6 instruction will not occur
-%%  at the end and therefore it is no need to test for it here.)
-%%
-is_bs_constructor({bs_put_integer,_,_,_,_,_}) -> true;
-is_bs_constructor({bs_put_utf8,_,_,_}) -> true;
-is_bs_constructor({bs_put_utf16,_,_,_}) -> true;
-is_bs_constructor({bs_put_utf32,_,_,_}) -> true;
-is_bs_constructor({bs_put_float,_,_,_,_,_}) -> true;
-is_bs_constructor({bs_put_binary,_,_,_,_,_}) -> true;
-is_bs_constructor({bs_put_string,_,_}) -> true;
-is_bs_constructor(_) -> false.
-
 %% opt(Is0) -> Is
 %%  Simple peep-hole optimization to move a {move,Any,{x,0}} past
 %%  any kill up to the next call instruction. (To give the loader
diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index db67d24514..636c299e47 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -20,7 +20,7 @@
 
 -module(beam_jump).
 
--export([module/2,module_labels/1,
+-export([module/2,
 	 is_unreachable_after/1,is_exit_instruction/1,
 	 remove_unused_labels/1,is_label_used_in/2]).
 
@@ -46,10 +46,13 @@
 %%%     such as a jump that never transfers control to the instruction
 %%%     following it.
 %%%
-%%% (2) case_end, if_end, and badmatch, and function calls that cause an
-%%%     exit (such as calls to exit/1) are moved to the end of the function.
-%%%     The purpose is to allow further optimizations at the place from
-%%%     which the code was moved.
+%%% (2) Short sequences starting with a label and ending in case_end, if_end,
+%%%     and badmatch, and function calls that cause an exit (such as calls
+%%%     to exit/1) are moved to the end of the function, but only if the
+%%%     the block is not entered via a fallthrough. The purpose of this move
+%%%     is to allow further optimizations at the place from which the
+%%%     code was moved (a jump around the block could be replaced with a
+%%%     fallthrough).
 %%%
 %%% (3) Any unreachable code is removed.  Unreachable code is code
 %%%     after jump, call_last and other instructions which never
@@ -130,13 +133,6 @@ module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
     Fs = [function(F) || F <- Fs0],
     {ok,{Mod,Exp,Attr,Fs,Lc}}.
 
-module_labels({Mod,Exp,Attr,Fs,Lc}) ->
-    {Mod,Exp,Attr,[function_labels(F) || F <- Fs],Lc}.
-
-function_labels({function,Name,Arity,CLabel,Asm0}) ->
-    Asm = remove_unused_labels(Asm0),
-    {function,Name,Arity,CLabel,Asm}.    
-
 %% function(Function) -> Function'
 %%  Optimize jumps and branches.
 %%
@@ -224,6 +220,8 @@ extract_seq([{line,_}=Line|Is], Acc) ->
     extract_seq(Is, [Line|Acc]);
 extract_seq([{block,_}=Bl|Is], Acc) ->
     extract_seq_1(Is, [Bl|Acc]);
+extract_seq([{bs_context_to_binary,_}=I|Is], Acc) ->
+    extract_seq_1(Is, [I|Acc]);
 extract_seq([{label,_}|_]=Is, Acc) ->
     extract_seq_1(Is, Acc);
 extract_seq(_, _) -> no.
@@ -232,6 +230,9 @@ extract_seq_1([{line,_}=Line|Is], Acc) ->
     extract_seq_1(Is, [Line|Acc]);
 extract_seq_1([{label,_},{func_info,_,_,_}|_], _) ->
     no;
+extract_seq_1([{label,Lbl},{jump,{f,Lbl}}|_], _) ->
+    %% Don't move a sequence which have a fallthrough entering it.
+    no;
 extract_seq_1([{label,_}=Lbl|Is], Acc) ->
     {yes,[Lbl|Acc],Is};
 extract_seq_1(_, _) -> no.
@@ -260,43 +261,39 @@ find_fixpoint(OptFun, Is0) ->
 	Is -> find_fixpoint(OptFun, Is)
     end.
 
-opt([{test,Test0,{f,Lnum}=Lbl,Ops}=I|Is0], Acc, St) ->
-    case Is0 of
-	[{jump,{f,Lnum}}|Is] ->
-	    %% We have
-	    %%    Test Label Ops
-	    %%    jump Label
-	    %% The test instruction is definitely not needed.
-	    %% The jump instruction is not needed if there is
-	    %% a definition of Label following the jump instruction.
-	    case is_label_defined(Is, Lnum) of
-		false ->
-		    %% The jump instruction is still needed.
-		    opt(Is0, [I|Acc], label_used(Lbl, St));
-		true ->
-		    %% Neither the test nor the jump are needed.
-		    opt(Is, Acc, St)
-	    end;
-	[{jump,To}|Is] ->
-	    case is_label_defined(Is, Lnum) of
-		false ->
+opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) ->
+    %% We have
+    %%    Test Label Ops
+    %%    jump Label
+    %% The test instruction is not needed if the test is pure
+    %% (it modifies neither registers nor bit syntax state).
+    case beam_utils:is_pure_test(I) of
+	false ->
+	    %% Test is not pure; we must keep it.
+	    opt(Is, [I|Acc], label_used(Lbl, St));
+	true ->
+	    %% The test is pure and its failure label is the same
+	    %% as in the jump that follows -- thus it is not needed.
+	    opt(Is, Acc, St)
+    end;
+opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) ->
+    case is_label_defined(Is, L) of
+	false ->
+	    opt(Is0, [I|Acc], label_used(Lbl, St));
+	true ->
+	    case invert_test(Test0) of
+		not_possible ->
 		    opt(Is0, [I|Acc], label_used(Lbl, St));
-		true ->
-		    case invert_test(Test0) of
-			not_possible ->
-			    opt(Is0, [I|Acc], label_used(Lbl, St));
-			Test ->
-			    opt([{test,Test,To,Ops}|Is], Acc, St)
-		    end
-	    end;
-	_Other ->
-	    opt(Is0, [I|Acc], label_used(Lbl, St))
+		Test ->
+		    %% Invert the test and remove the jump.
+		    opt([{test,Test,To,Ops}|Is], Acc, St)
+	    end
     end;
+opt([{test,_,{f,_}=Lbl,_}=I|Is], Acc, St) ->
+    opt(Is, [I|Acc], label_used(Lbl, St));
 opt([{test,_,{f,_}=Lbl,_,_,_}=I|Is], Acc, St) ->
     opt(Is, [I|Acc], label_used(Lbl, St));
-opt([{select_val,_R,Fail,{list,Vls}}=I|Is], Acc, St) ->
-    skip_unreachable(Is, [I|Acc], label_used([Fail|Vls], St));
-opt([{select_tuple_arity,_R,Fail,{list,Vls}}=I|Is], Acc, St) ->
+opt([{select,_,_R,Fail,Vls}=I|Is], Acc, St) ->
     skip_unreachable(Is, [I|Acc], label_used([Fail|Vls], St));
 opt([{label,L}=I|Is], Acc, #st{entry=L}=St) ->
     %% NEVER move the entry label.
@@ -412,14 +409,8 @@ is_label_used(L, St) ->
 
 is_unreachable_after({func_info,_M,_F,_A}) -> true;
 is_unreachable_after(return) -> true;
-is_unreachable_after({call_ext_last,_Ar,_ExtFunc,_D}) -> true;
-is_unreachable_after({call_ext_only,_Ar,_ExtFunc}) -> true;
-is_unreachable_after({call_last,_Ar,_Lbl,_D}) -> true;
-is_unreachable_after({call_only,_Ar,_Lbl}) -> true;
-is_unreachable_after({apply_last,_Ar,_N}) -> true;
 is_unreachable_after({jump,_Lbl}) -> true;
-is_unreachable_after({select_val,_R,_Lbl,_Cases}) -> true;
-is_unreachable_after({select_tuple_arity,_R,_Lbl,_Cases}) -> true;
+is_unreachable_after({select,_What,_R,_Lbl,_Cases}) -> true;
 is_unreachable_after({loop_rec_end,_}) -> true;
 is_unreachable_after({wait,_}) -> true;
 is_unreachable_after(I) -> is_exit_instruction(I).
@@ -430,10 +421,6 @@ is_unreachable_after(I) -> is_exit_instruction(I).
 
 is_exit_instruction({call_ext,_,{extfunc,M,F,A}}) ->
     erl_bifs:is_exit_bif(M, F, A);
-is_exit_instruction({call_ext_last,_,{extfunc,M,F,A},_}) ->
-    erl_bifs:is_exit_bif(M, F, A);
-is_exit_instruction({call_ext_only,_,{extfunc,M,F,A}}) ->
-    erl_bifs:is_exit_bif(M, F, A);
 is_exit_instruction(if_end) -> true;
 is_exit_instruction({case_end,_}) -> true;
 is_exit_instruction({try_case_end,_}) -> true;
@@ -516,9 +503,7 @@ ulbl({test,_,Fail,_}, Used) ->
     mark_used(Fail, Used);
 ulbl({test,_,Fail,_,_,_}, Used) ->
     mark_used(Fail, Used);
-ulbl({select_val,_,Fail,{list,Vls}}, Used) ->
-    mark_used_list(Vls, mark_used(Fail, Used));
-ulbl({select_tuple_arity,_,Fail,{list,Vls}}, Used) ->
+ulbl({select,_,_,Fail,Vls}, Used) ->
     mark_used_list(Vls, mark_used(Fail, Used));
 ulbl({'try',_,Lbl}, Used) ->
     mark_used(Lbl, Used);
@@ -538,29 +523,9 @@ ulbl({bif,_Name,Lbl,_As,_R}, Used) ->
     mark_used(Lbl, Used);
 ulbl({gc_bif,_Name,Lbl,_Live,_As,_R}, Used) ->
     mark_used(Lbl, Used);
-ulbl({bs_init2,Lbl,_,_,_,_,_}, Used) ->
-    mark_used(Lbl, Used);
-ulbl({bs_init_bits,Lbl,_,_,_,_,_}, Used) ->
-    mark_used(Lbl, Used);
-ulbl({bs_put_integer,Lbl,_Bits,_Unit,_Fl,_Val}, Used) ->
-    mark_used(Lbl, Used);
-ulbl({bs_put_float,Lbl,_Bits,_Unit,_Fl,_Val}, Used) ->
-    mark_used(Lbl, Used);
-ulbl({bs_put_binary,Lbl,_Bits,_Unit,_Fl,_Val}, Used) ->
-    mark_used(Lbl, Used);
-ulbl({bs_put_utf8,Lbl,_Fl,_Val}, Used) ->
-    mark_used(Lbl, Used);
-ulbl({bs_put_utf16,Lbl,_Fl,_Val}, Used) ->
-    mark_used(Lbl, Used);
-ulbl({bs_put_utf32,Lbl,_Fl,_Val}, Used) ->
-    mark_used(Lbl, Used);
-ulbl({bs_add,Lbl,_,_}, Used) ->
-    mark_used(Lbl, Used);
-ulbl({bs_append,Lbl,_,_,_,_,_,_,_}, Used) ->
-    mark_used(Lbl, Used);
-ulbl({bs_utf8_size,Lbl,_,_}, Used) ->
+ulbl({bs_init,Lbl,_,_,_,_}, Used) ->
     mark_used(Lbl, Used);
-ulbl({bs_utf16_size,Lbl,_,_}, Used) ->
+ulbl({bs_put,Lbl,_,_}, Used) ->
     mark_used(Lbl, Used);
 ulbl(_, Used) -> Used.
 
diff --git a/lib/compiler/src/beam_peep.erl b/lib/compiler/src/beam_peep.erl
index f39fc50b95..a199aa50ed 100644
--- a/lib/compiler/src/beam_peep.erl
+++ b/lib/compiler/src/beam_peep.erl
@@ -120,13 +120,13 @@ peep([{test,Op,_,Ops}=I|Is], SeenTests0, Acc) ->
 		    peep(Is, SeenTests, [I|Acc])
 	    end
     end;
-peep([{select_val,Src,Fail,
-	{list,[{atom,false},{f,L},{atom,true},{f,L}]}}|
+peep([{select,select_val,Src,Fail,
+       [{atom,false},{f,L},{atom,true},{f,L}]}|
       [{label,L}|_]=Is], SeenTests, Acc) ->
     I = {test,is_boolean,Fail,[Src]},
     peep([I|Is], SeenTests, Acc);
-peep([{select_val,Src,Fail,
-       {list,[{atom,true},{f,L},{atom,false},{f,L}]}}|
+peep([{select,select_val,Src,Fail,
+       [{atom,true},{f,L},{atom,false},{f,L}]}|
       [{label,L}|_]=Is], SeenTests, Acc) ->
     I = {test,is_boolean,Fail,[Src]},
     peep([I|Is], SeenTests, Acc);
diff --git a/lib/compiler/src/beam_receive.erl b/lib/compiler/src/beam_receive.erl
index bd1f44f66b..fe95a7e35b 100644
--- a/lib/compiler/src/beam_receive.erl
+++ b/lib/compiler/src/beam_receive.erl
@@ -84,13 +84,29 @@ function({function,Name,Arity,Entry,Is}) ->
 	    erlang:raise(Class, Error, Stack)
     end.
 
+opt([{call_ext,A,{extfunc,erlang,spawn_monitor,A}}=I0|Is0], D, Acc)
+  when A =:= 1; A =:= 3 ->
+    case ref_in_tuple(Is0) of
+	no ->
+	    opt(Is0, D, [I0|Acc]);
+	{yes,Regs,Is1,MatchReversed} ->
+	    %% The call creates a brand new reference. Now
+	    %% search for a receive statement in the same
+	    %% function that will match against the reference.
+	    case opt_recv(Is1, Regs, D) of
+		no ->
+		    opt(Is0, D, [I0|Acc]);
+		{yes,Is,Lbl} ->
+		    opt(Is, D, MatchReversed++[I0,{recv_mark,{f,Lbl}}|Acc])
+	    end
+    end;
 opt([{call_ext,Arity,{extfunc,erlang,Name,Arity}}=I|Is0], D, Acc) ->
     case creates_new_ref(Name, Arity) of
 	true ->
 	    %% The call creates a brand new reference. Now
 	    %% search for a receive statement in the same
 	    %% function that will match against the reference.
-	    case opt_recv(Is0, D) of
+	    case opt_recv(Is0, regs_init_x0(), D) of
 		no ->
 		    opt(Is0, D, [I|Acc]);
 		{yes,Is,Lbl} ->
@@ -104,19 +120,34 @@ opt([I|Is], D, Acc) ->
 opt([], _, Acc) ->
     reverse(Acc).
 
+ref_in_tuple([{test,is_tuple,_,[{x,0}]}=I1,
+	      {test,test_arity,_,[{x,0},2]}=I2,
+	      {block,[{set,[_],[{x,0}],{get_tuple_element,0}},
+		      {set,[Dst],[{x,0}],{get_tuple_element,1}}|Bl]}=I3|Is]) ->
+    ref_in_tuple_1(Bl, Dst, Is, [I3,I2,I1]);
+ref_in_tuple([{test,is_tuple,_,[{x,0}]}=I1,
+	      {test,test_arity,_,[{x,0},2]}=I2,
+	      {block,[{set,[Dst],[{x,0}],{get_tuple_element,1}}|Bl]}=I3|Is]) ->
+    ref_in_tuple_1(Bl, Dst, Is, [I3,I2,I1]);
+ref_in_tuple(_) -> no.
+
+ref_in_tuple_1(Bl, Dst, Is, MatchReversed) ->
+    Regs0 = regs_init_singleton(Dst),
+    Regs = opt_update_regs_bl(Bl, Regs0),
+    {yes,Regs,Is,MatchReversed}.
+
 %% creates_new_ref(Name, Arity) -> true|false.
 %%  Return 'true' if the BIF Name/Arity will create a new reference.
 creates_new_ref(monitor, 2) -> true;
 creates_new_ref(make_ref, 0) -> true;
 creates_new_ref(_, _) -> false.
 
-%% opt_recv([Instruction], LabelIndex) -> no|{yes,[Instruction]}
+%% opt_recv([Instruction], Regs, LabelIndex) -> no|{yes,[Instruction]}
 %%  Search for a receive statement that will only retrieve messages
 %%  that contain the newly created reference (which is currently in {x,0}).
-opt_recv(Is, D) ->
-    R = regs_init_x0(),
+opt_recv(Is, Regs, D) ->
     L = gb_sets:empty(),
-    opt_recv(Is, D, R, L, []).
+    opt_recv(Is, D, Regs, L, []).
 
 opt_recv([{label,L}=Lbl,{loop_rec,{f,Fail},_}=Loop|Is], D, R0, _, Acc) ->
     R = regs_kill_not_live(0, R0),
@@ -157,8 +188,6 @@ opt_update_regs({call_fun,_}, R, L) ->
     {regs_kill_not_live(0, R),L};
 opt_update_regs({kill,Y}, R, L) ->
     {regs_kill([Y], R),L};
-opt_update_regs(send, R, L) ->
-    {regs_kill_not_live(0, R),L};
 opt_update_regs({'catch',_,{f,Lbl}}, R, L) ->
     {R,gb_sets:add(Lbl, L)};
 opt_update_regs({catch_end,_}, R, L) ->
@@ -253,10 +282,7 @@ opt_ref_used_1([{test,is_ne_exact,{f,Fail},Args}|Is], RefReg, D, Done0, Regs) ->
 opt_ref_used_1([{test,_,{f,Fail},_}|Is], RefReg, D, Done0, Regs) ->
     Done = opt_ref_used_at(Fail, RefReg, D, Done0, Regs),
     opt_ref_used_1(Is, RefReg, D, Done, Regs);
-opt_ref_used_1([{select_tuple_arity,_,{f,Fail},{list,List}}|_], RefReg, D, Done, Regs) ->
-    Lbls = [F || {f,F} <- List] ++ [Fail],
-    opt_ref_used_in_all(Lbls, RefReg, D, Done, Regs);
-opt_ref_used_1([{select_val,_,{f,Fail},{list,List}}|_], RefReg, D, Done, Regs) ->
+opt_ref_used_1([{select,_,_,{f,Fail},List}|_], RefReg, D, Done, Regs) ->
     Lbls = [F || {f,F} <- List] ++ [Fail],
     opt_ref_used_in_all(Lbls, RefReg, D, Done, Regs);
 opt_ref_used_1([{label,Lbl}|Is], RefReg, D, Done, Regs) ->
@@ -323,6 +349,12 @@ opt_ref_used_bl([], Regs) -> Regs.
 regs_init() ->
     {0,0}.
 
+%% regs_init_singleton(Register) -> RegisterSet
+%%  Return a set that only contains one register.
+
+regs_init_singleton(Reg) ->
+    regs_add(Reg, regs_init()).
+
 %% regs_init_x0() -> RegisterSet
 %%  Return a set that only contains the {x,0} register.
 
diff --git a/lib/compiler/src/beam_trim.erl b/lib/compiler/src/beam_trim.erl
index 5f4fa3b1f8..d95db1f681 100644
--- a/lib/compiler/src/beam_trim.erl
+++ b/lib/compiler/src/beam_trim.erl
@@ -172,38 +172,16 @@ remap([{bif,Name,Fail,Ss,D}|Is], Map, Acc) ->
 remap([{gc_bif,Name,Fail,Live,Ss,D}|Is], Map, Acc) ->
     I = {gc_bif,Name,Fail,Live,[Map(S) || S <- Ss],Map(D)},
     remap(Is, Map, [I|Acc]);
-remap([{bs_add,Fail,[SrcA,SrcB,U],D}|Is], Map, Acc) ->
-    I = {bs_add,Fail,[Map(SrcA),Map(SrcB),U],Map(D)},
+remap([{bs_init,Fail,Info,Live,Ss0,Dst0}|Is], Map, Acc) ->
+    Ss = [Map(Src) || Src <- Ss0],
+    Dst = Map(Dst0),
+    I = {bs_init,Fail,Info,Live,Ss,Dst},
     remap(Is, Map, [I|Acc]);
-remap([{bs_append=Op,Fail,Bits,Heap,Live,Unit,Bin,Flags,D}|Is], Map, Acc) ->
-    I = {Op,Fail,Map(Bits),Heap,Live,Unit,Map(Bin),Flags,Map(D)},
-    remap(Is, Map, [I|Acc]);
-remap([{bs_private_append=Op,Fail,Bits,Unit,Bin,Flags,D}|Is], Map, Acc) ->
-    I = {Op,Fail,Map(Bits),Unit,Map(Bin),Flags,Map(D)},
-    remap(Is, Map, [I|Acc]);
-remap([bs_init_writable=I|Is], Map, Acc) ->
-    remap(Is, Map, [I|Acc]);
-remap([{bs_init2,Fail,Src,Live,U,Flags,D}|Is], Map, Acc) ->
-    I = {bs_init2,Fail,Map(Src),Live,U,Flags,Map(D)},
-    remap(Is, Map, [I|Acc]);
-remap([{bs_init_bits,Fail,Src,Live,U,Flags,D}|Is], Map, Acc) ->
-    I = {bs_init_bits,Fail,Map(Src),Live,U,Flags,Map(D)},
-    remap(Is, Map, [I|Acc]);
-remap([{bs_put_binary=Op,Fail,Src,U,Flags,D}|Is], Map, Acc) ->
-    I = {Op,Fail,Map(Src),U,Flags,Map(D)},
-    remap(Is, Map, [I|Acc]);
-remap([{bs_put_integer=Op,Fail,Src,U,Flags,D}|Is], Map, Acc) ->
-    I = {Op,Fail,Map(Src),U,Flags,Map(D)},
-    remap(Is, Map, [I|Acc]);
-remap([{bs_put_float=Op,Fail,Src,U,Flags,D}|Is], Map, Acc) ->
-    I = {Op,Fail,Map(Src),U,Flags,Map(D)},
-    remap(Is, Map, [I|Acc]);
-remap([{bs_put_string,_,_}=I|Is], Map, Acc) ->
+remap([{bs_put=Op,Fail,Info,Ss}|Is], Map, Acc) ->
+    I = {Op,Fail,Info,[Map(S) || S <- Ss]},
     remap(Is, Map, [I|Acc]);
 remap([{kill,Y}|T], Map, Acc) ->
     remap(T, Map, [{kill,Map(Y)}|Acc]);
-remap([send=I|T], Map, Acc) ->
-    remap(T, Map, [I|Acc]);
 remap([{make_fun2,_,_,_,_}=I|T], Map, Acc) ->
     remap(T, Map, [I|Acc]);
 remap([{deallocate,N}|Is], Map, Acc) ->
@@ -217,12 +195,6 @@ remap([{test,Name,Fail,Live,Ss,Dst}|Is], Map, Acc) ->
     remap(Is, Map, [I|Acc]);
 remap([return|_]=Is, _, Acc) ->
     reverse(Acc, Is);
-remap([{call_last,Ar,Name,N}|Is], Map, Acc) ->
-    I = {call_last,Ar,Name,Map({frame_size,N})},
-    reverse(Acc, [I|Is]);
-remap([{call_ext_last,Ar,Name,N}|Is], Map, Acc) ->
-    I = {call_ext_last,Ar,Name,Map({frame_size,N})},
-    reverse(Acc, [I|Is]);
 remap([{line,_}=I|Is], Map, Acc) ->
     remap(Is, Map, [I|Acc]).
     
@@ -280,8 +252,8 @@ frame_size([{call_fun,_}|Is], Safe) ->
     frame_size(Is, Safe);
 frame_size([{call,_,_}|Is], Safe) ->
     frame_size(Is, Safe);
-frame_size([{call_ext,A,{extfunc,M,F,A}}|Is], Safe) ->
-    case erl_bifs:is_exit_bif(M, F, A) of
+frame_size([{call_ext,_,_}=I|Is], Safe) ->
+    case beam_jump:is_exit_instruction(I) of
 	true -> throw(not_possible);
 	false -> frame_size(Is, Safe)
     end;
@@ -295,35 +267,15 @@ frame_size([{test,_,{f,L},_}|Is], Safe) ->
     frame_size_branch(L, Is, Safe);
 frame_size([{test,_,{f,L},_,_,_}|Is], Safe) ->
     frame_size_branch(L, Is, Safe);
-frame_size([{bs_add,{f,L},_,_}|Is], Safe) ->
+frame_size([{bs_init,{f,L},_,_,_,_}|Is], Safe) ->
     frame_size_branch(L, Is, Safe);
-frame_size([{bs_append,{f,L},_,_,_,_,_,_,_}|Is], Safe) ->
+frame_size([{bs_put,{f,L},_,_}|Is], Safe) ->
     frame_size_branch(L, Is, Safe);
-frame_size([{bs_private_append,{f,L},_,_,_,_,_}|Is], Safe) ->
-    frame_size_branch(L, Is, Safe);
-frame_size([bs_init_writable|Is], Safe) ->
-    frame_size(Is, Safe);
-frame_size([{bs_init2,{f,L},_,_,_,_,_}|Is], Safe) ->
-    frame_size_branch(L, Is, Safe);
-frame_size([{bs_init_bits,{f,L},_,_,_,_,_}|Is], Safe) ->
-    frame_size_branch(L, Is, Safe);
-frame_size([{bs_put_binary,{f,L},_,_,_,_}|Is], Safe) ->
-    frame_size_branch(L, Is, Safe);
-frame_size([{bs_put_integer,{f,L},_,_,_,_}|Is], Safe) ->
-    frame_size_branch(L, Is, Safe);
-frame_size([{bs_put_float,{f,L},_,_,_,_}|Is], Safe) ->
-    frame_size_branch(L, Is, Safe);
-frame_size([{bs_put_string,_,_}|Is], Safe) ->
-    frame_size(Is, Safe);
 frame_size([{kill,_}|Is], Safe) ->
     frame_size(Is, Safe);
-frame_size([send|Is], Safe) ->
-    frame_size(Is, Safe);
 frame_size([{make_fun2,_,_,_,_}|Is], Safe) ->
     frame_size(Is, Safe);
 frame_size([{deallocate,N}|_], _) -> N;
-frame_size([{call_last,_,_,N}|_], _) -> N;
-frame_size([{call_ext_last,_,_,N}|_], _) -> N;
 frame_size([{line,_}|Is], Safe) ->
     frame_size(Is, Safe);
 frame_size([_|_], _) -> throw(not_possible).
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index 194f089ba1..8af0447f63 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -87,7 +87,7 @@ is_killed_at(R, Lbl, D) when is_integer(Lbl) ->
 %%  across branches.
 
 is_not_used(R, Is, D) ->
-    St = #live{bl=fun check_used_block/2,lbl=D,res=gb_trees:empty()},
+    St = #live{bl=check_used_block_fun(D),lbl=D,res=gb_trees:empty()},
     case check_liveness(R, Is, St) of
 	{killed,_} -> true;
 	{used,_} -> false;
@@ -102,7 +102,7 @@ is_not_used(R, Is, D) ->
 %%  across branches.
 
 is_not_used_at(R, Lbl, D) ->
-    St = #live{bl=fun check_used_block/2,lbl=D,res=gb_trees:empty()},
+    St = #live{bl=check_used_block_fun(D),lbl=D,res=gb_trees:empty()},
     case check_liveness_at(R, Lbl, St) of
 	{killed,_} -> true;
 	{used,_} -> false;
@@ -263,26 +263,14 @@ check_liveness(R, [{test,_,{f,Fail},As}|Is], St0) ->
 		{_,_}=Other -> Other
 	    end
     end;
-check_liveness(R, [{test,_,{f,Fail},Live,Ss,_}|Is], St0) ->
-    case R of
-	{x,X} ->
-	    case X < Live orelse member(R, Ss) of
-		true -> {used,St0};
-		false -> check_liveness_at(R, Fail, St0)
-	    end;
-	{y,_} ->
-	    case check_liveness_at(R, Fail, St0) of
-		{killed,St} -> check_liveness(R, Is, St);
-		{_,_}=Other -> Other
-	    end
-    end;
-check_liveness(R, [{select_val,R,_,_}|_], St) ->
-    {used,St};
-check_liveness(R, [{select_val,_,Fail,{list,Branches}}|_], St) ->
-    check_liveness_everywhere(R, [Fail|Branches], St);
-check_liveness(R, [{select_tuple_arity,R,_,_}|_], St) ->
+check_liveness(R, [{test,Op,Fail,Live,Ss,Dst}|Is], St) ->
+    %% Check this instruction as a block to get a less conservative
+    %% result if the caller is is_not_used/3.
+    Block = [{set,[Dst],Ss,{alloc,Live,{bif,Op,Fail}}}],
+    check_liveness(R, [{block,Block}|Is], St);
+check_liveness(R, [{select,_,R,_,_}|_], St) ->
     {used,St};
-check_liveness(R, [{select_tuple_arity,_,Fail,{list,Branches}}|_], St) ->
+check_liveness(R, [{select,_,_,Fail,Branches}|_], St) ->
     check_liveness_everywhere(R, [Fail|Branches], St);
 check_liveness(R, [{jump,{f,F}}|_], St) ->
     check_liveness_at(R, F, St);
@@ -301,37 +289,33 @@ check_liveness(R, [{kill,R}|_], St) ->
     {killed,St};
 check_liveness(R, [{kill,_}|Is], St) ->
     check_liveness(R, Is, St);
-check_liveness(R, [bs_init_writable|Is], St) ->
-    if
-	R =:= {x,0} -> {used,St};
-	true -> check_liveness(R, Is, St)
-    end;
-check_liveness(R, [{bs_private_append,_,Bits,_,Bin,_,Dst}|Is], St) ->
-    case R of
-	Bits -> {used,St};
-	Bin -> {used,St};
-	Dst -> {killed,St};
-	_ -> check_liveness(R, Is, St)
+check_liveness(R, [{bs_init,_,_,none,Ss,Dst}|Is], St) ->
+    case member(R, Ss) of
+	true ->
+	    {used,St};
+	false ->
+	    if
+		R =:= Dst -> {killed,St};
+		true -> check_liveness(R, Is, St)
+	    end
     end;
-check_liveness(R, [{bs_append,_,Bits,_,_,_,Bin,_,Dst}|Is], St) ->
+check_liveness(R, [{bs_init,_,_,Live,Ss,Dst}|Is], St) ->
     case R of
-	Bits -> {used,St};
-	Bin -> {used,St};
-	Dst -> {killed,St};
-	_ -> check_liveness(R, Is, St)
-    end;
-check_liveness(R, [{bs_init2,_,_,_,_,_,Dst}|Is], St) ->
-    if
-	R =:= Dst -> {killed,St};
-	true -> check_liveness(R, Is, St)
-    end;
-check_liveness(R, [{bs_init_bits,_,_,_,_,_,Dst}|Is], St) ->
-    if
-	R =:= Dst -> {killed,St};
-	true -> check_liveness(R, Is, St)
+	{x,X} ->
+	    case X < Live orelse member(R, Ss) of
+		true -> {used,St};
+		false -> {killed,St}
+	    end;
+	{y,_} ->
+	    case member(R, Ss) of
+		true -> {used,St};
+		false ->
+		    if
+			R =:= Dst -> {killed,St};
+			true -> check_liveness(R, Is, St)
+		    end
+	    end
     end;
-check_liveness(R, [{bs_put_string,_,_}|Is], St) ->
-    check_liveness(R, Is, St);
 check_liveness(R, [{deallocate,_}|Is], St) ->
     case R of
 	{y,_} -> {killed,St};
@@ -339,29 +323,20 @@ check_liveness(R, [{deallocate,_}|Is], St) ->
     end;
 check_liveness(R, [return|_], St) ->
     check_liveness_live_ret(R, 1, St);
-check_liveness(R, [{call_last,Live,_,_}|_], St) ->
-    check_liveness_live_ret(R, Live, St);
-check_liveness(R, [{call_only,Live,_}|_], St) ->
-    check_liveness_live_ret(R, Live, St);
-check_liveness(R, [{call_ext_last,Live,_,_}|_], St) ->
-    check_liveness_live_ret(R, Live, St);
-check_liveness(R, [{call_ext_only,Live,_}|_], St) ->
-    check_liveness_live_ret(R, Live, St);
 check_liveness(R, [{call,Live,_}|Is], St) ->
     case R of
 	{x,X} when X < Live -> {used,St};
 	{x,_} -> {killed,St};
 	{y,_} -> check_liveness(R, Is, St)
     end;
-check_liveness(R, [{call_ext,Live,Func}|Is], St) ->
+check_liveness(R, [{call_ext,Live,_}=I|Is], St) ->
     case R of
 	{x,X} when X < Live ->
 	    {used,St};
 	{x,_} ->
 	    {killed,St};
 	{y,_} ->
-	    {extfunc,Mod,Name,Arity} = Func,
-	    case erl_bifs:is_exit_bif(Mod, Name, Arity) of
+	    case beam_jump:is_exit_instruction(I) of
 		false ->
 		    check_liveness(R, Is, St);
 		true ->
@@ -387,14 +362,6 @@ check_liveness(R, [{apply,Args}|Is], St) ->
 	{x,_} -> {killed,St};
 	{y,_} -> check_liveness(R, Is, St)
     end;
-check_liveness(R, [{apply_last,Args,_}|_], St) ->
-    check_liveness_live_ret(R, Args+2, St);
-check_liveness(R, [send|Is], St) ->
-    case R of
-	{x,X} when X < 2 -> {used,St};
-	{x,_} -> {killed,St};
-	{y,_} -> check_liveness(R, Is, St)
-    end;
 check_liveness({x,R}, [{'%live',Live}|Is], St) ->
     if
 	R < Live -> check_liveness(R, Is, St);
@@ -429,25 +396,9 @@ check_liveness(R, [{gc_bif,Op,{f,Fail},Live,Ss,D}|Is], St0) ->
 		    Other
 	    end
     end;
-check_liveness(R, [{bs_add,{f,0},Ss,D}|Is], St) ->
+check_liveness(R, [{bs_put,{f,0},_,Ss}|Is], St) ->
     case member(R, Ss) of
 	true -> {used,St};
-	false when R =:= D -> {killed,St};
-	false -> check_liveness(R, Is, St)
-    end;
-check_liveness(R, [{bs_put_binary,{f,0},Sz,_,_,Src}|Is], St) ->
-    case member(R, [Sz,Src]) of
-	true -> {used,St};
-	false -> check_liveness(R, Is, St)
-    end;
-check_liveness(R, [{bs_put_integer,{f,0},Sz,_,_,Src}|Is], St) ->
-    case member(R, [Sz,Src]) of
-	true -> {used,St};
-	false -> check_liveness(R, Is, St)
-    end;
-check_liveness(R, [{bs_put_float,{f,0},Sz,_,_,Src}|Is], St) ->
-    case member(R, [Sz,Src]) of
-	true -> {used,St};
 	false -> check_liveness(R, Is, St)
     end;
 check_liveness(R, [{bs_restore2,S,_}|Is], St) ->
@@ -472,6 +423,16 @@ check_liveness(R, [{make_fun2,_,_,_,NumFree}|Is], St) ->
 	{x,_} -> {killed,St};
 	_ -> check_liveness(R, Is, St)
     end;
+check_liveness({x,_}=R, [{'catch',_,_}|Is], St) ->
+    %% All x registers will be killed if an exception occurs.
+    %% Therefore we only need to check the liveness for the
+    %% instructions following the catch instruction.
+    check_liveness(R, Is, St);
+check_liveness({x,_}=R, [{'try',_,_}|Is], St) ->
+    %% All x registers will be killed if an exception occurs.
+    %% Therefore we only need to check the liveness for the
+    %% instructions inside the 'try' block.
+    check_liveness(R, Is, St);
 check_liveness(R, [{try_end,Y}|Is], St) ->
     case R of
 	Y ->
@@ -602,26 +563,50 @@ check_killed_block(_, []) -> transparent.
 %%  
 %%    (Unknown instructions will cause an exception.)
 
-check_used_block({x,X}=R, [{set,_,_,{alloc,Live,_}}|Is]) ->
+check_used_block_fun(D) ->
+    fun(R, Is) -> check_used_block(R, Is, D) end.
+
+check_used_block({x,X}=R, [{set,Ds,Ss,{alloc,Live,Op}}|Is], D) ->
     if 
 	X >= Live -> killed;
-	true -> check_used_block(R, Is)
+	true ->
+	    case member(R, Ss) orelse
+		is_reg_used_at(R, Op, D) of
+		true -> used;
+		false ->
+		    case member(R, Ds) of
+			true -> killed;
+			false -> check_used_block(R, Is, D)
+		    end
+	    end
     end;
-check_used_block(R, [{set,Ds,Ss,_Op}|Is]) ->
-    case member(R, Ss) of
+check_used_block(R, [{set,Ds,Ss,Op}|Is], D) ->
+    case member(R, Ss) orelse
+	is_reg_used_at(R, Op, D) of
 	true -> used;
 	false ->
 	    case member(R, Ds) of
 		true -> killed;
-		false -> check_used_block(R, Is)
+		false -> check_used_block(R, Is, D)
 	    end
     end;
-check_used_block(R, [{'%live',Live}|Is]) ->
+check_used_block(R, [{'%live',Live}|Is], D) ->
     case R of
 	{x,X} when X >= Live -> killed;
-	_ -> check_used_block(R, Is)
+	_ -> check_used_block(R, Is, D)
     end;
-check_used_block(_, []) -> transparent.
+check_used_block(_, [], _) -> transparent.
+
+is_reg_used_at(R, {gc_bif,_,{f,Lbl}}, D) ->
+    is_reg_used_at_1(R, Lbl, D);
+is_reg_used_at(R, {bif,_,{f,Lbl}}, D) ->
+    is_reg_used_at_1(R, Lbl, D);
+is_reg_used_at(_, _, _) -> false.
+
+is_reg_used_at_1(_, 0, _) ->
+    false;
+is_reg_used_at_1(R, Lbl, D) ->
+    not is_not_used_at(R, Lbl, D).
 
 index_labels_1([{label,Lbl}|Is0], Acc) ->
     Is = lists:dropwhile(fun({label,_}) -> true;
@@ -654,49 +639,21 @@ combine_alloc_lists_1([]) -> [].
 live_opt([{bs_context_to_binary,Src}=I|Is], Regs0, D, Acc) ->
     Regs = x_live([Src], Regs0),
     live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{bs_add,Fail,[Src1,Src2,_],Dst}=I|Is], Regs0, D, Acc) ->
-    Regs1 = x_live([Src1,Src2], x_dead([Dst], Regs0)),
-    Regs = live_join_label(Fail, D, Regs1),
-    live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{bs_init2,Fail,_,_,Live,_,_}=I|Is], _, D, Acc) ->
-    Regs1 = live_call(Live),
-    Regs = live_join_label(Fail, D, Regs1),
-    live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{bs_init_bits,Fail,Src1,_,Live,_,Src2}=I|Is], _, D, Acc) ->
-    Regs1 = live_call(Live),
-    Regs2 = x_live([Src1,Src2], Regs1),
-    Regs = live_join_label(Fail, D, Regs2),
-    live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{bs_append,Fail,Src1,_,Live,_,Src2,_,Dst}=I|Is], _Regs0, D, Acc) ->
-    Regs1 = x_dead([Dst], x_live([Src1,Src2], live_call(Live))),
-    Regs = live_join_label(Fail, D, Regs1),
-    live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{bs_private_append,Fail,Src1,_,Src2,_,Dst}=I|Is], Regs0, D, Acc) ->
-    Regs1 = x_live([Src1,Src2], x_dead([Dst], Regs0)),
-    Regs = live_join_label(Fail, D, Regs1),
-    live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{bs_put_binary,Fail,Src1,_,_,Src2}=I|Is], Regs0, D, Acc) ->
-    Regs1 = x_live([Src1,Src2], Regs0),
-    Regs = live_join_label(Fail, D, Regs1),
-    live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{bs_put_float,Fail,Src1,_,_,Src2}=I|Is], Regs0, D, Acc) ->
-    Regs1 = x_live([Src1,Src2], Regs0),
+live_opt([{bs_init,Fail,_,none,Ss,Dst}=I|Is], Regs0, D, Acc) ->
+    Regs1 = x_live(Ss, x_dead([Dst], Regs0)),
     Regs = live_join_label(Fail, D, Regs1),
     live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{bs_put_integer,Fail,Src1,_,_,Src2}=I|Is], Regs0, D, Acc) ->
-    Regs1 = x_live([Src1,Src2], Regs0),
-    Regs = live_join_label(Fail, D, Regs1),
-    live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{bs_put_utf8,Fail,_,Src}=I|Is], Regs0, D, Acc) ->
-    Regs1 = x_live([Src], Regs0),
-    Regs = live_join_label(Fail, D, Regs1),
-    live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{bs_put_utf16,Fail,_,Src}=I|Is], Regs0, D, Acc) ->
-    Regs1 = x_live([Src], Regs0),
-    Regs = live_join_label(Fail, D, Regs1),
+live_opt([{bs_init,Fail,Info,Live0,Ss,Dst}|Is], Regs0, D, Acc) ->
+    Regs1 = x_dead([Dst], Regs0),
+    Live = live_regs(Regs1),
+    true = Live =< Live0,	%Assertion.
+    Regs2 = live_call(Live),
+    Regs3 = x_live(Ss, Regs2),
+    Regs = live_join_label(Fail, D, Regs3),
+    I = {bs_init,Fail,Info,Live,Ss,Dst},
     live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{bs_put_utf32,Fail,_,Src}=I|Is], Regs0, D, Acc) ->
-    Regs1 = x_live([Src], Regs0),
+live_opt([{bs_put,Fail,_,Ss}=I|Is], Regs0, D, Acc) ->
+    Regs1 = x_live(Ss, Regs0),
     Regs = live_join_label(Fail, D, Regs1),
     live_opt(Is, Regs, D, [I|Acc]);
 live_opt([{bs_restore2,Src,_}=I|Is], Regs0, D, Acc) ->
@@ -705,14 +662,6 @@ live_opt([{bs_restore2,Src,_}=I|Is], Regs0, D, Acc) ->
 live_opt([{bs_save2,Src,_}=I|Is], Regs0, D, Acc) ->
     Regs = x_live([Src], Regs0),
     live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{bs_utf8_size,Fail,Src,Dst}=I|Is], Regs0, D, Acc) ->
-    Regs1 = x_live([Src], x_dead([Dst], Regs0)),
-    Regs = live_join_label(Fail, D, Regs1),
-    live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{bs_utf16_size,Fail,Src,Dst}=I|Is], Regs0, D, Acc) ->
-    Regs1 = x_live([Src], x_dead([Dst], Regs0)),
-    Regs = live_join_label(Fail, D, Regs1),
-    live_opt(Is, Regs, D, [I|Acc]);
 live_opt([{test,bs_start_match2,Fail,Live,[Src,_],_}=I|Is], _, D, Acc) ->
     Regs0 = live_call(Live),
     Regs1 = x_live([Src], Regs0),
@@ -747,30 +696,16 @@ live_opt([{try_case_end,Src}=I|Is], _, D, Acc) ->
 live_opt([if_end=I|Is], _, D, Acc) ->
     Regs = 0,
     live_opt(Is, Regs, D, [I|Acc]);
-live_opt([bs_init_writable=I|Is], _, D, Acc) ->
-    live_opt(Is, live_call(1), D, [I|Acc]);
 live_opt([{call,Arity,_}=I|Is], _, D, Acc) ->
     live_opt(Is, live_call(Arity), D, [I|Acc]);
 live_opt([{call_ext,Arity,_}=I|Is], _, D, Acc) ->
     live_opt(Is, live_call(Arity), D, [I|Acc]);
 live_opt([{call_fun,Arity}=I|Is], _, D, Acc) ->
     live_opt(Is, live_call(Arity+1), D, [I|Acc]);
-live_opt([{call_last,Arity,_,_}=I|Is], _, D, Acc) ->
-    live_opt(Is, live_call(Arity), D, [I|Acc]);
-live_opt([{call_ext_last,Arity,_,_}=I|Is], _, D, Acc) ->
-    live_opt(Is, live_call(Arity), D, [I|Acc]);
 live_opt([{apply,Arity}=I|Is], _, D, Acc) ->
     live_opt(Is, live_call(Arity+2), D, [I|Acc]);
-live_opt([{apply_last,Arity,_}=I|Is], _, D, Acc) ->
-    live_opt(Is, live_call(Arity+2), D, [I|Acc]);
-live_opt([{call_only,Arity,_}=I|Is], _, D, Acc) ->
-    live_opt(Is, live_call(Arity), D, [I|Acc]);
-live_opt([{call_ext_only,Arity,_}=I|Is], _, D, Acc) ->
-    live_opt(Is, live_call(Arity), D, [I|Acc]);
 live_opt([{make_fun2,_,_,_,Arity}=I|Is], _, D, Acc) ->
     live_opt(Is, live_call(Arity), D, [I|Acc]);
-live_opt([send=I|Is], _, D, Acc) ->
-    live_opt(Is, live_call(2), D, [I|Acc]);
 live_opt([{test,_,Fail,Ss}=I|Is], Regs0, D, Acc) ->
     Regs1 = x_live(Ss, Regs0),
     Regs = live_join_label(Fail, D, Regs1),
@@ -780,16 +715,14 @@ live_opt([{test,_,Fail,Live,Ss,_}=I|Is], _, D, Acc) ->
     Regs1 = x_live(Ss, Regs0),
     Regs = live_join_label(Fail, D, Regs1),
     live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{select_val,Src,Fail,{list,List}}=I|Is], Regs0, D, Acc) ->
+live_opt([{select,_,Src,Fail,List}=I|Is], Regs0, D, Acc) ->
     Regs1 = x_live([Src], Regs0),
     Regs = live_join_labels([Fail|List], D, Regs1),
     live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{select_tuple_arity,Src,Fail,{list,List}}=I|Is], Regs0, D, Acc) ->
-    Regs1 = x_live([Src], Regs0),
-    Regs = live_join_labels([Fail|List], D, Regs1),
-    live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{'try',_,Fail}=I|Is], Regs0, D, Acc) ->
-    Regs = live_join_label(Fail, D, Regs0),
+live_opt([{'try',_,_}=I|Is], Regs, D, Acc) ->
+    %% If an exeption happens, all x registers will be killed.
+    %% Therefore, we should only base liveness of the code inside
+    %% the try.
     live_opt(Is, Regs, D, [I|Acc]);
 live_opt([{try_case,_}=I|Is], _, D, Acc) ->
     live_opt(Is, live_call(1), D, [I|Acc]);
@@ -799,8 +732,6 @@ live_opt([timeout=I|Is], _, D, Acc) ->
     live_opt(Is, 0, D, [I|Acc]);
 
 %% Transparent instructions - they neither use nor modify x registers.
-live_opt([{bs_put_string,_,_}=I|Is], Regs, D, Acc) ->
-    live_opt(Is, Regs, D, [I|Acc]);
 live_opt([{deallocate,_}=I|Is], Regs, D, Acc) ->
     live_opt(Is, Regs, D, [I|Acc]);
 live_opt([{kill,_}=I|Is], Regs, D, Acc) ->
@@ -827,13 +758,24 @@ live_opt([{allocate_heap,_,_,Live}=I|Is], _, D, Acc) ->
 
 live_opt([], _, _, Acc) -> Acc.
 
-live_opt_block([{set,[],[],{alloc,Live,_}}=I|Is], _, D, Acc) ->
-    live_opt_block(Is, live_call(Live), D, [I|Acc]);
-live_opt_block([{set,Ds,Ss,Op}=I|Is], Regs0, D, Acc) ->
-    Regs = case Op of
-	       {alloc,Live,_} -> live_call(Live);
-	       _ -> x_live(Ss, x_dead(Ds, Regs0))
-	   end,
+live_opt_block([{set,Ds,Ss,Op}=I0|Is], Regs0, D, Acc) ->
+    Regs1 = x_live(Ss, x_dead(Ds, Regs0)),
+    {I,Regs} = case Op of
+		   {alloc,Live0,Alloc} ->
+		       %% The life-time analysis used by the code generator
+		       %% is sometimes too conservative, so it may be
+		       %% possible to lower the number of live registers
+		       %% based on the exact liveness information.
+		       %% The main benefit is that more optimizations that
+		       %% depend on liveness information (such as the
+		       %% beam_bool and beam_dead passes) may be applied.
+		       Live = live_regs(Regs1),
+		       true = Live =< Live0,	%Assertion.
+		       I1 = {set,Ds,Ss,{alloc,Live,Alloc}},
+		       {I1,live_call(Live)};
+		   _ ->
+		       {I0,Regs1}
+	       end,
     case Ds of
 	[{x,X}] ->
 	    case (not is_live(X, Regs0)) andalso Op =:= move of
diff --git a/lib/compiler/src/beam_z.erl b/lib/compiler/src/beam_z.erl
new file mode 100644
index 0000000000..8c6b0c916d
--- /dev/null
+++ b/lib/compiler/src/beam_z.erl
@@ -0,0 +1,79 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2012. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Purpose: Run right before beam_asm to do any final fix-ups or clean-ups.
+%%          (Mandatory.)
+
+-module(beam_z).
+
+-export([module/2]).
+
+module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
+    Fs = [function(F) || F <- Fs0],
+    {ok,{Mod,Exp,Attr,Fs,Lc}}.
+
+function({function,Name,Arity,CLabel,Is0}) ->
+    try
+	Is = undo_renames(Is0),
+	{function,Name,Arity,CLabel,Is}
+    catch
+	Class:Error ->
+	    Stack = erlang:get_stacktrace(),
+	    io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+	    erlang:raise(Class, Error, Stack)
+    end.
+
+undo_renames([{call_ext,2,send}|Is]) ->
+    [send|undo_renames(Is)];
+undo_renames([{apply,A},{deallocate,N},return|Is]) ->
+    [{apply_last,A,N}|undo_renames(Is)];
+undo_renames([{call,A,F},{deallocate,N},return|Is]) ->
+    [{call_last,A,F,N}|undo_renames(Is)];
+undo_renames([{call_ext,A,F},{deallocate,N},return|Is]) ->
+    [{call_ext_last,A,F,N}|undo_renames(Is)];
+undo_renames([{call,A,F},return|Is]) ->
+    [{call_only,A,F}|undo_renames(Is)];
+undo_renames([{call_ext,A,F},return|Is]) ->
+    [{call_ext_only,A,F}|undo_renames(Is)];
+undo_renames([I|Is]) ->
+    [undo_rename(I)|undo_renames(Is)];
+undo_renames([]) -> [].
+
+undo_rename({bs_put,F,{I,U,Fl},[Sz,Src]}) ->
+    {I,F,Sz,U,Fl,Src};
+undo_rename({bs_put,F,{I,Fl},[Src]}) ->
+    {I,F,Fl,Src};
+undo_rename({bs_put,{f,0},{bs_put_string,_,_}=I,[]}) ->
+    I;
+undo_rename({bif,bs_add=I,F,[Src1,Src2,{integer,U}],Dst}) ->
+    {I,F,[Src1,Src2,U],Dst};
+undo_rename({bif,bs_utf8_size=I,F,[Src],Dst}) ->
+    {I,F,Src,Dst};
+undo_rename({bif,bs_utf16_size=I,F,[Src],Dst}) ->
+    {I,F,Src,Dst};
+undo_rename({bs_init,F,{I,U,Flags},none,[Sz,Src],Dst}) ->
+    {I,F,Sz,U,Src,Flags,Dst};
+undo_rename({bs_init,F,{I,Extra,Flags},Live,[Sz],Dst}) ->
+    {I,F,Sz,Extra,Live,Flags,Dst};
+undo_rename({bs_init,F,{I,Extra,U,Flags},Live,[Sz,Src],Dst}) ->
+    {I,F,Sz,Extra,Live,U,Src,Flags,Dst};
+undo_rename({bs_init,_,bs_init_writable=I,_,_,_}) ->
+    I;
+undo_rename({select,I,Reg,Fail,List}) ->
+    {I,Reg,Fail,{list,List}};
+undo_rename(I) -> I.
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 0a368df5d6..a3120eb917 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -224,6 +224,8 @@ format_error({delete_temp,File,Error}) ->
 		  [File,file:format_error(Error)]);
 format_error({parse_transform,M,R}) ->
     io_lib:format("error in parse transform '~s': ~p", [M, R]);
+format_error({undef_parse_transform,M}) ->
+    io_lib:format("undefined parse transform '~s'", [M]);
 format_error({core_transform,M,R}) ->
     io_lib:format("error in core transform '~s': ~p", [M, R]);
 format_error({crash,Pass,Reason}) ->
@@ -246,6 +248,7 @@ format_error({module_name,Mod,Filename}) ->
 		  abstract_code=[],		%Abstract code for debugger.
 		  options=[]  :: [option()],	%Options for compilation
 		  mod_options=[]  :: [option()], %Options for module_info
+                  encoding=none :: none | epp:source_coding(),
 		  errors=[],
 		  warnings=[]}).
 
@@ -551,12 +554,12 @@ select_list_passes_1([{iff,Flag,{done,Ext}}|Ps], Opts, Acc) ->
     end;
 select_list_passes_1([{iff=Op,Flag,List0}|Ps], Opts, Acc) when is_list(List0) ->
     case select_list_passes(List0, Opts) of
-	{done,_}=Done -> Done;
+	{done,List} -> {done,reverse(Acc) ++ List};
 	{not_done,List} -> select_list_passes_1(Ps, Opts, [{Op,Flag,List}|Acc])
     end;
 select_list_passes_1([{unless=Op,Flag,List0}|Ps], Opts, Acc) when is_list(List0) ->
     case select_list_passes(List0, Opts) of
-	{done,_}=Done -> Done;
+	{done,List} -> {done,reverse(Acc) ++ List};
 	{not_done,List} -> select_list_passes_1(Ps, Opts, [{Op,Flag,List}|Acc])
     end;
 select_list_passes_1([P|Ps], Opts, Acc) ->
@@ -630,7 +633,8 @@ kernel_passes() ->
 asm_passes() ->
     %% Assembly level optimisations.
     [{delay,
-      [{unless,no_postopt,
+      [{pass,beam_a},
+       {unless,no_postopt,
 	[{pass,beam_block},
 	 {iff,dblk,{listing,"block"}},
 	 {unless,no_except,{pass,beam_except}},
@@ -657,13 +661,11 @@ asm_passes() ->
 	 {iff,dtrim,{listing,"trim"}},
 	 {pass,beam_flatten}]},
 
-       %% If post optimizations are turned off, we still coalesce
-       %% adjacent labels and remove unused labels to keep the
-       %% HiPE compiler happy.
-       {iff,no_postopt,
-	[?pass(beam_unused_labels),
-	 {pass,beam_clean}]},
+       %% If post optimizations are turned off, we still
+       %% need to do a few clean-ups to code.
+       {iff,no_postopt,[{pass,beam_clean}]},
 
+       {pass,beam_z},
        {iff,dopt,{listing,"optimize"}},
        {iff,'S',{listing,"S"}},
        {iff,'to_asm',{done,"S"}}]},
@@ -733,8 +735,9 @@ collect_asm([X | Rest], R) ->
 beam_consult_asm(St) ->
     case file:consult(St#compile.ifile) of
 	{ok, Forms0} ->
+            Encoding = epp:read_encoding(St#compile.ifile),
 	    {Module, Forms} = preprocess_asm_forms(Forms0),
-	    {ok,St#compile{module=Module, code=Forms}};
+	    {ok,St#compile{module=Module, code=Forms, encoding=Encoding}};
 	{error,E} ->
 	    Es = [{St#compile.ifile,[{none,?MODULE,{open,E}}]}],
 	    {error,St#compile{errors=St#compile.errors ++ Es}}
@@ -776,7 +779,8 @@ parse_module(St) ->
     R =  epp:parse_file(St#compile.ifile, IncludePath, pre_defs(Opts)),
     case R of
 	{ok,Forms} ->
-	    {ok,St#compile{code=Forms}};
+            Encoding = epp:read_encoding(St#compile.ifile),
+	    {ok,St#compile{code=Forms,encoding=Encoding}};
 	{error,E} ->
 	    Es = [{St#compile.ifile,[{none,?MODULE,{epp,E}}]}],
 	    {error,St#compile{errors=St#compile.errors ++ Es}}
@@ -850,6 +854,10 @@ foldl_transform(St, [T|Ts]) ->
 	{error,Es,Ws} ->
 	    {error,St#compile{warnings=St#compile.warnings ++ Ws,
 			      errors=St#compile.errors ++ Es}};
+	{'EXIT',{undef,_}} ->
+	    Es = [{St#compile.ifile,[{none,compile,
+				      {undef_parse_transform,T}}]}],
+	    {error,St#compile{errors=St#compile.errors ++ Es}};
 	{'EXIT',R} ->
 	    Es = [{St#compile.ifile,[{none,compile,{parse_transform,T,R}}]}],
 	    {error,St#compile{errors=St#compile.errors ++ Es}};
@@ -1236,10 +1244,6 @@ random_bytes_1(N, Acc) -> random_bytes_1(N-1, [random:uniform(255)|Acc]).
 save_core_code(St) ->
     {ok,St#compile{core_code=cerl:from_records(St#compile.code)}}.
 
-beam_unused_labels(#compile{code=Code0}=St) ->
-    Code = beam_jump:module_labels(Code0),
-    {ok,St#compile{code=Code}}.
-
 beam_asm(#compile{ifile=File,code=Code0,
 		  abstract_code=Abst,mod_options=Opts0}=St) ->
     Source = filename:absname(File),
@@ -1423,28 +1427,28 @@ report_warnings(#compile{options=Opts,warnings=Ws0}) ->
     end.
 
 format_message(F, P, [{{Line,Column}=Loc,Mod,E}|Es]) ->
-    M = {{F,Loc},io_lib:format("~s:~w:~w ~s~s\n",
+    M = {{F,Loc},io_lib:format("~s:~w:~w ~s~ts\n",
                                 [F,Line,Column,P,Mod:format_error(E)])},
     [M|format_message(F, P, Es)];
 format_message(F, P, [{Line,Mod,E}|Es]) ->
-    M = {{F,{Line,0}},io_lib:format("~s:~w: ~s~s\n",
+    M = {{F,{Line,0}},io_lib:format("~s:~w: ~s~ts\n",
                                 [F,Line,P,Mod:format_error(E)])},
     [M|format_message(F, P, Es)];
 format_message(F, P, [{Mod,E}|Es]) ->
-    M = {none,io_lib:format("~s: ~s~s\n", [F,P,Mod:format_error(E)])},
+    M = {none,io_lib:format("~s: ~s~ts\n", [F,P,Mod:format_error(E)])},
     [M|format_message(F, P, Es)];
 format_message(_, _, []) -> [].
 
 %% list_errors(File, ErrorDescriptors) -> ok
 
 list_errors(F, [{{Line,Column},Mod,E}|Es]) ->
-    io:fwrite("~s:~w:~w: ~s\n", [F,Line,Column,Mod:format_error(E)]),
+    io:fwrite("~s:~w:~w: ~ts\n", [F,Line,Column,Mod:format_error(E)]),
     list_errors(F, Es);
 list_errors(F, [{Line,Mod,E}|Es]) ->
-    io:fwrite("~s:~w: ~s\n", [F,Line,Mod:format_error(E)]),
+    io:fwrite("~s:~w: ~ts\n", [F,Line,Mod:format_error(E)]),
     list_errors(F, Es);
 list_errors(F, [{Mod,E}|Es]) ->
-    io:fwrite("~s: ~s\n", [F,Mod:format_error(E)]),
+    io:fwrite("~s: ~ts\n", [F,Mod:format_error(E)]),
     list_errors(F, Es);
 list_errors(_F, []) -> ok.
 
@@ -1500,10 +1504,12 @@ src_listing(Ext, St) ->
 	    Ext, St).
 
 do_src_listing(Lf, Fs) ->
-    foreach(fun (F) -> io:put_chars(Lf, [erl_pp:form(F),"\n"]) end,
+    Opts = [lists:keyfind(encoding, 1, io:getopts(Lf))],
+    foreach(fun (F) -> io:put_chars(Lf, [erl_pp:form(F, Opts),"\n"]) end,
 	    Fs).
 
-listing(Ext, St) ->
+listing(Ext, St0) ->
+    St = St0#compile{encoding = none},
     listing(fun(Lf, Fs) -> beam_listing:module(Lf, Fs) end, Ext, St).
 
 listing(LFun, Ext, St) ->
@@ -1511,6 +1517,7 @@ listing(LFun, Ext, St) ->
     case file:open(Lfile, [write,delayed_write]) of
 	{ok,Lf} ->
             Code = restore_expanded_types(Ext, St#compile.code),
+            output_encoding(Lf, St),
 	    LFun(Lf, Code),
 	    ok = file:close(Lf),
 	    {ok,St};
@@ -1519,6 +1526,12 @@ listing(LFun, Ext, St) ->
 	    {error,St#compile{errors=St#compile.errors ++ Es}}
     end.
 
+output_encoding(F, #compile{encoding = none}) ->
+    ok = io:setopts(F, [{encoding, epp:default_encoding()}]);
+output_encoding(F, #compile{encoding = Encoding}) ->
+    ok = io:setopts(F, [{encoding, Encoding}]),
+    ok = io:fwrite(F, <<"%% ~s\n">>, [epp:encoding_to_string(Encoding)]).
+
 restore_expanded_types("P", Fs) ->
     epp:restore_typed_record_fields(Fs);
 restore_expanded_types("E", {M,I,Fs0}) ->
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index 1133882728..94c78e68f9 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -20,6 +20,7 @@
  [{description, "ERTS  CXC 138 10"},
   {vsn, "%VSN%"},
   {modules, [
+	     beam_a,
 	     beam_asm,
 	     beam_block,
 	     beam_bool,
@@ -40,6 +41,7 @@
 	     beam_type,
 	     beam_utils,
 	     beam_validator,
+	     beam_z,
 	     cerl,
 	     cerl_clauses,
 	     cerl_inline,
diff --git a/lib/compiler/src/sys_pre_expand.erl b/lib/compiler/src/sys_pre_expand.erl
index 97d3ff626c..e55fb2a037 100644
--- a/lib/compiler/src/sys_pre_expand.erl
+++ b/lib/compiler/src/sys_pre_expand.erl
@@ -454,9 +454,6 @@ expr({call,Line,{remote,Lr,M,F},As0}, St0) ->
     M1 = expand_package(M, St0),
     {[M2,F1|As1],St1} = expr_list([M1,F|As0], St0),
     {{call,Line,{remote,Lr,M2,F1},As1},St1};
-expr({call,Line,{tuple,_,[{atom,_,_}=M,{atom,_,_}=F]},As}, St) ->
-    %% Rewrite {Mod,Function}(Args...) to Mod:Function(Args...).
-    expr({call,Line,{remote,Line,M,F},As}, St);
 expr({call,Line,F,As0}, St0) ->
     {[Fun1|As1],St1} = expr_list([F|As0], St0),
     {{call,Line,Fun1,As1},St1};
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index 812e85553f..6a13495523 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -123,15 +123,24 @@ cg_fun(Les, Hvs, Vdb, AtomMod, NameArity, Anno, St0) ->
 					   put_reg(V, Reg)
 				   end, [], Hvs),
 			 stk=[]}, 0, Vdb),
-    {B,_Aft,St} = cg_list(Les, 0, Vdb, Bef,
+    {B0,_Aft,St} = cg_list(Les, 0, Vdb, Bef,
 			  St3#cg{bfail=0,
 				 ultimate_failure=UltimateMatchFail,
 				 is_top_block=true}),
+    B = fix_bs_match_strings(B0),
     {Name,Arity} = NameArity,
     Asm = [{label,Fi},line(Anno),{func_info,AtomMod,{atom,Name},Arity},
 	   {label,Fl}|B++[{label,UltimateMatchFail},if_end]],
     {Asm,Fl,St}.
 
+fix_bs_match_strings([{test,bs_match_string,F,[Ctx,BinList]}|Is])
+  when is_list(BinList) ->
+    I = {test,bs_match_string,F,[Ctx,list_to_bitstring(BinList)]},
+    [I|fix_bs_match_strings(Is)];
+fix_bs_match_strings([I|Is]) ->
+    [I|fix_bs_match_strings(Is)];
+fix_bs_match_strings([]) -> [].
+
 %% cg(Lkexpr, Vdb, StackReg, State) -> {[Ainstr],StackReg,State}.
 %%  Generate code for a kexpr.
 %%  Split function into two steps for clarity, not efficiency.
@@ -714,7 +723,22 @@ select_bin_seg(#l{ke={val_clause,{bin_int,Ctx,Sz,U,Fs,Val,Es},B},i=I,vdb=Vdb},
 				       I, Vdb, Bef, Ctx, St0),
     {Bis,Aft,St2} = match_cg(B, Fail, Int, St1),
     CtxReg = fetch_var(Ctx, Bef),
-    {[{bs_restore2,CtxReg,{Ctx,Ivar}}|Mis] ++ Bis,Aft,St2}.
+    Is = case Mis ++ Bis of
+	     [{test,bs_match_string,F,[OtherCtx,Bin1]},
+	      {bs_save2,OtherCtx,_},
+	      {bs_restore2,OtherCtx,_},
+	      {test,bs_match_string,F,[OtherCtx,Bin2]}|Is0] ->
+		 %% We used to do this optimization later, but it
+		 %% turns out that in huge functions with many
+		 %% bs_match_string instructions, it's a big win
+		 %% to do the combination now. To avoid copying the
+		 %% binary data again and again, we'll combine bitstrings
+		 %% in a list and convert all of it to a bitstring later.
+		 [{test,bs_match_string,F,[OtherCtx,[Bin1,Bin2]]}|Is0];
+	     Is0 ->
+		 Is0
+	 end,
+    {[{bs_restore2,CtxReg,{Ctx,Ivar}}|Is],Aft,St2}.
 
 select_extract_int([{var,Tl}], Val, {integer,Sz}, U, Fs, Vf,
 		   I, Vdb, Bef, Ctx, St) ->
@@ -1386,22 +1410,32 @@ catch_cg(C, {var,R}, Le, Vdb, Bef, St0) ->
 %%
 %%  put_list for constructing a cons is an atomic instruction
 %%  which can safely resuse one of the source registers as target.
-%%  Also binaries can reuse a source register as target.
 
 set_cg([{var,R}], {cons,Es}, Le, Vdb, Bef, St) ->
-    [S1,S2] = map(fun ({var,V}) -> fetch_var(V, Bef);
-		      (Other) -> Other
-		  end, Es),
+    [S1,S2] = cg_reg_args(Es, Bef),
     Int0 = clear_dead(Bef, Le#l.i, Vdb),
     Int1 = Int0#sr{reg=put_reg(R, Int0#sr.reg)},
     Ret = fetch_reg(R, Int1#sr.reg),
     {[{put_list,S1,S2,Ret}], Int1, St};
 set_cg([{var,R}], {binary,Segs}, Le, Vdb, Bef,
        #cg{in_catch=InCatch, bfail=Bfail}=St) ->
+    %% At run-time, binaries are constructed in three stages:
+    %% 1) First the size of the binary is calculated.
+    %% 2) Then the binary is allocated.
+    %% 3) Then each field in the binary is constructed.
+    %% For simplicity, we use the target register to also hold the
+    %% size of the binary. Therefore the target register must *not*
+    %% be one of the source registers.
+
+    %% First allocate the target register.
     Int0 = Bef#sr{reg=put_reg(R, Bef#sr.reg)},
     Target = fetch_reg(R, Int0#sr.reg),
-    Fail = {f,Bfail},
+
+    %% Also allocate a scratch register for size calculations.
     Temp = find_scratch_reg(Int0#sr.reg),
+
+    %% First generate the code that constructs each field.
+    Fail = {f,Bfail},
     PutCode = cg_bin_put(Segs, Fail, Bef),
     {Sis,Int1} =
 	case InCatch of
@@ -1410,6 +1444,8 @@ set_cg([{var,R}], {binary,Segs}, Le, Vdb, Bef,
 	end,
     MaxRegs = max_reg(Bef#sr.reg),
     Aft = clear_dead(Int1, Le#l.i, Vdb),
+
+    %% Now generate the complete code for constructing the binary.
     Code = cg_binary(PutCode, Target, Temp, Fail, MaxRegs, Le#l.a),
     {Sis++Code,Aft,St};
 set_cg([{var,R}], Con, Le, Vdb, Bef, St) ->
@@ -1419,10 +1455,8 @@ set_cg([{var,R}], Con, Le, Vdb, Bef, St) ->
     Ais = case Con of
 	      {tuple,Es} ->
 		  [{put_tuple,length(Es),Ret}] ++ cg_build_args(Es, Bef);
-	      {var,V} ->	  % Normally removed by kernel optimizer.
-		  [{move,fetch_var(V, Int),Ret}];
 	      Other ->
-		  [{move,Other,Ret}]
+		  [{move,cg_reg_arg(Other, Int),Ret}]
 	  end,
     {Ais,clear_dead(Int, Le#l.i, Vdb),St}.
 
@@ -1576,8 +1610,7 @@ cg_gen_binsize([], _, _, _, _, Acc) -> Acc.
 %% cg_bin_opt(Code0) -> Code
 %%  Optimize the size calculations for binary construction.
 
-cg_bin_opt([{move,Size,D},{bs_append,Fail,D,Extra,Regs0,U,Bin,Flags,D}|Is]) ->
-    Regs = cg_bo_newregs(Regs0, D),
+cg_bin_opt([{move,Size,D},{bs_append,Fail,D,Extra,Regs,U,Bin,Flags,D}|Is]) ->
     cg_bin_opt([{bs_append,Fail,Size,Extra,Regs,U,Bin,Flags,D}|Is]);
 cg_bin_opt([{move,Size,D},{bs_private_append,Fail,D,U,Bin,Flags,D}|Is]) ->
     cg_bin_opt([{bs_private_append,Fail,Size,U,Bin,Flags,D}|Is]);
@@ -1585,9 +1618,8 @@ cg_bin_opt([{move,{integer,0},D},{bs_add,_,[D,{integer,_}=S,1],Dst}|Is]) ->
     cg_bin_opt([{move,S,Dst}|Is]);
 cg_bin_opt([{move,{integer,0},D},{bs_add,Fail,[D,S,U],Dst}|Is]) ->
     cg_bin_opt([{bs_add,Fail,[{integer,0},S,U],Dst}|Is]);
-cg_bin_opt([{move,{integer,Bytes},D},{Op,Fail,D,Extra,Regs0,Flags,D}|Is])
+cg_bin_opt([{move,{integer,Bytes},D},{Op,Fail,D,Extra,Regs,Flags,D}|Is])
   when Op =:= bs_init2; Op =:= bs_init_bits ->
-    Regs = cg_bo_newregs(Regs0, D),
     cg_bin_opt([{Op,Fail,Bytes,Extra,Regs,Flags,D}|Is]);
 cg_bin_opt([{move,Src1,Dst},{bs_add,Fail,[Dst,Src2,U],Dst}|Is]) ->
     cg_bin_opt([{bs_add,Fail,[Src1,Src2,U],Dst}|Is]);
@@ -1595,20 +1627,9 @@ cg_bin_opt([I|Is]) ->
     [I|cg_bin_opt(Is)];
 cg_bin_opt([]) -> [].
 
-cg_bo_newregs(R, {x,X}) when R-1 =:= X -> R-1;
-cg_bo_newregs(R, _) -> R.
-
-%% Common for new and old binary code generation.
-
 cg_bin_put({bin_seg,[],S0,U,T,Fs,[E0,Next]}, Fail, Bef) ->
-    S1 = case S0 of
-	     {var,Sv} -> fetch_var(Sv, Bef);
-	     _ -> S0
-	 end,
-    E1 = case E0 of
-	     {var,V} -> fetch_var(V, Bef);
-	     Other ->   Other
-	 end,
+    S1 = cg_reg_arg(S0, Bef),
+    E1 = cg_reg_arg(E0, Bef),
     {Format,Op} = case T of
 		      integer -> {plain,bs_put_integer};
 		      utf8 ->    {utf,bs_put_utf8};
@@ -1626,9 +1647,7 @@ cg_bin_put({bin_seg,[],S0,U,T,Fs,[E0,Next]}, Fail, Bef) ->
 cg_bin_put({bin_end,[]}, _, _) -> [].
 
 cg_build_args(As, Bef) ->
-    map(fun ({var,V}) -> {put,fetch_var(V, Bef)};
-	    (Other) -> {put,Other}
-	end, As).
+    [{put,cg_reg_arg(A, Bef)} || A <- As].
 
 %% return_cg([Val], Le, Vdb, Bef, St) -> {[Ainstr],Aft,St}.
 %% break_cg([Val], Le, Vdb, Bef, St) -> {[Ainstr],Aft,St}.
@@ -1907,27 +1926,13 @@ fetch_var(V, Sr) ->
 	error -> fetch_stack(V, Sr#sr.stk)
     end.
 
-% find_var(V, Sr) ->
-%     case find_reg(V, Sr#sr.reg) of
-% 	{ok,R} -> {ok,R};
-% 	error ->
-% 	    case find_stack(V, Sr#sr.stk) of
-% 		{ok,S} -> {ok,S};
-% 		error -> error
-% 	    end
-%     end.
-
 load_vars(Vs, Regs) ->
     foldl(fun ({var,V}, Rs) -> put_reg(V, Rs) end, Regs, Vs).
 
 %% put_reg(Val, Regs) -> Regs.
-%% free_reg(Val, Regs) -> Regs.
 %% find_reg(Val, Regs) -> ok{r{R}} | error.
 %% fetch_reg(Val, Regs) -> r{R}.
 %%  Functions to interface the registers.
-%%  put_reg puts a value into a free register,
-%%  load_reg loads a value into a fixed register
-%%  free_reg frees a register containing a specific value.
 
 % put_regs(Vs, Rs) -> foldl(fun put_reg/2, Rs, Vs).
 
@@ -1938,10 +1943,6 @@ put_reg_1(V, [{reserved,I,V}|Rs], I) -> [{I,V}|Rs];
 put_reg_1(V, [R|Rs], I) -> [R|put_reg_1(V, Rs, I+1)];
 put_reg_1(V, [], I) -> [{I,V}].
 
-% free_reg(V, [{I,V}|Rs]) -> [free|Rs];
-% free_reg(V, [R|Rs]) -> [R|free_reg(V, Rs)];
-% free_reg(V, []) -> [].
-
 fetch_reg(V, [{I,V}|_]) -> {x,I};
 fetch_reg(V, [_|SRs]) -> fetch_reg(V, SRs).
 
@@ -1958,9 +1959,6 @@ find_scratch_reg([free|_], I) -> {x,I};
 find_scratch_reg([_|Rs], I) -> find_scratch_reg(Rs, I+1);
 find_scratch_reg([], I) -> {x,I}.
 
-%%copy_reg(Val, R, Regs) -> load_reg(Val, R, Regs).
-%%move_reg(Val, R, Regs) -> load_reg(Val, R, free_reg(Val, Regs)).
-
 replace_reg_contents(Old, New, [{I,Old}|Rs]) -> [{I,New}|Rs];
 replace_reg_contents(Old, New, [R|Rs]) -> [R|replace_reg_contents(Old, New, Rs)].
 
diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl
index b184987625..8ef71e1346 100644
--- a/lib/compiler/src/v3_kernel.erl
+++ b/lib/compiler/src/v3_kernel.erl
@@ -81,7 +81,7 @@
 -export([module/2,format_error/1]).
 
 -import(lists, [map/2,foldl/3,foldr/3,mapfoldl/3,splitwith/2,member/2,
-		keymember/3,keyfind/3]).
+		keymember/3,keyfind/3,partition/2]).
 -import(ordsets, [add_element/2,del_element/2,union/2,union/1,subtract/2]).
 -import(cerl, [c_tuple/1]).
 
@@ -1081,9 +1081,44 @@ select_bin_con(Cs0) ->
 		       end, Cs0),
     select_bin_con_1(Cs1).
 
+
 select_bin_con_1(Cs) ->
     try
-	select_bin_int(Cs)
+	%% The usual way to match literals is to first extract the
+	%% value to a register, and then compare the register to the
+	%% literal value. Extracting the value is good if we need
+	%% compare it more than once.
+	%%
+	%% But we would like to combine the extracting and the
+	%% comparing into a single instruction if we know that
+	%% a binary segment must contain specific integer value
+	%% or the matching will fail, like in this example:
+	%%
+	%% <<42:8,...>> ->
+	%% <<42:8,...>> ->
+	%% .
+	%% .
+	%% .
+	%% <<42:8,...>> ->
+	%% <<>> ->
+	%%
+	%% The first segment must either contain the integer 42
+	%% or the binary must end for the match to succeed.
+	%%
+	%% The way we do is to replace the generic #k_bin_seg{}
+	%% record with a #k_bin_int{} record if all clauses will
+	%% select the same literal integer (except for one or more
+	%% clauses that will end the binary).
+
+	{BinSegs0,BinEnd} =
+	    partition(fun (C) ->
+			      clause_con(C) =:= k_bin_seg
+		      end, Cs),
+	BinSegs = select_bin_int(BinSegs0),
+	case BinEnd of
+	    [] -> BinSegs;
+	    [_|_] -> BinSegs ++ [{k_bin_end,BinEnd}]
+	end
     catch
 	throw:not_possible ->
 	    select_bin_con_2(Cs)
@@ -1097,7 +1132,7 @@ select_bin_con_2([]) -> [].
 
 %% select_bin_int([Clause]) -> {k_bin_int,[Clause]}
 %%  If the first pattern in each clause selects the same integer,
-%%  rewrite all clauses to use #k_bin_int{} (which will later to
+%%  rewrite all clauses to use #k_bin_int{} (which will later be
 %%  translated to a bs_match_string/4 instruction).
 %%
 %%  If it is not possible to do this rewrite, a 'not_possible'
@@ -1346,7 +1381,7 @@ clause_arg(#iclause{pats=[Arg|_]}) -> Arg.
 
 clause_con(C) -> arg_con(clause_arg(C)).
 
-clause_val(C) -> arg_val(clause_arg(C)).
+clause_val(C) -> arg_val(clause_arg(C), C).
 
 is_var_clause(C) -> clause_con(C) =:= k_var.
 
@@ -1377,7 +1412,7 @@ arg_con(Arg) ->
 	#k_var{} -> k_var
     end.
 
-arg_val(Arg) ->
+arg_val(Arg, C) ->
     case arg_arg(Arg) of
 	#k_literal{val=Lit} -> Lit;
 	#k_int{val=I} -> I;
@@ -1385,7 +1420,13 @@ arg_val(Arg) ->
 	#k_atom{val=A} -> A;
 	#k_tuple{es=Es} -> length(Es);
 	#k_bin_seg{size=S,unit=U,type=T,flags=Fs} ->
-	    {set_kanno(S, []),U,T,Fs}
+	    case S of
+		#k_var{name=V} ->
+		    #iclause{isub=Isub} = C,
+		    {#k_var{name=get_vsub(V, Isub)},U,T,Fs};
+		_ ->
+		    {set_kanno(S, []),U,T,Fs}
+	    end
     end.
 
 %% ubody_used_vars(Expr, State) -> [UsedVar]
diff --git a/lib/compiler/test/Makefile b/lib/compiler/test/Makefile
index e047166ade..3b065ec3b9 100644
--- a/lib/compiler/test/Makefile
+++ b/lib/compiler/test/Makefile
@@ -10,7 +10,7 @@ MODULES= \
 	apply_SUITE \
 	beam_validator_SUITE \
 	beam_disasm_SUITE \
-	beam_expect_SUITE \
+	beam_except_SUITE \
 	bs_bincomp_SUITE \
 	bs_bit_binaries_SUITE \
 	bs_construct_SUITE \
@@ -39,7 +39,7 @@ MODULES= \
 NO_OPT= \
 	andor \
 	apply \
-	beam_expect \
+	beam_except \
 	bs_construct \
         bs_match \
 	bs_utf \
diff --git a/lib/compiler/test/andor_SUITE.erl b/lib/compiler/test/andor_SUITE.erl
index f7388f1614..fe69aeeb43 100644
--- a/lib/compiler/test/andor_SUITE.erl
+++ b/lib/compiler/test/andor_SUITE.erl
@@ -29,11 +29,12 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [t_case, t_and_or, t_andalso, t_orelse, inside, overlap,
-     combined, in_case, before_and_inside_if].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [t_case,t_and_or,t_andalso,t_orelse,inside,overlap,
+       combined,in_case,before_and_inside_if]}].
 
 init_per_suite(Config) ->
     Config.
diff --git a/lib/compiler/test/beam_expect_SUITE.erl b/lib/compiler/test/beam_except_SUITE.erl
index 6f216eac4f..6b55224a42 100644
--- a/lib/compiler/test/beam_expect_SUITE.erl
+++ b/lib/compiler/test/beam_except_SUITE.erl
@@ -16,7 +16,7 @@
 %%
 %% %CopyrightEnd%
 %%
--module(beam_expect_SUITE).
+-module(beam_except_SUITE).
 
 -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1,
 	 init_per_group/2,end_per_group/2,
diff --git a/lib/compiler/test/beam_validator_SUITE.erl b/lib/compiler/test/beam_validator_SUITE.erl
index 902867bc19..c84c83795a 100644
--- a/lib/compiler/test/beam_validator_SUITE.erl
+++ b/lib/compiler/test/beam_validator_SUITE.erl
@@ -47,17 +47,18 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [beam_files, compiler_bug, stupid_but_valid, xrange,
-     yrange, stack, call_last, merge_undefined, uninit,
-     unsafe_catch, dead_code, mult_labels,
-     overwrite_catchtag, overwrite_trytag, accessing_tags,
-     bad_catch_try, cons_guard, freg_range, freg_uninit,
-     freg_state, bin_match, bin_aligned, bad_dsetel,
-     state_after_fault_in_catch, no_exception_in_catch,
-     undef_label, illegal_instruction, failing_gc_guard_bif].
+    [beam_files,{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [compiler_bug,stupid_but_valid,xrange,
+       yrange,stack,call_last,merge_undefined,uninit,
+       unsafe_catch,dead_code,mult_labels,
+       overwrite_catchtag,overwrite_trytag,accessing_tags,
+       bad_catch_try,cons_guard,freg_range,freg_uninit,
+       freg_state,bin_match,bin_aligned,bad_dsetel,
+       state_after_fault_in_catch,no_exception_in_catch,
+       undef_label,illegal_instruction,failing_gc_guard_bif]}].
 
 init_per_suite(Config) ->
     Config.
diff --git a/lib/compiler/test/bs_bit_binaries_SUITE.erl b/lib/compiler/test/bs_bit_binaries_SUITE.erl
index 30276f1259..897b4769f1 100644
--- a/lib/compiler/test/bs_bit_binaries_SUITE.erl
+++ b/lib/compiler/test/bs_bit_binaries_SUITE.erl
@@ -34,13 +34,15 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [misc, horrid_match, test_bitstr, test_bit_size,
-     asymmetric_tests, big_asymmetric_tests,
-     binary_to_and_from_list, big_binary_to_and_from_list,
-     send_and_receive, send_and_receive_alot].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [misc,horrid_match,test_bitstr,test_bit_size,
+       asymmetric_tests,big_asymmetric_tests,
+       binary_to_and_from_list,big_binary_to_and_from_list,
+       send_and_receive,send_and_receive_alot]}].
+      
 
 init_per_suite(Config) ->
     Config.
diff --git a/lib/compiler/test/bs_construct_SUITE.erl b/lib/compiler/test/bs_construct_SUITE.erl
index 9ab76449c7..4ea5235bb6 100644
--- a/lib/compiler/test/bs_construct_SUITE.erl
+++ b/lib/compiler/test/bs_construct_SUITE.erl
@@ -36,12 +36,14 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [two, test1, fail, float_bin, in_guard, in_catch,
-     nasty_literals, side_effect, opt, otp_7556, float_arith,
-     otp_8054].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [two,test1,fail,float_bin,in_guard,in_catch,
+       nasty_literals,side_effect,opt,otp_7556,float_arith,
+       otp_8054]}].
+
 
 init_per_suite(Config) ->
     Config.
@@ -360,6 +362,11 @@ in_catch(Config) when is_list(Config) ->
     ?line <<255>> = small(255, <<1,2,3,4,5,6,7,8,9>>),
     ?line <<1,2>> = small(<<7,8,9,10>>, 258),
     ?line <<>> = small(<<1,2,3,4,5>>, <<7,8,9,10>>),
+
+    <<15,240,0,42>> = small2(255, 42),
+    <<7:20>> = small2(<<1,2,3>>, 7),
+    <<300:12>> = small2(300, <<1,2,3>>),
+    <<>> = small2(<<1>>, <<2>>),
     ok.
 
 small(A, B) ->
@@ -381,6 +388,25 @@ small(A, B) ->
     end,
     <<ResA/binary,ResB/binary>>.
 
+small2(A, B) ->
+    case begin
+	     case catch <<A:12>> of
+		 {'EXIT',_} -> <<>>;
+		 ResA0 -> ResA0
+	     end
+	 end of
+	ResA -> ok
+    end,
+    case begin
+	     case catch <<B:20>> of
+		 {'EXIT',_} -> <<>>;
+		 ResB0 -> ResB0
+	     end
+	 end of
+	ResB -> ok
+    end,
+    <<ResA/binary-unit:1,ResB/binary-unit:1>>.
+
 nasty_literals(Config) when is_list(Config) ->
     case erlang:system_info(endian) of
 	big ->
diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl
index 86c8cb23f5..d63d2235d7 100644
--- a/lib/compiler/test/bs_match_SUITE.erl
+++ b/lib/compiler/test/bs_match_SUITE.erl
@@ -33,7 +33,7 @@
 	 matching_meets_construction/1,simon/1,matching_and_andalso/1,
 	 otp_7188/1,otp_7233/1,otp_7240/1,otp_7498/1,
 	 match_string/1,zero_width/1,bad_size/1,haystack/1,
-	 cover_beam_bool/1]).
+	 cover_beam_bool/1,matched_out_size/1,follow_fail_branch/1]).
 
 -export([coverage_id/1,coverage_external_ignore/2]).
 
@@ -44,19 +44,21 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [fun_shadow, int_float, otp_5269, null_fields, wiger,
-     bin_tail, save_restore, shadowed_size_var,
-     partitioned_bs_match, function_clause, unit,
-     shared_sub_bins, bin_and_float, dec_subidentifiers,
-     skip_optional_tag, wfbm, degenerated_match, bs_sum,
-     coverage, multiple_uses, zero_label, followed_by_catch,
-     matching_meets_construction, simon,
-     matching_and_andalso, otp_7188, otp_7233, otp_7240,
-     otp_7498, match_string, zero_width, bad_size, haystack,
-     cover_beam_bool].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [fun_shadow,int_float,otp_5269,null_fields,wiger,
+       bin_tail,save_restore,shadowed_size_var,
+       partitioned_bs_match,function_clause,unit,
+       shared_sub_bins,bin_and_float,dec_subidentifiers,
+       skip_optional_tag,wfbm,degenerated_match,bs_sum,
+       coverage,multiple_uses,zero_label,followed_by_catch,
+       matching_meets_construction,simon,
+       matching_and_andalso,otp_7188,otp_7233,otp_7240,
+       otp_7498,match_string,zero_width,bad_size,haystack,
+       cover_beam_bool,matched_out_size,follow_fail_branch]}].
+
 
 init_per_suite(Config) ->
     Config.
@@ -1079,6 +1081,59 @@ do_cover_beam_bool(Bin, X) when X > 0 ->
 do_cover_beam_bool(<<_,Bin/binary>>, X) ->
     do_cover_beam_bool(Bin, X+1).
 
+matched_out_size(Config) when is_list(Config) ->
+    {253,16#DEADBEEF} = mos_int(<<8,253,16#DEADBEEF:32>>),
+    {6,16#BEEFDEAD} = mos_int(<<3,6:3,16#BEEFDEAD:32>>),
+    {53,16#CAFEDEADBEEFCAFE} = mos_int(<<16,53:16,16#CAFEDEADBEEFCAFE:64>>),
+    {23,16#CAFEDEADBEEFCAFE} = mos_int(<<5,23:5,16#CAFEDEADBEEFCAFE:64>>),
+
+    {<<1,2,3>>,4} = mos_bin(<<3,1,2,3,4,3>>),
+    {<<1,2,3,7>>,19,42} = mos_bin(<<4,1,2,3,7,19,4,42>>),
+    <<1,2,3,7>> = mos_bin(<<4,1,2,3,7,"abcdefghij">>),
+
+    ok.
+
+mos_int(<<L,I:L,X:32>>) ->
+    {I,X};
+mos_int(<<L,I:L,X:64>>) ->
+    {I,X}.
+
+mos_bin(<<L,Bin:L/binary,X:8,L>>) ->
+    L = byte_size(Bin),
+    {Bin,X};
+mos_bin(<<L,Bin:L/binary,X:8,L,Y:8>>) ->
+    L = byte_size(Bin),
+    {Bin,X,Y};
+mos_bin(<<L,Bin:L/binary,"abcdefghij">>) ->
+    L = byte_size(Bin),
+    Bin.
+
+follow_fail_branch(_) ->
+    42 = ffb_1(<<0,1>>, <<0>>),
+    8 = ffb_1(<<0,1>>, [a]),
+    42 = ffb_2(<<0,1>>, <<0>>, 17),
+    8 = ffb_2(<<0,1>>, [a], 0),
+    ok.
+
+ffb_1(<<_,T/bitstring>>, List) ->
+    case List of
+	<<_>> ->
+	    42;
+	[_|_] ->
+	    %% The fail branch of the bs_start_match2 instruction
+	    %% pointing to here would be ignored, making the compiler
+	    %% incorrectly assume that the delayed sub-binary
+	    %% optimization was safe.
+	    bit_size(T)
+    end.
+
+ffb_2(<<_,T/bitstring>>, List, A) ->
+    case List of
+	<<_>> when A =:= 17 -> 42;
+	[_|_] -> bit_size(T)
+    end.
+
+
 check(F, R) ->
     R = F().
 
diff --git a/lib/compiler/test/compilation_SUITE.erl b/lib/compiler/test/compilation_SUITE.erl
index fed7bec7d4..bec97b0199 100644
--- a/lib/compiler/test/compilation_SUITE.erl
+++ b/lib/compiler/test/compilation_SUITE.erl
@@ -28,26 +28,29 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [self_compile_old_inliner, self_compile, compiler_1,
-     compiler_3, compiler_5, beam_compiler_1,
-     beam_compiler_2, beam_compiler_3, beam_compiler_4,
-     beam_compiler_5, beam_compiler_6, beam_compiler_7,
-     beam_compiler_8, beam_compiler_9, beam_compiler_10,
-     beam_compiler_11, beam_compiler_12,
-     nested_tuples_in_case_expr, otp_2330, guards,
-     {group, vsn}, otp_2380, otp_2141, otp_2173, otp_4790,
-     const_list_256, bin_syntax_1, bin_syntax_2,
-     bin_syntax_3, bin_syntax_4, bin_syntax_5, bin_syntax_6,
-     live_var, convopts, bad_functional_value,
-     catch_in_catch, redundant_case, long_string, otp_5076,
-     complex_guard, otp_5092, otp_5151, otp_5235, otp_5244,
-     trycatch_4, opt_crash, otp_5404, otp_5436, otp_5481,
-     otp_5553, otp_5632, otp_5714, otp_5872, otp_6121,
-     otp_6121a, otp_6121b, otp_7202, otp_7345, on_load,
-     string_table,otp_8949_a,otp_8949_a,split_cases].
+    [self_compile_old_inliner,self_compile,
+     {group,p}].
 
 groups() -> 
-    [{vsn, [], [vsn_1, vsn_2, vsn_3]}].
+    [{vsn,[parallel],[vsn_1,vsn_2,vsn_3]},
+     {p,test_lib:parallel(),
+      [compiler_1,
+       compiler_3,compiler_5,beam_compiler_1,
+       beam_compiler_2,beam_compiler_3,beam_compiler_4,
+       beam_compiler_5,beam_compiler_6,beam_compiler_7,
+       beam_compiler_8,beam_compiler_9,beam_compiler_10,
+       beam_compiler_11,beam_compiler_12,
+       nested_tuples_in_case_expr,otp_2330,guards,
+       {group,vsn},otp_2380,otp_2141,otp_2173,otp_4790,
+       const_list_256,bin_syntax_1,bin_syntax_2,
+       bin_syntax_3,bin_syntax_4,bin_syntax_5,bin_syntax_6,
+       live_var,convopts,bad_functional_value,
+       catch_in_catch,redundant_case,long_string,otp_5076,
+       complex_guard,otp_5092,otp_5151,otp_5235,otp_5244,
+       trycatch_4,opt_crash,otp_5404,otp_5436,otp_5481,
+       otp_5553,otp_5632,otp_5714,otp_5872,otp_6121,
+       otp_6121a,otp_6121b,otp_7202,otp_7345,on_load,
+       string_table,otp_8949_a,otp_8949_a,split_cases]}].
 
 init_per_suite(Config) ->
     Config.
@@ -623,7 +626,7 @@ string_table(Config) when is_list(Config) ->
     ?line File = filename:join(DataDir, "string_table.erl"),
     ?line {ok,string_table,Beam,[]} = compile:file(File, [return, binary]),
     ?line {ok,{string_table,[StringTableChunk]}} = beam_lib:chunks(Beam, ["StrT"]),
-    ?line {"StrT", <<"stringabletringtable">>} = StringTableChunk,
+    ?line {"StrT", <<"stringtable">>} = StringTableChunk,
     ok.
 
 otp_8949_a(Config) when is_list(Config) ->
diff --git a/lib/compiler/test/core_SUITE.erl b/lib/compiler/test/core_SUITE.erl
index 06185bfc34..a40dc32d59 100644
--- a/lib/compiler/test/core_SUITE.erl
+++ b/lib/compiler/test/core_SUITE.erl
@@ -43,11 +43,13 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [dehydrated_itracer,nested_tries,seq_in_guard,make_effect_seq,
-     eval_is_boolean,unsafe_case,nomatch_shadow,reversed_annos].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [dehydrated_itracer,nested_tries,seq_in_guard,make_effect_seq,
+       eval_is_boolean,unsafe_case,nomatch_shadow,reversed_annos]}].
+
 
 init_per_suite(Config) ->
     Config.
diff --git a/lib/compiler/test/core_fold_SUITE.erl b/lib/compiler/test/core_fold_SUITE.erl
index 54bd52947e..2adc71c237 100644
--- a/lib/compiler/test/core_fold_SUITE.erl
+++ b/lib/compiler/test/core_fold_SUITE.erl
@@ -31,11 +31,13 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [t_element, setelement, t_length, append, t_apply, bifs,
-     eq, nested_call_in_case, guard_try_catch, coverage].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [t_element,setelement,t_length,append,t_apply,bifs,
+       eq,nested_call_in_case,guard_try_catch,coverage]}].
+
 
 init_per_suite(Config) ->
     Config.
diff --git a/lib/compiler/test/error_SUITE.erl b/lib/compiler/test/error_SUITE.erl
index fb51e013ce..859c4571ea 100644
--- a/lib/compiler/test/error_SUITE.erl
+++ b/lib/compiler/test/error_SUITE.erl
@@ -22,16 +22,21 @@
 
 -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, 
 	 init_per_group/2,end_per_group/2,
-	 head_mismatch_line/1,warnings_as_errors/1, bif_clashes/1]).
+	 head_mismatch_line/1,warnings_as_errors/1, bif_clashes/1,
+	 transforms/1]).
+
+%% Used by transforms/1 test case.
+-export([parse_transform/2]).
 
 suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [head_mismatch_line, warnings_as_errors, bif_clashes].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [head_mismatch_line,warnings_as_errors,bif_clashes,transforms]}].
 
 init_per_suite(Config) ->
     Config.
@@ -216,6 +221,24 @@ warnings_as_errors(Config) when is_list(Config) ->
 
     ok.
 
+transforms(Config) ->
+    Ts1 = [{undef_parse_transform,
+	    <<"
+              -compile({parse_transform,non_existing}).
+             ">>,
+	    [return],
+	    {error,[{none,compile,{undef_parse_transform,non_existing}}],[]}}],
+    [] = run(Config, Ts1),
+    Ts2 = <<"
+              -compile({parse_transform,",?MODULE_STRING,"}).
+             ">>,
+    {error,[{none,compile,{parse_transform,?MODULE,{too_bad,_}}}],[]} =
+	run_test(Ts2, test_filename(Config), [], dont_write_beam),
+    ok.
+
+parse_transform(_, _) ->
+    error(too_bad).
+
 
 run(Config, Tests) ->
     ?line File = test_filename(Config),
@@ -260,12 +283,14 @@ filter(X) ->
 %% Compiles a test module and returns the list of errors and warnings.
 
 test_filename(Conf) ->
-    Filename = "errors_test.erl",
+    Filename = ["errors_test_",test_lib:uniq(),".erl"],
     DataDir = ?config(priv_dir, Conf),
     filename:join(DataDir, Filename).
 
 run_test(Test0, File, Warnings, WriteBeam) ->
-    ?line Test = ["-module(errors_test). ", Test0],
+    ModName = filename:rootname(filename:basename(File), ".erl"),
+    Mod = list_to_atom(ModName),
+    Test = ["-module(",ModName,"). ",Test0],
     ?line Opts = case WriteBeam of
 		     dont_write_beam ->
 			 [binary,return_errors|Warnings];
@@ -279,17 +304,17 @@ run_test(Test0, File, Warnings, WriteBeam) ->
 
     %% Test result of compilation.
     ?line Res = case compile:file(File, Opts) of
-		    {ok,errors_test,_,[{_File,Ws}]} ->
+		    {ok,Mod,_,[{_File,Ws}]} ->
 			%io:format("compile:file(~s,~p) ->~n~p~n",
 			%	  [File,Opts,Ws]),
 			{warning,Ws};
-		    {ok,errors_test,_,[]} ->
+		    {ok,Mod,_,[]} ->
 			%io:format("compile:file(~s,~p) ->~n~p~n",
 			%	  [File,Opts,Ws]),
 			[];
-		    {ok,errors_test,[{_File,Ws}]} ->
+		    {ok,Mod,[{_File,Ws}]} ->
 			{warning,Ws};
-		    {ok,errors_test,[]} ->
+		    {ok,Mod,[]} ->
 			[];
 		    {error,[{XFile,Es}],Ws} = _ZZ when is_list(XFile) ->
 			%io:format("compile:file(~s,~p) ->~n~p~n",
diff --git a/lib/compiler/test/guard_SUITE.erl b/lib/compiler/test/guard_SUITE.erl
index 40711783ed..66c0b9a295 100644
--- a/lib/compiler/test/guard_SUITE.erl
+++ b/lib/compiler/test/guard_SUITE.erl
@@ -39,17 +39,18 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [misc, const_cond, basic_not, complex_not, nested_nots,
-     semicolon, complex_semicolon, comma, or_guard,
-     more_or_guards, complex_or_guards, and_guard, xor_guard,
-     more_xor_guards, build_in_guard, old_guard_tests, gbif,
-     t_is_boolean, is_function_2, tricky, rel_ops,
-     literal_type_tests, basic_andalso_orelse, traverse_dcd,
-     check_qlc_hrl, andalso_semi, t_tuple_size, binary_part,
-     bad_constants].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [misc,const_cond,basic_not,complex_not,nested_nots,
+       semicolon,complex_semicolon,comma,or_guard,
+       more_or_guards,complex_or_guards,and_guard,xor_guard,
+       more_xor_guards,build_in_guard,old_guard_tests,gbif,
+       t_is_boolean,is_function_2,tricky,rel_ops,
+       literal_type_tests,basic_andalso_orelse,traverse_dcd,
+       check_qlc_hrl,andalso_semi,t_tuple_size,binary_part,
+       bad_constants]}].
 
 init_per_suite(Config) ->
     Config.
diff --git a/lib/compiler/test/inline_SUITE.erl b/lib/compiler/test/inline_SUITE.erl
index 2e17d3fde6..e2eb6a0dec 100644
--- a/lib/compiler/test/inline_SUITE.erl
+++ b/lib/compiler/test/inline_SUITE.erl
@@ -32,17 +32,22 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [attribute, bsdecode, bsdes, barnes2, decode1, smith,
-     itracer, pseudoknot, comma_splitter, lists, really_inlined, otp_7223,
-     coverage].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [attribute,bsdecode,bsdes,barnes2,decode1,smith,
+       itracer,pseudoknot,comma_splitter,lists,really_inlined,otp_7223,
+       coverage]}].
 
 init_per_suite(Config) ->
-    Config.
+    Pa = "-pa " ++ filename:dirname(code:which(?MODULE)),
+    {ok,Node} = start_node(compiler, Pa),
+    [{testing_node,Node}|Config].
 
-end_per_suite(_Config) ->
+end_per_suite(Config) ->
+    Node = ?config(testing_node, Config),
+    ?t:stop_node(Node),
     ok.
 
 init_per_group(_GroupName, Config) ->
@@ -81,6 +86,7 @@ attribute(Config) when is_list(Config) ->
 ?comp(comma_splitter).
 
 try_inline(Mod, Config) ->
+    Node = ?config(testing_node, Config),
     ?line Src = filename:join(?config(data_dir, Config), atom_to_list(Mod)),
     ?line Out = ?config(priv_dir,Config),
 
@@ -89,8 +95,6 @@ try_inline(Mod, Config) ->
     ?line {ok,Mod} = compile:file(Src, [{outdir,Out},report,bin_opt_info,clint]),
 
     ?line Dog = test_server:timetrap(test_server:minutes(10)),
-    ?line Pa = "-pa " ++ filename:dirname(code:which(?MODULE)),
-    ?line {ok,Node} = start_node(compiler, Pa),
     ?line NormalResult = rpc:call(Node, ?MODULE, load_and_call, [Out,Mod]),
     ?line test_server:timetrap_cancel(Dog),
 
@@ -125,7 +129,6 @@ try_inline(Mod, Config) ->
     %% Delete Beam file.
     ?line ok = file:delete(filename:join(Out, atom_to_list(Mod)++code:objfile_extension())),
 
-    ?line ?t:stop_node(Node),
     ok.
 
 compare(Same, Same) -> ok;
@@ -293,9 +296,9 @@ otp_7223_2({a}) ->
     1.
 
 coverage(Config) when is_list(Config) ->
-    ?line Src = filename:join(?config(data_dir, Config), bsdecode),
-    ?line Out = ?config(priv_dir,Config),
-    ?line {ok,Mod} = compile:file(Src, [{outdir,Out},report,{inline,0},clint]),
-    ?line {ok,Mod} = compile:file(Src, [{outdir,Out},report,{inline,20},verbose,clint]),
-    ?line ok = file:delete(filename:join(Out, "bsdecode"++code:objfile_extension())),
+    Mod = bsdecode,
+    Src = filename:join(?config(data_dir, Config), Mod),
+    {ok,Mod,_} = compile:file(Src, [binary,report,{inline,0},clint]),
+    {ok,Mod,_} = compile:file(Src, [binary,report,{inline,20},
+				    verbose,clint]),
     ok.
diff --git a/lib/compiler/test/match_SUITE.erl b/lib/compiler/test/match_SUITE.erl
index 9406d7de8f..de44926d81 100644
--- a/lib/compiler/test/match_SUITE.erl
+++ b/lib/compiler/test/match_SUITE.erl
@@ -30,11 +30,13 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [pmatch, mixed, aliases, match_in_call, untuplify,
-     shortcut_boolean, letify_guard, selectify, underscore, coverage].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [pmatch,mixed,aliases,match_in_call,untuplify,
+       shortcut_boolean,letify_guard,selectify,underscore,coverage]}].
+
 
 init_per_suite(Config) ->
     Config.
diff --git a/lib/compiler/test/misc_SUITE.erl b/lib/compiler/test/misc_SUITE.erl
index 0376c7ef3e..44c7161530 100644
--- a/lib/compiler/test/misc_SUITE.erl
+++ b/lib/compiler/test/misc_SUITE.erl
@@ -57,11 +57,12 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 -spec all() -> misc_SUITE_test_cases().
 all() -> 
     test_lib:recompile(?MODULE),
-    [tobias, empty_string, md5, silly_coverage,
-     confused_literals, integer_encoding, override_bif].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,[],%%test_lib:parallel(),
+      [tobias,empty_string,md5,silly_coverage,
+       confused_literals,integer_encoding,override_bif]}].
 
 init_per_suite(Config) ->
     Config.
@@ -182,6 +183,14 @@ silly_coverage(Config) when is_list(Config) ->
     CodegenInput = {?MODULE,[{foo,0}],[],[{function,foo,0,[a|b],a,b,[]}]},
     ?line expect_error(fun() -> v3_codegen:module(CodegenInput, []) end),
 
+    %% beam_a
+    BeamAInput = {?MODULE,[{foo,0}],[],
+		  [{function,foo,0,2,
+		    [{label,1},
+		     {func_info,{atom,?MODULE},{atom,foo},0},
+		     {label,2}|non_proper_list]}],99},
+    expect_error(fun() -> beam_a:module(BeamAInput, []) end),
+
     %% beam_block
     BlockInput = {?MODULE,[{foo,0}],[],
 		  [{function,foo,0,2,
@@ -263,6 +272,13 @@ silly_coverage(Config) when is_list(Config) ->
 		       {block,[a|b]}]}],0},
     ?line expect_error(fun() -> beam_receive:module(ReceiveInput, []) end),
 
+    BeamZInput = {?MODULE,[{foo,0}],[],
+		  [{function,foo,0,2,
+		    [{label,1},
+		     {func_info,{atom,?MODULE},{atom,foo},0},
+		     {label,2}|non_proper_list]}],99},
+    expect_error(fun() -> beam_z:module(BeamZInput, []) end),
+
     ok.
 
 expect_error(Fun) ->
diff --git a/lib/compiler/test/receive_SUITE.erl b/lib/compiler/test/receive_SUITE.erl
index 2a67615e5e..82c823b789 100644
--- a/lib/compiler/test/receive_SUITE.erl
+++ b/lib/compiler/test/receive_SUITE.erl
@@ -40,10 +40,12 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [recv, coverage, otp_7980, ref_opt, export].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [recv,coverage,otp_7980,ref_opt,export]}].
+
 
 init_per_suite(Config) ->
     Config.
diff --git a/lib/compiler/test/receive_SUITE_data/ref_opt/no_4.erl b/lib/compiler/test/receive_SUITE_data/ref_opt/no_4.erl
new file mode 100644
index 0000000000..3ce222176b
--- /dev/null
+++ b/lib/compiler/test/receive_SUITE_data/ref_opt/no_4.erl
@@ -0,0 +1,12 @@
+-module(no_4).
+-compile(export_all).
+
+?MODULE() ->
+    ok.
+
+f(X) ->
+    {Pid,Ref} = spawn_monitor(fun() -> ok end),
+    r(Pid, Ref).
+
+r(_, _) ->
+    ok.
diff --git a/lib/compiler/test/receive_SUITE_data/ref_opt/yes_10.erl b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_10.erl
new file mode 100644
index 0000000000..7ce6e6103c
--- /dev/null
+++ b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_10.erl
@@ -0,0 +1,13 @@
+-module(yes_10).
+-compile(export_all).
+
+?MODULE() ->
+    ok.
+
+f() ->
+    Ref = make_ref(),
+    receive
+	%% Artifical example to cover more code in beam_receive.
+	{X,Y} when Ref =/= X, Ref =:= Y ->
+	    ok
+    end.
diff --git a/lib/compiler/test/receive_SUITE_data/ref_opt/yes_11.erl b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_11.erl
new file mode 100644
index 0000000000..62f439fc42
--- /dev/null
+++ b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_11.erl
@@ -0,0 +1,21 @@
+-module(yes_11).
+-compile(export_all).
+
+?MODULE() ->
+    ok.
+
+%% Artifical example to cover more code in beam_receive.
+do_call(Process, Request) ->
+    Mref = erlang:monitor(process, Process),
+    Process ! Request,
+    receive
+	{X,Y,Z} when Mref =/= X, Z =:= 42, Mref =:= Y ->
+	    error;
+	{X,Y,_} when Mref =/= X, Mref =:= Y ->
+	    error;
+	{Mref, Reply} ->
+	    erlang:demonitor(Mref, [flush]),
+	    {ok, Reply};
+	{'DOWN', Mref, _, _, _} ->
+	    error
+    end.
diff --git a/lib/compiler/test/receive_SUITE_data/ref_opt/yes_12.erl b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_12.erl
new file mode 100644
index 0000000000..efcfed6059
--- /dev/null
+++ b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_12.erl
@@ -0,0 +1,12 @@
+-module(yes_12).
+-compile(export_all).
+
+?MODULE() ->
+    ok.
+
+f() ->
+    {_,Ref} = spawn_monitor(fun() -> ok end),
+    receive
+	{'DOWN',Ref,_,_,Reason} ->
+	    Reason
+    end.
diff --git a/lib/compiler/test/receive_SUITE_data/ref_opt/yes_13.erl b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_13.erl
new file mode 100644
index 0000000000..9e93d12ed6
--- /dev/null
+++ b/lib/compiler/test/receive_SUITE_data/ref_opt/yes_13.erl
@@ -0,0 +1,12 @@
+-module(yes_13).
+-compile(export_all).
+
+?MODULE() ->
+    ok.
+
+f() ->
+    {Pid,Ref} = spawn_monitor(fun() -> ok end),
+    receive
+	{'DOWN',Ref,process,Pid,Reason} ->
+	    Reason
+    end.
diff --git a/lib/compiler/test/record_SUITE.erl b/lib/compiler/test/record_SUITE.erl
index 363422ec7e..96f3712be9 100644
--- a/lib/compiler/test/record_SUITE.erl
+++ b/lib/compiler/test/record_SUITE.erl
@@ -42,12 +42,14 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [errors, record_test_2, record_test_3,
-     record_access_in_guards, guard_opt, eval_once, foobar,
-     missing_test_heap, nested_access, coverage].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [errors,record_test_2,record_test_3,
+       record_access_in_guards,guard_opt,eval_once,foobar,
+       missing_test_heap,nested_access,coverage]}].
+
 
 init_per_suite(Config) ->
     Config.
diff --git a/lib/compiler/test/test_lib.erl b/lib/compiler/test/test_lib.erl
index 2295592a38..996c369705 100644
--- a/lib/compiler/test/test_lib.erl
+++ b/lib/compiler/test/test_lib.erl
@@ -20,7 +20,8 @@
 
 -include("test_server.hrl").
 -compile({no_auto_import,[binary_part/2]}).
--export([recompile/1,opt_opts/1,get_data_dir/1,smoke_disasm/1,p_run/2,binary_part/2]).
+-export([recompile/1,parallel/0,uniq/0,opt_opts/1,get_data_dir/1,
+	 smoke_disasm/1,p_run/2,binary_part/2]).
 
 recompile(Mod) when is_atom(Mod) ->
     case whereis(cover_server) of
@@ -43,6 +44,18 @@ smoke_disasm(File) when is_list(File) ->
     Res = beam_disasm:file(File),
     {beam_file,_Mod} = {element(1, Res),element(2, Res)}.
 
+parallel() ->
+    case ?t:is_cover() orelse erlang:system_info(schedulers) =:= 1 of
+	true -> [];
+	false -> [parallel]
+    end.
+
+uniq() ->
+    U0 = erlang:ref_to_list(make_ref()),
+    U1 = re:replace(U0, "^#Ref", ""),
+    U = re:replace(U1, "[^[A-Za-z0-9_]+", "_", [global]),
+    re:replace(U, "_*$", "", [{return,list}]).
+
 %% Retrieve the "interesting" compiler options (options for optimization
 %% and compatibility) for the given module.
 
diff --git a/lib/compiler/test/trycatch_SUITE.erl b/lib/compiler/test/trycatch_SUITE.erl
index 29119c0f5d..4530d08c77 100644
--- a/lib/compiler/test/trycatch_SUITE.erl
+++ b/lib/compiler/test/trycatch_SUITE.erl
@@ -32,13 +32,15 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [basic, lean_throw, try_of, try_after, catch_oops,
-     after_oops, eclectic, rethrow, nested_of, nested_catch,
-     nested_after, nested_horrid, last_call_optimization,
-     bool, plain_catch_coverage, andalso_orelse, get_in_try].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [basic,lean_throw,try_of,try_after,catch_oops,
+       after_oops,eclectic,rethrow,nested_of,nested_catch,
+       nested_after,nested_horrid,last_call_optimization,
+       bool,plain_catch_coverage,andalso_orelse,get_in_try]}].
+
 
 init_per_suite(Config) ->
     Config.
diff --git a/lib/compiler/test/warnings_SUITE.erl b/lib/compiler/test/warnings_SUITE.erl
index f6a572abfa..9ce0df5ec4 100644
--- a/lib/compiler/test/warnings_SUITE.erl
+++ b/lib/compiler/test/warnings_SUITE.erl
@@ -55,12 +55,13 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
 
 all() -> 
     test_lib:recompile(?MODULE),
-    [pattern, pattern2, pattern3, pattern4, guard,
-     bad_arith, bool_cases, bad_apply, files, effect,
-     bin_opt_info, bin_construction].
+    [{group,p}].
 
 groups() -> 
-    [].
+    [{p,test_lib:parallel(),
+      [pattern,pattern2,pattern3,pattern4,guard,
+       bad_arith,bool_cases,bad_apply,files,effect,
+       bin_opt_info,bin_construction]}].
 
 init_per_suite(Config) ->
     Config.
@@ -556,9 +557,10 @@ run(Config, Tests) ->
 %% Compiles a test module and returns the list of errors and warnings.
 
 run_test(Conf, Test0, Warnings) ->
-    Filename = 'warnings_test.erl',
+    Mod = "warnings_"++test_lib:uniq(),
+    Filename = Mod ++ ".erl",
     ?line DataDir = ?privdir,
-    ?line Test = ["-module(warnings_test). ", Test0],
+    Test = ["-module(", Mod, "). ", Test0],
     ?line File = filename:join(DataDir, Filename),
     ?line Opts = [binary,export_all,return|Warnings],
     ?line ok = file:write_file(File, Test),