25 files changed, 941 insertions, 649 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index 299b2892fc..f75beaba20 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -50,6 +50,7 @@ MODULES =  \
 	beam_asm \
 	beam_block \
 	beam_bool \
+	beam_bs \
 	beam_bsm \
 	beam_clean \
 	beam_dead \
@@ -62,6 +63,7 @@ MODULES =  \
 	beam_opcodes \
 	beam_peep \
 	beam_receive \
+	beam_reorder \
 	beam_split \
 	beam_trim \
 	beam_type \
diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl
index a3201b0f4a..95be471de3 100644
--- a/lib/compiler/src/beam_asm.erl
+++ b/lib/compiler/src/beam_asm.erl
@@ -30,11 +30,12 @@
 module(Code, Abst, SourceFile, Opts) ->
     {ok,assemble(Code, Abst, SourceFile, Opts)}.
 
-assemble({Mod,Exp,Attr0,Asm0,NumLabels}, Abst, SourceFile, Opts) ->
+assemble({Mod,Exp0,Attr0,Asm0,NumLabels}, Abst, SourceFile, Opts) ->
     {1,Dict0} = beam_dict:atom(Mod, beam_dict:new()),
     {0,Dict1} = beam_dict:fname(atom_to_list(Mod) ++ ".erl", Dict0),
     NumFuncs = length(Asm0),
     {Asm,Attr} = on_load(Asm0, Attr0),
+    Exp = cerl_sets:from_list(Exp0),
     {Code,Dict2} = assemble_1(Asm, Exp, Dict1, []),
     build_file(Code, Attr, Dict2, NumLabels, NumFuncs, Abst, SourceFile, Opts).
 
@@ -61,7 +62,7 @@ insert_on_load_instruction(Is0, Entry) ->
     Bef ++ [El,on_load|Is].
 
 assemble_1([{function,Name,Arity,Entry,Asm}|T], Exp, Dict0, Acc) ->
-    Dict1 = case member({Name,Arity}, Exp) of
+    Dict1 = case cerl_sets:is_element({Name,Arity}, Exp) of
 		true ->
 		    beam_dict:export(Name, Arity, Entry, Dict0);
 		false ->
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 0321b1c07b..10dbaf462c 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -23,14 +23,13 @@
 -module(beam_block).
 
 -export([module/2]).
--import(lists, [mapfoldl/3,reverse/1,reverse/2,foldl/3,member/2]).
--define(MAXREG, 1024).
+-import(lists, [reverse/1,reverse/2,foldl/3,member/2]).
 
-module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) ->
-    {Fs,Lc} = mapfoldl(fun function/2, Lc0, Fs0),
+module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
+    Fs = [function(F) || F <- Fs0],
     {ok,{Mod,Exp,Attr,Fs,Lc}}.
 
-function({function,Name,Arity,CLabel,Is0}, Lc0) ->
+function({function,Name,Arity,CLabel,Is0}) ->
     try
 	%% Collect basic blocks and optimize them.
 	Is1 = blockify(Is0),
@@ -40,11 +39,8 @@ function({function,Name,Arity,CLabel,Is0}, Lc0) ->
 	Is5 = opt_blocks(Is4),
 	Is6 = beam_utils:delete_live_annos(Is5),
 
-	%% Optimize bit syntax.
-	{Is,Lc} = bsm_opt(Is6, Lc0),
-
 	%% Done.
-	{{function,Name,Arity,CLabel,Is},Lc}
+	{function,Name,Arity,CLabel,Is6}
     catch
 	Class:Error ->
 	    Stack = erlang:get_stacktrace(),
@@ -62,56 +58,15 @@ blockify(Is) ->
 blockify([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_Lbl},{label,Fail}|Is], Acc) ->
     %% Useless instruction sequence.
     blockify(Is, Acc);
-blockify([{test,is_atom,{f,Fail},[Reg]}=I|
-	  [{select,select_val,Reg,{f,Fail},
-	    [{atom,false},{f,_}=BrFalse,
-	     {atom,true}=AtomTrue,{f,_}=BrTrue]}|Is]=Is0],
-	 [{block,Bl}|_]=Acc) ->
-    case is_last_bool(Bl, Reg) of
-	false ->
-	    blockify(Is0, [I|Acc]);
-	true ->
-	    %% The last instruction is a boolean operator/guard BIF that can't fail.
-	    %% We can convert the three-way branch to a two-way branch (eliminating
-	    %% the reference to the failure label).
-	    blockify(Is, [{jump,BrTrue},
-			  {test,is_eq_exact,BrFalse,[Reg,AtomTrue]}|Acc])
-    end;
-blockify([{test,is_atom,{f,Fail},[Reg]}=I|
-	  [{select,select_val,Reg,{f,Fail},
-	    [{atom,true}=AtomTrue,{f,_}=BrTrue,
-	     {atom,false},{f,_}=BrFalse]}|Is]=Is0],
-	 [{block,Bl}|_]=Acc) ->
-    case is_last_bool(Bl, Reg) of
-	false ->
-	    blockify(Is0, [I|Acc]);
-	true ->
-	    blockify(Is, [{jump,BrTrue},
-			  {test,is_eq_exact,BrFalse,[Reg,AtomTrue]}|Acc])
-    end;
 blockify([I|Is0]=IsAll, Acc) ->
-    case is_bs_put(I) of
-	true ->
-	    {BsPuts0,Is} = collect_bs_puts(IsAll),
-	    BsPuts = opt_bs_puts(BsPuts0),
-	    blockify(Is, reverse(BsPuts, Acc));
-	false ->
-	    case collect(I) of
-		error -> blockify(Is0, [I|Acc]);
-		Instr when is_tuple(Instr) ->
-		    {Block,Is} = collect_block(IsAll),
-		    blockify(Is, [{block,Block}|Acc])
-	    end
+    case collect(I) of
+	error -> blockify(Is0, [I|Acc]);
+	Instr when is_tuple(Instr) ->
+	    {Block,Is} = collect_block(IsAll),
+	    blockify(Is, [{block,Block}|Acc])
     end;
 blockify([], Acc) -> reverse(Acc).
 
-is_last_bool([{set,[Reg],As,{bif,N,_}}], Reg) ->
-    Ar = length(As),
-    erl_internal:new_type_test(N, Ar) orelse erl_internal:comp_op(N, Ar)
-	orelse erl_internal:bool_op(N, Ar);
-is_last_bool([_|Is], Reg) -> is_last_bool(Is, Reg);
-is_last_bool([], _) -> false.
-
 collect_block(Is) ->
     collect_block(Is, []).
 
@@ -149,7 +104,10 @@ collect({put_map,F,Op,S,D,R,{list,Puts}}) ->
 collect({get_map_elements,F,S,{list,Gets}}) ->
     {Ss,Ds} = beam_utils:split_even(Gets),
     {set,Ds,[S|Ss],{get_map_elements,F}};
-collect({'catch',R,L})       -> {set,[R],[],{'catch',L}};
+collect({'catch'=Op,R,L}) ->
+    {set,[R],[],{try_catch,Op,L}};
+collect({'try'=Op,R,L}) ->
+    {set,[R],[],{try_catch,Op,L}};
 collect(fclearerror)         -> {set,[],[],fclearerror};
 collect({fcheckerror,{f,0}}) -> {set,[],[],fcheckerror};
 collect({fmove,S,D})         -> {set,[D],[S],fmove};
@@ -183,7 +141,9 @@ opt_blocks([I|Is]) ->
 opt_blocks([]) -> [].
 
 opt_block(Is0) ->
-    Is = find_fixpoint(fun opt/1, Is0),
+    Is = find_fixpoint(fun(Is) ->
+			       opt_tuple_element(opt(Is))
+		       end, Is0),
     opt_alloc(Is).
 
 find_fixpoint(OptFun, Is0) ->
@@ -279,76 +239,151 @@ opt_moves([X0,Y0], Is0) ->
 	not_possible -> {[X,Y0],Is2};
 	{X,_} -> {[X,Y0],Is2};
 	{Y,Is} -> {[X,Y],Is}
-    end;
-opt_moves(Ds, Is) ->
-    %% multiple destinations -> pass through
-    {Ds,Is}.
-
+    end.
 
 %% opt_move(Dest, [Instruction]) -> {UpdatedDest,[Instruction]} | not_possible
 %%  If there is a {move,Dest,FinalDest} instruction
 %%  in the instruction stream, remove the move instruction
 %%  and let FinalDest be the destination.
-%%
-%%  For this optimization to be safe, we must be sure that
-%%  Dest will not be referenced in any other by other instructions
-%%  in the rest of the instruction stream. Not even the indirect
-%%  reference by an instruction that may allocate (such as
-%%  test_heap/2 or a GC Bif) is allowed.
 
 opt_move(Dest, Is) ->
-    opt_move_1(Dest, Is, ?MAXREG, []).
-
-opt_move_1(R, [{set,_,_,{alloc,Live,_}}|_]=Is, SafeRegs, Acc) when Live < SafeRegs ->
-    %% Downgrade number of safe regs and rescan the instruction, as it most probably
-    %% is a gc_bif instruction.
-    opt_move_1(R, Is, Live, Acc);
-opt_move_1(R, [{set,[{x,X}=D],[R],move}|Is], SafeRegs, Acc) ->
-    case X < SafeRegs andalso beam_utils:is_killed_block(R, Is) of
-	true -> opt_move_2(D, Acc, Is);
-	false -> not_possible
+    opt_move_1(Dest, Is, []).
+
+opt_move_1(R, [{set,[D],[R],move}|Is0], Acc) ->
+    %% Provided that the source register is killed by instructions
+    %% that follow, the optimization is safe.
+    case eliminate_use_of_from_reg(Is0, R, D, []) of
+	{yes,Is} -> opt_move_rev(D, Acc, Is);
+	no -> not_possible
     end;
-opt_move_1(R, [{set,[D],[R],move}|Is], _SafeRegs, Acc) ->
-    case beam_utils:is_killed_block(R, Is) of
-	true -> opt_move_2(D, Acc, Is);
-	false -> not_possible
+opt_move_1({x,_}, [{set,_,_,{alloc,_,_}}|_], _) ->
+    %% The optimization is not possible. If the X register is not
+    %% killed by allocation, the optimization would not be safe.
+    %% If the X register is killed, it means that there cannot
+    %% follow a 'move' instruction with this X register as the
+    %% source.
+    not_possible;
+opt_move_1(R, [{set,_,_,_}=I|Is], Acc) ->
+    %% If the source register is either killed or used by this
+    %% instruction, the optimization is not possible.
+    case is_killed_or_used(R, I) of
+	true -> not_possible;
+	false -> opt_move_1(R, Is, [I|Acc])
     end;
-opt_move_1(R, [I|Is], SafeRegs, Acc) ->
-    case is_transparent(R, I) of
-	false -> not_possible;
-	true -> opt_move_1(R, Is, SafeRegs, [I|Acc])
-    end.
+opt_move_1(_, _, _) ->
+    not_possible.
+
+%% opt_tuple_element([Instruction]) -> [Instruction]
+%%  If possible, move get_tuple_element instructions forward
+%%  in the instruction stream to a move instruction, eliminating
+%%  the move instruction. Example:
+%%
+%%    get_tuple_element Tuple Pos Dst1
+%%    ...
+%%    move Dst1 Dst2
+%%
+%%  It may be possible to rewrite this code to:
+%%
+%%    %%(Moved get_tuple_element instruction)
+%%    ...
+%%    get_tuple_element Tuple Pos Dst2
+%%
 
-%% Reverse the instructions, while checking that there are no instructions that
-%% would interfere with using the new destination register chosen.
+opt_tuple_element([{set,[D],[S],{get_tuple_element,_}}=I|Is0]) ->
+    case opt_tuple_element_1(Is0, I, {S,D}, []) of
+	no ->
+	    [I|opt_tuple_element(Is0)];
+	{yes,Is} ->
+	    opt_tuple_element(Is)
+    end;
+opt_tuple_element([I|Is]) ->
+    [I|opt_tuple_element(Is)];
+opt_tuple_element([]) -> [].
+
+opt_tuple_element_1([{set,_,_,{alloc,_,_}}|_], _, _, _) ->
+    no;
+opt_tuple_element_1([{set,_,_,{try_catch,_,_}}|_], _, _, _) ->
+    no;
+opt_tuple_element_1([{set,[D],[S],move}|Is0], I0, {_,S}, Acc) ->
+    case eliminate_use_of_from_reg(Is0, S, D, []) of
+	no ->
+	    no;
+	{yes,Is} ->
+	    {set,[S],Ss,Op} = I0,
+	    I = {set,[D],Ss,Op},
+	    {yes,reverse(Acc, [I|Is])}
+    end;
+opt_tuple_element_1([{set,Ds,Ss,_}=I|Is], MovedI, {S,D}=Regs, Acc) ->
+    case member(S, Ds) orelse member(D, Ss) of
+	true ->
+	    no;
+	false ->
+	    opt_tuple_element_1(Is, MovedI, Regs, [I|Acc])
+    end;
+opt_tuple_element_1(_, _, _, _) -> no.
+
+%% Reverse the instructions, while checking that there are no
+%% instructions that would interfere with using the new destination
+%% register (D).
 
-opt_move_2(D, [I|Is], Acc) ->
-    case is_transparent(D, I) of
-	false -> not_possible;
-	true -> opt_move_2(D, Is, [I|Acc])
+opt_move_rev(D, [I|Is], Acc) ->
+    case is_killed_or_used(D, I) of
+	true -> not_possible;
+	false -> opt_move_rev(D, Is, [I|Acc])
+    end;
+opt_move_rev(D, [], Acc) -> {D,Acc}.
+
+%% is_killed_or_used(Register, {set,_,_,_}) -> bool()
+%%  Test whether the register is killed (written) or used (read) by the instruction.
+
+is_killed_or_used(R, {set,Ss,Ds,_}) ->
+    member(R, Ds) orelse member(R, Ss).
+
+%% eliminate_use_of_from_reg([Instruction], FromRegister, ToRegister, Acc) ->
+%%       {yes,Is} | no
+%%  Eliminate any use of FromRegister in the instruction sequence
+%%  by replacing uses of FromRegister with ToRegister. If FromRegister
+%%  is referenced by an allocation instruction, return 'no' to indicate
+%%  that FromRegister is still used and that the optimization is not
+%%  possible.
+
+eliminate_use_of_from_reg([{set,_,_,{alloc,Live,_}}|_]=Is0, {x,X}, _, Acc) ->
+    if
+	X < Live ->
+	    no;
+	true ->
+	    {yes,reverse(Acc, Is0)}
     end;
-opt_move_2(D, [], Acc) -> {D,Acc}.
-
-%% is_transparent(Register, Instruction) -> true | false
-%%  Returns true if Instruction does not in any way references Register
-%%  (even indirectly by an allocation instruction).
-%%  Returns false if Instruction does reference Register, or we are
-%%  not sure.
-
-is_transparent({x,X}, {set,_,_,{alloc,Live,_}}) when X < Live ->
-    false;
-is_transparent(R, {set,Ds,Ss,_Op}) ->
-    case member(R, Ds) of
-	true -> false;
-	false -> not member(R, Ss)
+eliminate_use_of_from_reg([{set,Ds,Ss0,Op}=I0|Is], From, To, Acc) ->
+    I = case member(From, Ss0) of
+	    true ->
+		Ss = [case S of
+			  From -> To;
+			  _ -> S
+		      end || S <- Ss0],
+		{set,Ds,Ss,Op};
+	    false ->
+		I0
+	end,
+    case member(From, Ds) of
+	true ->
+	    {yes,reverse(Acc, [I|Is])};
+	false ->
+	    eliminate_use_of_from_reg(Is, From, To, [I|Acc])
     end;
-is_transparent(_, _) -> false.
+eliminate_use_of_from_reg([I]=Is, From, _To, Acc) ->
+    case beam_utils:is_killed_block(From, [I]) of
+	true ->
+	    {yes,reverse(Acc, Is)};
+	false ->
+	    no
+    end.
 
 %% opt_alloc(Instructions) -> Instructions'
 %%  Optimises all allocate instructions.
 
 opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is]) ->
-    [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|opt(Is)];
+    [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|Is];
 opt_alloc([I|Is]) -> [I|opt_alloc(Is)];
 opt_alloc([]) -> [].
 	
@@ -414,234 +449,3 @@ x_dead([], Regs) -> Regs.
 x_live([{x,N}|Rs], Regs) -> x_live(Rs, Regs bor (1 bsl N));
 x_live([_|Rs], Regs) -> x_live(Rs, Regs);
 x_live([], Regs) -> Regs.
-
-%%%
-%%% Evaluation of constant bit fields.
-%%%
-
-is_bs_put({bs_put,_,{bs_put_integer,_,_},_}) -> true;
-is_bs_put({bs_put,_,{bs_put_float,_,_},_}) -> true;
-is_bs_put(_) -> false.
-
-collect_bs_puts(Is) ->
-    collect_bs_puts_1(Is, []).
-    
-collect_bs_puts_1([I|Is]=Is0, Acc) ->
-    case is_bs_put(I) of
-	false -> {reverse(Acc),Is0};
-	true -> collect_bs_puts_1(Is, [I|Acc])
-    end.
-    
-opt_bs_puts(Is) ->
-    opt_bs_1(Is, []).
-
-opt_bs_1([{bs_put,Fail,
-	   {bs_put_float,1,Flags0},[{integer,Sz},Src]}=I0|Is], Acc) ->
-    try eval_put_float(Src, Sz, Flags0) of
-	<<Int:Sz>> ->
-	    Flags = force_big(Flags0),
-	    I = {bs_put,Fail,{bs_put_integer,1,Flags},
-		 [{integer,Sz},{integer,Int}]},
-	    opt_bs_1([I|Is], Acc)
-    catch
-	error:_ ->
-	    opt_bs_1(Is, [I0|Acc])
-    end;
-opt_bs_1([{bs_put,_,{bs_put_integer,1,_},[{integer,8},{integer,_}]}|_]=IsAll,
-	 Acc0) ->
-    {Is,Acc} = bs_collect_string(IsAll, Acc0),
-    opt_bs_1(Is, Acc);
-opt_bs_1([{bs_put,Fail,{bs_put_integer,1,F},[{integer,Sz},{integer,N}]}=I|Is0],
-	 Acc) when Sz > 8 ->
-    case field_endian(F) of
-	big ->
-	    %% We can do this optimization for any field size without risk
-	    %% for code explosion.
-	    case bs_split_int(N, Sz, Fail, Is0) of
-		no_split -> opt_bs_1(Is0, [I|Acc]);
-		Is -> opt_bs_1(Is, Acc)
-	    end;
-	little when Sz < 128 ->
-	    %% We only try to optimize relatively small fields, to avoid
-	    %% an explosion in code size.
-	    <<Int:Sz>> = <<N:Sz/little>>,
-	    Flags = force_big(F),
-	    Is = [{bs_put,Fail,{bs_put_integer,1,Flags},
-		   [{integer,Sz},{integer,Int}]}|Is0],
-	    opt_bs_1(Is, Acc);
-	_ -> 					%native or too wide little field
-	    opt_bs_1(Is0, [I|Acc])
-    end;
-opt_bs_1([{bs_put,Fail,{Op,U,F},[{integer,Sz},Src]}|Is], Acc) when U > 1 ->
-    opt_bs_1([{bs_put,Fail,{Op,1,F},[{integer,U*Sz},Src]}|Is], Acc);
-opt_bs_1([I|Is], Acc) ->
-    opt_bs_1(Is, [I|Acc]);
-opt_bs_1([], Acc) -> reverse(Acc).
-
-eval_put_float(Src, Sz, Flags) when Sz =< 256 -> %Only evaluate if Sz is reasonable.
-    Val = value(Src),
-    case field_endian(Flags) of
-	little -> <<Val:Sz/little-float-unit:1>>;
-	big -> <<Val:Sz/big-float-unit:1>>
-        %% native intentionally not handled here - we can't optimize it.
-    end.
-
-value({integer,I}) -> I;
-value({float,F}) -> F.
-
-bs_collect_string(Is, [{bs_put,_,{bs_put_string,Len,{string,Str}},[]}|Acc]) ->
-    bs_coll_str_1(Is, Len, reverse(Str), Acc);
-bs_collect_string(Is, Acc) ->
-    bs_coll_str_1(Is, 0, [], Acc).
-    
-bs_coll_str_1([{bs_put,_,{bs_put_integer,U,_},[{integer,Sz},{integer,V}]}|Is],
-	      Len, StrAcc, IsAcc) when U*Sz =:= 8 ->
-    Byte = V band 16#FF,
-    bs_coll_str_1(Is, Len+1, [Byte|StrAcc], IsAcc);
-bs_coll_str_1(Is, Len, StrAcc, IsAcc) ->
-    {Is,[{bs_put,{f,0},{bs_put_string,Len,{string,reverse(StrAcc)}},[]}|IsAcc]}.
-
-field_endian({field_flags,F}) -> field_endian_1(F).
-
-field_endian_1([big=E|_]) -> E;
-field_endian_1([little=E|_]) -> E;
-field_endian_1([native=E|_]) -> E;
-field_endian_1([_|Fs]) -> field_endian_1(Fs).
-
-force_big({field_flags,F}) ->
-    {field_flags,force_big_1(F)}.
-
-force_big_1([big|_]=Fs) -> Fs;
-force_big_1([little|Fs]) -> [big|Fs];
-force_big_1([F|Fs]) -> [F|force_big_1(Fs)].
-
-bs_split_int(0, Sz, _, _) when Sz > 64 ->
-    %% We don't want to split in this case because the
-    %% string will consist of only zeroes.
-    no_split;
-bs_split_int(-1, Sz, _, _) when Sz > 64 ->
-    %% We don't want to split in this case because the
-    %% string will consist of only 255 bytes.
-    no_split;
-bs_split_int(N, Sz, Fail, Acc) ->
-    FirstByteSz = case Sz rem 8 of
-		      0 -> 8;
-		      Rem -> Rem
-		  end,
-    bs_split_int_1(N, FirstByteSz, Sz, Fail, Acc).
-
-bs_split_int_1(-1, _, Sz, Fail, Acc) when Sz > 64 ->
-    I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}},
-	 [{integer,Sz},{integer,-1}]},
-    [I|Acc];
-bs_split_int_1(0, _, Sz, Fail, Acc) when Sz > 64 ->
-    I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}},
-	 [{integer,Sz},{integer,0}]},
-    [I|Acc];
-bs_split_int_1(N, ByteSz, Sz, Fail, Acc) when Sz > 0 ->
-    Mask = (1 bsl ByteSz) - 1,
-    I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}},
-	 [{integer,ByteSz},{integer,N band Mask}]},
-    bs_split_int_1(N bsr ByteSz, 8, Sz-ByteSz, Fail, [I|Acc]);
-bs_split_int_1(_, _, _, _, Acc) -> Acc.
-
-
-%%%
-%%% Optimization of new bit syntax matching: get rid
-%%% of redundant bs_restore2/2 instructions across select_val
-%%% instructions, as well as a few other simple peep-hole optimizations.
-%%%
-
-bsm_opt(Is0, Lc0) ->
-    {Is1,D0,Lc} = bsm_scan(Is0, [], Lc0, []),
-    Is2 = case D0 of
-	      [] ->
-		  Is1;
-	     _ ->
-		  D = gb_trees:from_orddict(orddict:from_list(D0)),
-		  bsm_reroute(Is1, D, none, [])
-	 end,
-    Is = beam_clean:bs_clean_saves(Is2),
-    {bsm_opt_2(Is, []),Lc}.
-
-bsm_scan([{label,L}=Lbl,{bs_restore2,_,Save}=R|Is], D0, Lc, Acc0) ->
-    D = [{{L,Save},Lc}|D0],
-    Acc = [{label,Lc},R,Lbl|Acc0],
-    bsm_scan(Is, D, Lc+1, Acc);
-bsm_scan([I|Is], D, Lc, Acc) ->
-    bsm_scan(Is, D, Lc, [I|Acc]);
-bsm_scan([], D, Lc, Acc) ->
-    {reverse(Acc),D,Lc}.
-
-bsm_reroute([{bs_save2,Reg,Save}=I|Is], D, _, Acc) ->
-    bsm_reroute(Is, D, {Reg,Save}, [I|Acc]);
-bsm_reroute([{bs_restore2,Reg,Save}=I|Is], D, _, Acc) ->
-    bsm_reroute(Is, D, {Reg,Save}, [I|Acc]);
-bsm_reroute([{label,_}=I|Is], D, S, Acc) ->
-    bsm_reroute(Is, D, S, [I|Acc]);
-bsm_reroute([{select,select_val,Reg,F0,Lbls0}|Is], D, {_,Save}=S, Acc0) ->
-    [F|Lbls] = bsm_subst_labels([F0|Lbls0], Save, D),
-    Acc = [{select,select_val,Reg,F,Lbls}|Acc0],
-    bsm_reroute(Is, D, S, Acc);
-bsm_reroute([{test,TestOp,F0,TestArgs}=I|Is], D, {_,Save}=S, Acc0) ->
-    F = bsm_subst_label(F0, Save, D),
-    Acc = [{test,TestOp,F,TestArgs}|Acc0],
-    case bsm_not_bs_test(I) of
-	true ->
-	    %% The test instruction will not update the bit offset for the
-	    %% binary being matched. Therefore the save position can be kept.
-	    bsm_reroute(Is, D, S, Acc);
-	false ->
-	    %% The test instruction might update the bit offset. Kill our
-	    %% remembered Save position.
-	    bsm_reroute(Is, D, none, Acc)
-    end;
-bsm_reroute([{test,TestOp,F0,Live,TestArgs,Dst}|Is], D, {_,Save}, Acc0) ->
-    F = bsm_subst_label(F0, Save, D),
-    Acc = [{test,TestOp,F,Live,TestArgs,Dst}|Acc0],
-    %% The test instruction will update the bit offset. Kill our
-    %% remembered Save position.
-    bsm_reroute(Is, D, none, Acc);
-bsm_reroute([{block,[{set,[],[],{alloc,_,_}}]}=Bl,
-	     {bs_context_to_binary,_}=I|Is], D, S, Acc) ->
-    %% To help further bit syntax optimizations.
-    bsm_reroute([I,Bl|Is], D, S, Acc);
-bsm_reroute([I|Is], D, _, Acc) ->
-    bsm_reroute(Is, D, none, [I|Acc]);
-bsm_reroute([], _, _, Acc) -> reverse(Acc).
-
-bsm_opt_2([{test,bs_test_tail2,F,[Ctx,Bits]}|Is],
-	  [{test,bs_skip_bits2,F,[Ctx,{integer,I},Unit,_Flags]}|Acc]) ->
-    bsm_opt_2(Is, [{test,bs_test_tail2,F,[Ctx,Bits+I*Unit]}|Acc]);
-bsm_opt_2([{test,bs_skip_bits2,F,[Ctx,{integer,I1},Unit1,_]}|Is],
-	  [{test,bs_skip_bits2,F,[Ctx,{integer,I2},Unit2,Flags]}|Acc]) ->
-    bsm_opt_2(Is, [{test,bs_skip_bits2,F,
-		    [Ctx,{integer,I1*Unit1+I2*Unit2},1,Flags]}|Acc]);
-bsm_opt_2([I|Is], Acc) ->
-    bsm_opt_2(Is, [I|Acc]);
-bsm_opt_2([], Acc) -> reverse(Acc).
-
-%% bsm_not_bs_test({test,Name,_,Operands}) -> true|false.
-%%  Test whether is the test is a "safe", i.e. does not move the
-%%  bit offset for a binary.
-%%
-%%  'true' means that the test is safe, 'false' that we don't know or
-%%  that the test moves the offset (e.g. bs_get_integer2).
-
-bsm_not_bs_test({test,bs_test_tail2,_,[_,_]}) -> true;
-bsm_not_bs_test(Test) -> beam_utils:is_pure_test(Test).
-
-bsm_subst_labels(Fs, Save, D) ->
-    bsm_subst_labels_1(Fs, Save, D, []).
-
-bsm_subst_labels_1([F|Fs], Save, D, Acc) ->
-    bsm_subst_labels_1(Fs, Save, D, [bsm_subst_label(F, Save, D)|Acc]);
-bsm_subst_labels_1([], _, _, Acc) ->
-    reverse(Acc).
-
-bsm_subst_label({f,Lbl0}=F, Save, D) ->
-    case gb_trees:lookup({Lbl0,Save}, D) of
-	{value,Lbl} -> {f,Lbl};
-	none -> F
-    end;
-bsm_subst_label(Other, _, _) -> Other.
diff --git a/lib/compiler/src/beam_bool.erl b/lib/compiler/src/beam_bool.erl
index 14b6381230..c9e103eae9 100644
--- a/lib/compiler/src/beam_bool.erl
+++ b/lib/compiler/src/beam_bool.erl
@@ -25,8 +25,6 @@
 
 -import(lists, [reverse/1,reverse/2,foldl/3,mapfoldl/3,map/2]).
 
--define(MAXREG, 1024).
-
 -record(st,
 	{next,					%Next label number.
 	 ll					%Live regs at labels.
diff --git a/lib/compiler/src/beam_bs.erl b/lib/compiler/src/beam_bs.erl
new file mode 100644
index 0000000000..55fa7ce10c
--- /dev/null
+++ b/lib/compiler/src/beam_bs.erl
@@ -0,0 +1,278 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1999-2013. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Purpose : Optimizes bit syntax construction (evaluation of constant
+%% bit fields) and bit syntax matching (redundant bs_restore2 removal).
+
+-module(beam_bs).
+
+-export([module/2]).
+-import(lists, [mapfoldl/3,reverse/1]).
+
+module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) ->
+    {Fs,Lc} = mapfoldl(fun function/2, Lc0, Fs0),
+    {ok,{Mod,Exp,Attr,Fs,Lc}}.
+
+function({function,Name,Arity,CLabel,Is0}, Lc0) ->
+    try
+	Is1 = bs_put_opt(Is0),
+	{Is,Lc} = bsm_opt(Is1, Lc0),
+	{{function,Name,Arity,CLabel,Is},Lc}
+    catch
+	Class:Error ->
+	    Stack = erlang:get_stacktrace(),
+	    io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+	    erlang:raise(Class, Error, Stack)
+    end.
+
+%%%
+%%% Evaluation of constant bit fields.
+%%%
+
+bs_put_opt([{bs_put,_,_,_}=I|Is0]) ->
+    {BsPuts0,Is} = collect_bs_puts(Is0, [I]),
+    BsPuts = opt_bs_puts(BsPuts0),
+    BsPuts ++ bs_put_opt(Is);
+bs_put_opt([I|Is]) ->
+    [I|bs_put_opt(Is)];
+bs_put_opt([]) -> [].
+
+collect_bs_puts([{bs_put,_,_,_}=I|Is], Acc) ->
+    collect_bs_puts(Is, [I|Acc]);
+collect_bs_puts([_|_]=Is, Acc) ->
+    {reverse(Acc),Is}.
+
+opt_bs_puts(Is) ->
+    opt_bs_1(Is, []).
+
+opt_bs_1([{bs_put,Fail,
+	   {bs_put_float,1,Flags0},[{integer,Sz},Src]}=I0|Is], Acc) ->
+    try eval_put_float(Src, Sz, Flags0) of
+	<<Int:Sz>> ->
+	    Flags = force_big(Flags0),
+	    I = {bs_put,Fail,{bs_put_integer,1,Flags},
+		 [{integer,Sz},{integer,Int}]},
+	    opt_bs_1([I|Is], Acc)
+    catch
+	error:_ ->
+	    opt_bs_1(Is, [I0|Acc])
+    end;
+opt_bs_1([{bs_put,_,{bs_put_integer,1,_},[{integer,8},{integer,_}]}|_]=IsAll,
+	 Acc0) ->
+    {Is,Acc} = bs_collect_string(IsAll, Acc0),
+    opt_bs_1(Is, Acc);
+opt_bs_1([{bs_put,Fail,{bs_put_integer,1,F},[{integer,Sz},{integer,N}]}=I|Is0],
+	 Acc) when Sz > 8 ->
+    case field_endian(F) of
+	big ->
+	    %% We can do this optimization for any field size without
+	    %% risk for code explosion.
+	    case bs_split_int(N, Sz, Fail, Is0) of
+		no_split -> opt_bs_1(Is0, [I|Acc]);
+		Is -> opt_bs_1(Is, Acc)
+	    end;
+	little when Sz < 128 ->
+	    %% We only try to optimize relatively small fields, to
+	    %% avoid an explosion in code size.
+	    <<Int:Sz>> = <<N:Sz/little>>,
+	    Flags = force_big(F),
+	    Is = [{bs_put,Fail,{bs_put_integer,1,Flags},
+		   [{integer,Sz},{integer,Int}]}|Is0],
+	    opt_bs_1(Is, Acc);
+	_ ->			      %native or too wide little field
+	    opt_bs_1(Is0, [I|Acc])
+    end;
+opt_bs_1([{bs_put,Fail,{Op,U,F},[{integer,Sz},Src]}|Is], Acc) when U > 1 ->
+    opt_bs_1([{bs_put,Fail,{Op,1,F},[{integer,U*Sz},Src]}|Is], Acc);
+opt_bs_1([I|Is], Acc) ->
+    opt_bs_1(Is, [I|Acc]);
+opt_bs_1([], Acc) -> reverse(Acc).
+
+eval_put_float(Src, Sz, Flags) when Sz =< 256 ->
+    %% Only evaluate if Sz is reasonable.
+    Val = value(Src),
+    case field_endian(Flags) of
+	little -> <<Val:Sz/little-float-unit:1>>;
+	big -> <<Val:Sz/big-float-unit:1>>
+        %% native intentionally not handled here - we can't optimize
+        %% it.
+    end.
+
+value({integer,I}) -> I;
+value({float,F}) -> F.
+
+bs_collect_string(Is, [{bs_put,_,{bs_put_string,Len,{string,Str}},[]}|Acc]) ->
+    bs_coll_str_1(Is, Len, reverse(Str), Acc);
+bs_collect_string(Is, Acc) ->
+    bs_coll_str_1(Is, 0, [], Acc).
+
+bs_coll_str_1([{bs_put,_,{bs_put_integer,U,_},[{integer,Sz},{integer,V}]}|Is],
+	      Len, StrAcc, IsAcc) when U*Sz =:= 8 ->
+    Byte = V band 16#FF,
+    bs_coll_str_1(Is, Len+1, [Byte|StrAcc], IsAcc);
+bs_coll_str_1(Is, Len, StrAcc, IsAcc) ->
+    {Is,[{bs_put,{f,0},{bs_put_string,Len,{string,reverse(StrAcc)}},[]}|IsAcc]}.
+
+field_endian({field_flags,F}) -> field_endian_1(F).
+
+field_endian_1([big=E|_]) -> E;
+field_endian_1([little=E|_]) -> E;
+field_endian_1([native=E|_]) -> E;
+field_endian_1([_|Fs]) -> field_endian_1(Fs).
+
+force_big({field_flags,F}) ->
+    {field_flags,force_big_1(F)}.
+
+force_big_1([big|_]=Fs) -> Fs;
+force_big_1([little|Fs]) -> [big|Fs];
+force_big_1([F|Fs]) -> [F|force_big_1(Fs)].
+
+bs_split_int(0, Sz, _, _) when Sz > 64 ->
+    %% We don't want to split in this case because the
+    %% string will consist of only zeroes.
+    no_split;
+bs_split_int(-1, Sz, _, _) when Sz > 64 ->
+    %% We don't want to split in this case because the
+    %% string will consist of only 255 bytes.
+    no_split;
+bs_split_int(N, Sz, Fail, Acc) ->
+    FirstByteSz = case Sz rem 8 of
+		      0 -> 8;
+		      Rem -> Rem
+		  end,
+    bs_split_int_1(N, FirstByteSz, Sz, Fail, Acc).
+
+bs_split_int_1(-1, _, Sz, Fail, Acc) when Sz > 64 ->
+    I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}},
+	 [{integer,Sz},{integer,-1}]},
+    [I|Acc];
+bs_split_int_1(0, _, Sz, Fail, Acc) when Sz > 64 ->
+    I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}},
+	 [{integer,Sz},{integer,0}]},
+    [I|Acc];
+bs_split_int_1(N, ByteSz, Sz, Fail, Acc) when Sz > 0 ->
+    Mask = (1 bsl ByteSz) - 1,
+    I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}},
+	 [{integer,ByteSz},{integer,N band Mask}]},
+    bs_split_int_1(N bsr ByteSz, 8, Sz-ByteSz, Fail, [I|Acc]);
+bs_split_int_1(_, _, _, _, Acc) -> Acc.
+
+%%%
+%%% Optimization of bit syntax matching: get rid
+%%% of redundant bs_restore2/2 instructions across select_val
+%%% instructions, as well as a few other simple peep-hole
+%%% optimizations.
+%%%
+
+bsm_opt(Is0, Lc0) ->
+    {Is1,D0,Lc} = bsm_scan(Is0, [], Lc0, []),
+    Is2 = case D0 of
+	      [] ->
+		  %% No bit syntax matching in this function.
+		  Is1;
+	      [_|_] ->
+		  %% Optimize the bit syntax matching.
+		  D = gb_trees:from_orddict(orddict:from_list(D0)),
+		  bsm_reroute(Is1, D, none, [])
+	 end,
+    Is = beam_clean:bs_clean_saves(Is2),
+    {bsm_opt_2(Is, []),Lc}.
+
+bsm_scan([{label,L}=Lbl,{bs_restore2,_,Save}=R|Is], D0, Lc, Acc0) ->
+    D = [{{L,Save},Lc}|D0],
+    Acc = [{label,Lc},R,Lbl|Acc0],
+    bsm_scan(Is, D, Lc+1, Acc);
+bsm_scan([I|Is], D, Lc, Acc) ->
+    bsm_scan(Is, D, Lc, [I|Acc]);
+bsm_scan([], D, Lc, Acc) ->
+    {reverse(Acc),D,Lc}.
+
+bsm_reroute([{bs_save2,Reg,Save}=I|Is], D, _, Acc) ->
+    bsm_reroute(Is, D, {Reg,Save}, [I|Acc]);
+bsm_reroute([{bs_restore2,Reg,Save}=I|Is], D, _, Acc) ->
+    bsm_reroute(Is, D, {Reg,Save}, [I|Acc]);
+bsm_reroute([{label,_}=I|Is], D, S, Acc) ->
+    bsm_reroute(Is, D, S, [I|Acc]);
+bsm_reroute([{select,select_val,Reg,F0,Lbls0}|Is], D, {_,Save}=S, Acc0) ->
+    [F|Lbls] = bsm_subst_labels([F0|Lbls0], Save, D),
+    Acc = [{select,select_val,Reg,F,Lbls}|Acc0],
+    bsm_reroute(Is, D, S, Acc);
+bsm_reroute([{test,TestOp,F0,TestArgs}=I|Is], D, {_,Save}=S, Acc0) ->
+    F = bsm_subst_label(F0, Save, D),
+    Acc = [{test,TestOp,F,TestArgs}|Acc0],
+    case bsm_not_bs_test(I) of
+	true ->
+	    %% The test instruction will not update the bit offset for
+	    %% the binary being matched. Therefore the save position
+	    %% can be kept.
+	    bsm_reroute(Is, D, S, Acc);
+	false ->
+	    %% The test instruction might update the bit offset. Kill
+	    %% our remembered Save position.
+	    bsm_reroute(Is, D, none, Acc)
+    end;
+bsm_reroute([{test,TestOp,F0,Live,TestArgs,Dst}|Is], D, {_,Save}, Acc0) ->
+    F = bsm_subst_label(F0, Save, D),
+    Acc = [{test,TestOp,F,Live,TestArgs,Dst}|Acc0],
+    %% The test instruction will update the bit offset. Kill our
+    %% remembered Save position.
+    bsm_reroute(Is, D, none, Acc);
+bsm_reroute([{block,[{set,[],[],{alloc,_,_}}]}=Bl,
+	     {bs_context_to_binary,_}=I|Is], D, S, Acc) ->
+    %% To help further bit syntax optimizations.
+    bsm_reroute([I,Bl|Is], D, S, Acc);
+bsm_reroute([I|Is], D, _, Acc) ->
+    bsm_reroute(Is, D, none, [I|Acc]);
+bsm_reroute([], _, _, Acc) -> reverse(Acc).
+
+bsm_opt_2([{test,bs_test_tail2,F,[Ctx,Bits]}|Is],
+	  [{test,bs_skip_bits2,F,[Ctx,{integer,I},Unit,_Flags]}|Acc]) ->
+    bsm_opt_2(Is, [{test,bs_test_tail2,F,[Ctx,Bits+I*Unit]}|Acc]);
+bsm_opt_2([{test,bs_skip_bits2,F,[Ctx,{integer,I1},Unit1,_]}|Is],
+	  [{test,bs_skip_bits2,F,[Ctx,{integer,I2},Unit2,Flags]}|Acc]) ->
+    bsm_opt_2(Is, [{test,bs_skip_bits2,F,
+		    [Ctx,{integer,I1*Unit1+I2*Unit2},1,Flags]}|Acc]);
+bsm_opt_2([I|Is], Acc) ->
+    bsm_opt_2(Is, [I|Acc]);
+bsm_opt_2([], Acc) -> reverse(Acc).
+
+%% bsm_not_bs_test({test,Name,_,Operands}) -> true|false.
+%%  Test whether the test is "safe", i.e. does not move the
+%%  bit offset for a binary.
+%%
+%%  'true' means that the test is safe, 'false' that we don't know or
+%%  that the test moves the offset (e.g. bs_get_integer2).
+
+bsm_not_bs_test({test,bs_test_tail2,_,[_,_]}) -> true;
+bsm_not_bs_test(Test) -> beam_utils:is_pure_test(Test).
+
+bsm_subst_labels(Fs, Save, D) ->
+    bsm_subst_labels_1(Fs, Save, D, []).
+
+bsm_subst_labels_1([F|Fs], Save, D, Acc) ->
+    bsm_subst_labels_1(Fs, Save, D, [bsm_subst_label(F, Save, D)|Acc]);
+bsm_subst_labels_1([], _, _, Acc) ->
+    reverse(Acc).
+
+bsm_subst_label({f,Lbl0}=F, Save, D) ->
+    case gb_trees:lookup({Lbl0,Save}, D) of
+	{value,Lbl} -> {f,Lbl};
+	none -> F
+    end;
+bsm_subst_label(Other, _, _) -> Other.
diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl
index 919ee3ee7d..d9108c383d 100644
--- a/lib/compiler/src/beam_clean.erl
+++ b/lib/compiler/src/beam_clean.erl
@@ -141,7 +141,7 @@ renumber_labels([{bif,is_record,{f,_},
     renumber_labels(Is, Acc, St);
 renumber_labels([{test,is_record,{f,_}=Fail,
 		  [Term,{atom,Tag}=TagAtom,{integer,Arity}]}|Is0], Acc, St) ->
-    Tmp = {x,1023},
+    Tmp = {x,1022},
     Is = case is_record_tuple(Term, Tag, Arity) of
 	     yes ->
 		 Is0;
@@ -190,17 +190,11 @@ replace([{test,Test,{f,Lbl},Ops}|Is], Acc, D) ->
 replace([{test,Test,{f,Lbl},Live,Ops,Dst}|Is], Acc, D) ->
     replace(Is, [{test,Test,{f,label(Lbl, D)},Live,Ops,Dst}|Acc], D);
 replace([{select,I,R,{f,Fail0},Vls0}|Is], Acc, D) ->
-    Vls1 = map(fun ({f,L}) -> {f,label(L, D)};
-		   (Other) -> Other end, Vls0),
+    Vls = map(fun ({f,L}) -> {f,label(L, D)};
+		   (Other) -> Other
+	      end, Vls0),
     Fail = label(Fail0, D),
-    case redundant_values(Vls1, Fail, []) of
-	[] ->
-	    %% Oops, no choices left. The loader will not accept that.
-	    %% Convert to a plain jump.
-	    replace(Is, [{jump,{f,Fail}}|Acc], D);
-	Vls ->
-	    replace(Is, [{select,I,R,{f,Fail},Vls}|Acc], D)
-    end;
+    replace(Is, [{select,I,R,{f,Fail},Vls}|Acc], D);
 replace([{'try',R,{f,Lbl}}|Is], Acc, D) ->
     replace(Is, [{'try',R,{f,label(Lbl, D)}}|Acc], D);
 replace([{'catch',R,{f,Lbl}}|Is], Acc, D) ->
@@ -241,12 +235,6 @@ label(Old, D) ->
 	{value,Val} -> Val;
 	none -> throw({error,{undefined_label,Old}})
     end.
-	    
-redundant_values([_,{f,Fail}|Vls], Fail, Acc) ->
-    redundant_values(Vls, Fail, Acc);
-redundant_values([Val,Lbl|Vls], Fail, Acc) ->
-    redundant_values(Vls, Fail, [Lbl,Val|Acc]);
-redundant_values([], _, Acc) -> reverse(Acc).
 
 %%%
 %%% Final fixup of bs_start_match2/5,bs_save2/bs_restore2 instructions for
diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl
index ead88b57e9..11129c39bc 100644
--- a/lib/compiler/src/beam_dead.erl
+++ b/lib/compiler/src/beam_dead.erl
@@ -239,11 +239,26 @@ backward([{test,is_eq_exact,Fail,[Dst,{integer,Arity}]}=I|
 backward([{label,Lbl}=L|Is], D, Acc) ->
     backward(Is, beam_utils:index_label(Lbl, Acc, D), [L|Acc]);
 backward([{select,select_val,Reg,{f,Fail0},List0}|Is], D, Acc) ->
-    List = shortcut_select_list(List0, Reg, D, []),
+    List1 = shortcut_select_list(List0, Reg, D, []),
     Fail1 = shortcut_label(Fail0, D),
     Fail = shortcut_bs_test(Fail1, Is, D),
-    Sel = {select,select_val,Reg,{f,Fail},List},
-    backward(Is, D, [Sel|Acc]);
+    List = prune_redundant(List1, Fail),
+    case List of
+	[] ->
+	    Jump = {jump,{f,Fail}},
+	    backward([Jump|Is], D, Acc);
+	[V,F] ->
+	    Test = {test,is_eq_exact,{f,Fail},[Reg,V]},
+	    Jump = {jump,F},
+	    backward([Jump,Test|Is], D, Acc);
+	[{atom,B1},F,{atom,B2},F] when B1 =:= not B2 ->
+	    Test = {test,is_boolean,{f,Fail},[Reg]},
+	    Jump = {jump,F},
+	    backward([Jump,Test|Is], D, Acc);
+	[_|_] ->
+	    Sel = {select,select_val,Reg,{f,Fail},List},
+	    backward(Is, D, [Sel|Acc])
+    end;
 backward([{jump,{f,To0}},{move,Src,Reg}=Move|Is], D, Acc) ->
     To = shortcut_select_label(To0, Reg, Src, D),
     Jump = {jump,{f,To}},
@@ -257,14 +272,17 @@ backward([{jump,{f,To}}=J|[{bif,Op,_,Ops,Reg}|Is]=Is0], D, Acc) ->
     catch
 	throw:not_possible -> backward(Is0, D, [J|Acc])
     end;
-backward([{test,bs_start_match2,F,_,[R,_],Ctxt}=I|Is], D,
+backward([{test,bs_start_match2,F,Live,[R,_]=Args,Ctxt}|Is], D,
 	 [{test,bs_match_string,F,[Ctxt,Bs]},
 	  {test,bs_test_tail2,F,[Ctxt,0]}|Acc0]=Acc) ->
+    {f,To0} = F,
+    To = shortcut_bs_start_match(To0, R, D),
     case beam_utils:is_killed(Ctxt, Acc0, D) of
 	true ->
-	    Eq = {test,is_eq_exact,F,[R,{literal,Bs}]},
+	    Eq = {test,is_eq_exact,{f,To},[R,{literal,Bs}]},
 	    backward(Is, D, [Eq|Acc0]);
 	false ->
+	    I = {test,bs_start_match2,{f,To},Live,Args,Ctxt},
 	    backward(Is, D, [I|Acc])
     end;
 backward([{test,bs_start_match2,{f,To0},Live,[Src|_]=Info,Dst}|Is], D, Acc) ->
@@ -295,7 +313,28 @@ backward([{test,Op,{f,To0},Ops0}|Is], D, Acc) ->
 	    is_eq_exact -> combine_eqs(To, Ops0, D, Acc);
 	    _ -> {test,Op,{f,To},Ops0}
 	end,
-    backward(Is, D, [I|Acc]);
+    case {I,Acc} of
+	{{test,is_atom,Fail,Ops0},[{test,is_boolean,Fail,Ops0}|_]} ->
+	    %% An is_atom test before an is_boolean test (with the
+	    %% same failure label) is redundant.
+	    backward(Is, D, Acc);
+	{{test,is_atom,Fail,[R]},
+	 [{test,is_eq_exact,Fail,[R,{atom,_}]}|_]} ->
+	    %% An is_atom test before a comparison with an atom (with
+	    %% the same failure label) is redundant.
+	    backward(Is, D, Acc);
+	{{test,is_integer,Fail,[R]},
+	 [{test,is_eq_exact,Fail,[R,{integer,_}]}|_]} ->
+	    %% An is_integer test before a comparison with an integer
+	    %% (with the same failure label) is redundant.
+	    backward(Is, D, Acc);
+	{{test,_,_,_},_} ->
+	    %% Still a test instruction. Done.
+	    backward(Is, D, [I|Acc]);
+	{_,_} ->
+	    %% Rewritten to a select_val. Rescan.
+	    backward([I|Is], D, Acc)
+    end;
 backward([{test,Op,{f,To0},Live,Ops0,Dst}|Is], D, Acc) ->
     To1 = shortcut_bs_test(To0, Is, D),
     To2 = shortcut_label(To1, D),
@@ -348,6 +387,12 @@ shortcut_label(To0, D) ->
 shortcut_select_label(To, Reg, Lit, D) ->
     shortcut_rel_op(To, is_ne_exact, [Reg,Lit], D).
 
+prune_redundant([_,{f,Fail}|T], Fail) ->
+    prune_redundant(T, Fail);
+prune_redundant([V,F|T], Fail) ->
+    [V,F|prune_redundant(T, Fail)];
+prune_redundant([], _) -> [].
+
 %% Replace a comparison operator with a test instruction and a jump.
 %% For example, if we have this code:
 %%
diff --git a/lib/compiler/src/beam_dict.erl b/lib/compiler/src/beam_dict.erl
index 2b5f8c1b7f..654fb47dbd 100644
--- a/lib/compiler/src/beam_dict.erl
+++ b/lib/compiler/src/beam_dict.erl
@@ -44,7 +44,7 @@
 	 locals = []		    :: [{label(), arity(), label()}],
 	 imports = gb_trees:empty() :: import_tab(),
 	 strings = <<>>		    :: binary(),	%String pool
-	 lambdas = [],				%[{...}]
+	 lambdas = {0,[]},				%[{...}]
 	 literals = dict:new()	    :: literal_tab(),
 	 fnames = #{}               :: fname_tab(),
 	 lines = #{}                :: line_tab(),
@@ -145,15 +145,14 @@ string(Str, Dict) when is_list(Str) ->
 -spec lambda(label(), non_neg_integer(), bdict()) ->
         {non_neg_integer(), bdict()}.
 
-lambda(Lbl, NumFree, #asm{lambdas=Lambdas0}=Dict) ->
-    OldIndex = length(Lambdas0),
+lambda(Lbl, NumFree, #asm{lambdas={OldIndex,Lambdas0}}=Dict) ->
     %% Set Index the same as OldIndex.
     Index = OldIndex,
     %% Initialize OldUniq to 0. It will be set to an unique value
     %% based on the MD5 checksum of the BEAM code for the module.
     OldUniq = 0,
     Lambdas = [{Lbl,{OldIndex,Lbl,Index,NumFree,OldUniq}}|Lambdas0],
-    {OldIndex,Dict#asm{lambdas=Lambdas}}.
+    {OldIndex,Dict#asm{lambdas={OldIndex+1,Lambdas}}}.
 
 %% Returns the index for a literal (adding it to the literal table if necessary).
 %%    literal(Literal, Dict) -> {Index,Dict'}
@@ -236,13 +235,13 @@ string_table(#asm{strings=Strings,string_offset=Size}) ->
 
 -spec lambda_table(bdict()) -> {non_neg_integer(), [<<_:192>>]}.
 
-lambda_table(#asm{locals=Loc0,lambdas=Lambdas0}) ->
+lambda_table(#asm{locals=Loc0,lambdas={NumLambdas,Lambdas0}}) ->
     Lambdas1 = sofs:relation(Lambdas0),
     Loc = sofs:relation([{Lbl,{F,A}} || {F,A,Lbl} <- Loc0]),
     Lambdas2 = sofs:relative_product1(Lambdas1, Loc),
     Lambdas = [<<F:32,A:32,Lbl:32,Index:32,NumFree:32,OldUniq:32>> ||
 		  {{_,Lbl,Index,NumFree,OldUniq},{F,A}} <- sofs:to_external(Lambdas2)],
-    {length(Lambdas),Lambdas}.
+    {NumLambdas,Lambdas}.
 
 %% Returns the literal table.
 %%    literal_table(Dict) -> {NumLiterals, [<<TermSize>>,TermInExternalFormat]}
diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index 5e58e0f6ac..3b6eb19fe8 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -495,7 +495,7 @@ is_label_used_in_block({set,_,_,Info}, Lbl) ->
         {alloc,_,{gc_bif,_,{f,F}}} -> F =:= Lbl;
         {alloc,_,{put_map,_,{f,F}}} -> F =:= Lbl;
         {get_map_elements,{f,F}} -> F =:= Lbl;
-        {'catch',{f,F}} -> F =:= Lbl;
+        {try_catch,_,{f,F}} -> F =:= Lbl;
         {alloc,_,_} -> false;
         {put_tuple,_} -> false;
         {get_tuple_element,_} -> false;
diff --git a/lib/compiler/src/beam_peep.erl b/lib/compiler/src/beam_peep.erl
index 17fd2e502a..0c1abfe6a0 100644
--- a/lib/compiler/src/beam_peep.erl
+++ b/lib/compiler/src/beam_peep.erl
@@ -65,18 +65,6 @@ function({function,Name,Arity,CLabel,Is0}) ->
 %%      InEncoding =:= latin1, OutEncoding =:= unicode; 
 %%      InEncoding =:= latin1, OutEncoding =:= utf8 ->
 %%
-%% (2) A select_val/4 instruction that only verifies that
-%%     its argument is either 'true' or 'false' can be
-%%     be replaced with an is_boolean/2 instruction. That is:
-%%
-%%          select_val Reg Fail   [ true Next false Next ]
-%%        Next: ...
-%%         
-%%     can be rewritten to
-%%
-%%          is_boolean Fail Reg
-%%        Next: ...
-%%
 
 peep(Is) ->
     peep(Is, gb_sets:empty(), []).
@@ -95,12 +83,16 @@ peep([{gc_bif,_,_,_,_,Dst}=I|Is], SeenTests0, Acc) ->
     %% Kill all remembered tests that depend on the destination register.
     SeenTests = kill_seen(Dst, SeenTests0),
     peep(Is, SeenTests, [I|Acc]);
-peep([{test,is_boolean,{f,Fail},Ops}|_]=Is, SeenTests,
-     [{test,is_atom,{f,Fail},Ops}|Acc]) ->
-    %% The previous is_atom/2 test (with the same failure label) is redundant.
-    %% (If is_boolean(Src) is true, is_atom(Src) is also true, so it is
-    %% OK to still remember that we have seen is_atom/1.)
-    peep(Is, SeenTests, Acc);
+peep([{select,Op,R,F,Vls0}|Is], _, Acc) ->
+    case prune_redundant_values(Vls0, F) of
+	[] ->
+	    %% No values left. Must convert to plain jump.
+	    I = {jump,F},
+	    peep(Is, gb_sets:empty(), [I|Acc]);
+	[_|_]=Vls ->
+	    I = {select,Op,R,F,Vls},
+	    peep(Is, gb_sets:empty(), [I|Acc])
+    end;
 peep([{test,Op,_,Ops}=I|Is], SeenTests0, Acc) ->
     case beam_utils:is_pure_test(I) of
 	false ->
@@ -121,16 +113,6 @@ peep([{test,Op,_,Ops}=I|Is], SeenTests0, Acc) ->
 		    peep(Is, SeenTests, [I|Acc])
 	    end
     end;
-peep([{select,select_val,Src,Fail,
-       [{atom,false},{f,L},{atom,true},{f,L}]}|
-      [{label,L}|_]=Is], SeenTests, Acc) ->
-    I = {test,is_boolean,Fail,[Src]},
-    peep([I|Is], SeenTests, Acc);
-peep([{select,select_val,Src,Fail,
-       [{atom,true},{f,L},{atom,false},{f,L}]}|
-      [{label,L}|_]=Is], SeenTests, Acc) ->
-    I = {test,is_boolean,Fail,[Src]},
-    peep([I|Is], SeenTests, Acc);
 peep([I|Is], _, Acc) ->
     %% An unknown instruction. Throw away all information we
     %% have collected about test instructions.
@@ -155,3 +137,9 @@ kill_seen_1([{_,Ops}=Test|T], Dst) ->
 	false -> [Test|kill_seen_1(T, Dst)]
     end;
 kill_seen_1([], _) -> [].
+
+prune_redundant_values([_Val,F|Vls], F) ->
+    prune_redundant_values(Vls, F);
+prune_redundant_values([Val,Lbl|Vls], F) ->
+    [Val,Lbl|prune_redundant_values(Vls, F)];
+prune_redundant_values([], _) -> [].
diff --git a/lib/compiler/src/beam_reorder.erl b/lib/compiler/src/beam_reorder.erl
new file mode 100644
index 0000000000..41586a7bf2
--- /dev/null
+++ b/lib/compiler/src/beam_reorder.erl
@@ -0,0 +1,139 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1999-2013. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(beam_reorder).
+
+-export([module/2]).
+-import(lists, [member/2,reverse/1]).
+
+module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
+    Fs = [function(F) || F <- Fs0],
+    {ok,{Mod,Exp,Attr,Fs,Lc}}.
+
+function({function,Name,Arity,CLabel,Is0}) ->
+    try
+	Is = reorder(Is0),
+	{function,Name,Arity,CLabel,Is}
+    catch
+	Class:Error ->
+	    Stack = erlang:get_stacktrace(),
+	    io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+	    erlang:raise(Class, Error, Stack)
+    end.
+
+%% reorder(Instructions0) -> Instructions
+%%  Reorder instructions before the beam_block pass, because reordering
+%%  will be more cumbersome when the blocks are in place.
+%%
+%%  Execution of get_tuple_element instructions can be delayed until
+%%  they are actually needed. Consider the sequence:
+%%
+%%      get_tuple_element Tuple Pos Dst
+%%      test Test Fail Operands
+%%
+%%  If Dst is killed at label Fail (and not referenced in Operands),
+%%  we can swap the instructions:
+%%
+%%      test Test Fail Operands
+%%      get_tuple_element Tuple Pos Dst
+%%
+%%  That can be beneficial in two ways: Firstly, if the branch is taken
+%%  we have avoided execution of the get_tuple_element instruction.
+%%  Secondly, even if the branch is not taken, subsequent optimization
+%%  (opt_blocks/1) may be able to change Dst to the final destination
+%%  register and eliminate a 'move' instruction.
+
+reorder(Is) ->
+    D = beam_utils:index_labels(Is),
+    reorder_1(Is, D, []).
+
+reorder_1([{Op,_,_}=TryCatch|[I|Is]=Is0], D, Acc)
+  when Op =:= 'catch'; Op =:= 'try' ->
+    %% Don't allow 'try' or 'catch' instructions to split blocks if
+    %% it can be avoided.
+    case is_safe(I) of
+	false ->
+	    reorder_1(Is0, D, [TryCatch|Acc]);
+	true ->
+	    reorder_1([TryCatch|Is], D, [I|Acc])
+    end;
+reorder_1([{label,L}=I|_], D, Acc) ->
+    Is = beam_utils:code_at(L, D),
+    reorder_1(Is, D, [I|Acc]);
+reorder_1([{test,is_nonempty_list,_,_}=I|Is], D, Acc) ->
+    %% The run-time system may combine the is_nonempty_list test with
+    %% the following get_list instruction.
+    reorder_1(Is, D, [I|Acc]);
+reorder_1([{test,_,_,_}=I,
+	   {select,_,_,_,_}=S|Is], D, Acc) ->
+    %% There is nothing to gain by inserting a get_tuple_element
+    %% instruction between the test instruction and the select
+    %% instruction.
+    reorder_1(Is, D, [S,I|Acc]);
+reorder_1([{test,_,{f,L},Ss}=I|Is0], D0,
+	  [{get_tuple_element,_,_,El}=G|Acc0]=Acc) ->
+    case member(El, Ss) of
+	true ->
+	    reorder_1(Is0, D0, [I|Acc]);
+	false ->
+	    case beam_utils:is_killed_at(El, L, D0) of
+		true ->
+		    Is = [I,G|Is0],
+		    reorder_1(Is, D0, Acc0);
+		false ->
+		    case beam_utils:is_killed(El, Is0, D0) of
+			true ->
+			    Code0 = beam_utils:code_at(L, D0),
+			    Code = [G|Code0],
+			    D = beam_utils:index_label(L, Code, D0),
+			    Is = [I|Is0],
+			    reorder_1(Is, D, Acc0);
+			false ->
+			    reorder_1(Is0, D0, [I|Acc])
+		    end
+	    end
+    end;
+reorder_1([{allocate_zero,N,Live}=I0|Is], D,
+	  [{get_tuple_element,{x,Tup},_,{x,Dst}}=G|Acc]=Acc0) ->
+    case Tup < Dst andalso Dst+1 =:= Live of
+	true ->
+	    %% Move allocation instruction upwards past
+	    %% get_tuple_element instructions to create more
+	    %% opportunities for moving get_tuple_element
+	    %% instructions.
+	    I = {allocate_zero,N,Dst},
+	    reorder_1([I,G|Is], D, Acc);
+	false ->
+	    reorder_1(Is, D, [I0|Acc0])
+    end;
+reorder_1([I|Is], D, Acc) ->
+    reorder_1(Is, D, [I|Acc]);
+reorder_1([], _, Acc) -> reverse(Acc).
+
+%% is_safe(Instruction) -> true|false
+%%  Test whether an instruction is safe (cannot cause an exception).
+
+is_safe({kill,_}) -> true;
+is_safe({move,_,_}) -> true;
+is_safe({put,_}) -> true;
+is_safe({put_list,_,_,_}) -> true;
+is_safe({put_tuple,_,_}) -> true;
+is_safe({test_heap,_,_}) -> true;
+is_safe(_) -> false.
diff --git a/lib/compiler/src/beam_split.erl b/lib/compiler/src/beam_split.erl
index 3be9311080..bb1c0e23a9 100644
--- a/lib/compiler/src/beam_split.erl
+++ b/lib/compiler/src/beam_split.erl
@@ -57,8 +57,8 @@ split_block([{set,[D],[S|Puts],{alloc,R,{put_map,Op,{f,Lbl}=Fail}}}|Is],
 split_block([{set,Ds,[S|Ss],{get_map_elements,Fail}}|Is], Bl, Acc) ->
     Gets = beam_utils:join_even(Ss,Ds),
     split_block(Is, [], [{get_map_elements,Fail,S,{list,Gets}}|make_block(Bl, Acc)]);
-split_block([{set,[R],[],{'catch',L}}|Is], Bl, Acc) ->
-    split_block(Is, [], [{'catch',R,L}|make_block(Bl, Acc)]);
+split_block([{set,[R],[],{try_catch,Op,L}}|Is], Bl, Acc) ->
+    split_block(Is, [], [{Op,R,L}|make_block(Bl, Acc)]);
 split_block([{set,[],[],{line,_}=Line}|Is], Bl, Acc) ->
     split_block(Is, [], [Line|make_block(Bl, Acc)]);
 split_block([I|Is], Bl, Acc) ->
diff --git a/lib/compiler/src/beam_type.erl b/lib/compiler/src/beam_type.erl
index 5298589f83..4b45c28623 100644
--- a/lib/compiler/src/beam_type.erl
+++ b/lib/compiler/src/beam_type.erl
@@ -23,7 +23,8 @@
 
 -export([module/2]).
 
--import(lists, [foldl/3,reverse/1,filter/2]).
+-import(lists, [filter/2,foldl/3,keyfind/3,member/2,
+		reverse/1,reverse/2,sort/1]).
 
 module({Mod,Exp,Attr,Fs0,Lc}, _Opts) ->
     Fs = [function(F) || F <- Fs0],
@@ -92,8 +93,19 @@ simplify_basic_1([{set,[D],[TupleReg],{get_tuple_element,0}}=I|Is0], Ts0, Acc) -
 	    Ts = update(I, Ts0),
 	    simplify_basic_1(Is0, Ts, [I|Acc])
     end;
-simplify_basic_1([{set,_,_,{'catch',_}}=I|Is], _Ts, Acc) ->
+simplify_basic_1([{set,_,_,{try_catch,_,_}}=I|Is], _Ts, Acc) ->
     simplify_basic_1(Is, tdb_new(), [I|Acc]);
+simplify_basic_1([{test,is_atom,_,[R]}=I|Is], Ts, Acc) ->
+    case tdb_find(R, Ts) of
+	boolean -> simplify_basic_1(Is, Ts, Acc);
+	_ -> simplify_basic_1(Is, Ts, [I|Acc])
+    end;
+simplify_basic_1([{test,is_integer,_,[R]}=I|Is], Ts, Acc) ->
+    case tdb_find(R, Ts) of
+	integer -> simplify_basic_1(Is, Ts, Acc);
+	{integer,_} -> simplify_basic_1(Is, Ts, Acc);
+	_ -> simplify_basic_1(Is, Ts, [I|Acc])
+    end;
 simplify_basic_1([{test,is_tuple,_,[R]}=I|Is], Ts, Acc) ->
     case tdb_find(R, Ts) of
 	{tuple,_,_} -> simplify_basic_1(Is, Ts, Acc);
@@ -137,6 +149,16 @@ simplify_basic_1([{test,is_record,_,[R,{atom,_}=Tag,{integer,Arity}]}=I|Is], Ts0
 	    Ts = update(I, Ts0),
 	    simplify_basic_1(Is, Ts, [I|Acc])
     end;
+simplify_basic_1([{select,select_val,Reg,_,_}=I0|Is], Ts, Acc) ->
+    I = case tdb_find(Reg, Ts) of
+	    {integer,Range} ->
+		simplify_select_val_int(I0, Range);
+	    boolean ->
+		simplify_select_val_bool(I0);
+	    _ ->
+		I0
+	end,
+    simplify_basic_1(Is, tdb_new(), [I|Acc]);
 simplify_basic_1([I|Is], Ts0, Acc) ->
     Ts = update(I, Ts0),
     simplify_basic_1(Is, Ts, [I|Acc]);
@@ -144,6 +166,32 @@ simplify_basic_1([], Ts, Acc) ->
     Is = reverse(Acc),
     {Is,Ts}.
 
+simplify_select_val_int({select,select_val,R,_,L0}=I, {Min,Max}) ->
+    Vs = sort([V || {integer,V} <- L0]),
+    case eq_ranges(Vs, Min, Max) of
+	false -> I;
+	true -> simplify_select_val_1(L0, {integer,Max}, R, [])
+    end.
+
+simplify_select_val_bool({select,select_val,R,_,L}=I) ->
+    Vs = sort([V || {atom,V} <- L]),
+    case Vs of
+	[false,true] ->
+	    simplify_select_val_1(L, {atom,false}, R, []);
+	_ ->
+	    I
+    end.
+
+simplify_select_val_1([Val,F|T], Val, R, Acc) ->
+    L = reverse(Acc, T),
+    {select,select_val,R,F,L};
+simplify_select_val_1([V,F|T], Val, R, Acc) ->
+    simplify_select_val_1(T, Val, R, [F,V|Acc]).
+
+eq_ranges([H], H, H) -> true;
+eq_ranges([H|T], H, Max) -> eq_ranges(T, H+1, Max);
+eq_ranges(_, _, _) -> false.
+
 %% simplify_float([Instruction], TypeDatabase) ->
 %%                 {[Instruction],TypeDatabase'} | not_possible
 %%  Simplify floating point operations in blocks.
@@ -199,7 +247,7 @@ simplify_float_1([{set,[D0],[A0,B0],{alloc,_,{gc_bif,Op0,{f,0}}}}=I|Is]=Is0,
 	    Ts = tdb_update([{D0,float}], Ts0),
 	    simplify_float_1(Is, Ts, Rs, Acc)
     end;
-simplify_float_1([{set,_,_,{'catch',_}}=I|Is]=Is0, _Ts, Rs0, Acc0) ->
+simplify_float_1([{set,_,_,{try_catch,_,_}}=I|Is]=Is0, _Ts, Rs0, Acc0) ->
     Acc = flush_all(Rs0, Is0, Acc0),
     simplify_float_1(Is, tdb_new(), Rs0, [I|Acc]);
 simplify_float_1([{set,_,_,{line,_}}=I|Is], Ts, Rs, Acc) ->
@@ -311,7 +359,7 @@ flt_need_heap_2({set,_,_,{get_tuple_element,_}}, H, Fl) ->
     {[],H,Fl};
 flt_need_heap_2({set,_,_,get_list}, H, Fl) ->
     {[],H,Fl};
-flt_need_heap_2({set,_,_,{'catch',_}}, H, Fl) ->
+flt_need_heap_2({set,_,_,{try_catch,_,_}}, H, Fl) ->
     {[],H,Fl};
 %% All other instructions should cause the insertion of an allocation
 %% instruction if needed.
@@ -382,6 +430,17 @@ update({set,[D],[{integer,I},Reg],{bif,element,_}}, Ts0) ->
     tdb_update([{Reg,{tuple,I,[]}},{D,kill}], Ts0);
 update({set,[D],[_Index,Reg],{bif,element,_}}, Ts0) ->
     tdb_update([{Reg,{tuple,0,[]}},{D,kill}], Ts0);
+update({set,[D],Args,{bif,N,_}}, Ts0) ->
+    Ar = length(Args),
+    BoolOp = erl_internal:new_type_test(N, Ar) orelse
+	erl_internal:comp_op(N, Ar) orelse
+	erl_internal:bool_op(N, Ar),
+    case BoolOp of
+	true ->
+	    tdb_update([{D,boolean}], Ts0);
+	false ->
+	    tdb_update([{D,kill}], Ts0)
+    end;
 update({set,[D],[S],{get_tuple_element,0}}, Ts) ->
     tdb_update([{D,{tuple_element,S,0}}], Ts);
 update({set,[D],[S],{alloc,_,{gc_bif,float,{f,0}}}}, Ts0) ->
@@ -390,6 +449,13 @@ update({set,[D],[S],{alloc,_,{gc_bif,float,{f,0}}}}, Ts0) ->
 	true ->  tdb_update([{D,float}], Ts0);
 	false -> Ts0
     end;
+update({set,[D],[S1,S2],{alloc,_,{gc_bif,'band',{f,0}}}}, Ts) ->
+    case keyfind(integer, 1, [S1,S2]) of
+	{integer,N} ->
+	    update_band(N, D, Ts);
+	false ->
+	    tdb_update([{D,integer}], Ts)
+    end;
 update({set,[D],[S1,S2],{alloc,_,{gc_bif,'/',{f,0}}}}, Ts0) ->
     %% Make sure we reject non-numeric literals.
     case possibly_numeric(S1) andalso possibly_numeric(S2) of
@@ -397,15 +463,17 @@ update({set,[D],[S1,S2],{alloc,_,{gc_bif,'/',{f,0}}}}, Ts0) ->
 	false -> Ts0
     end;
 update({set,[D],[S1,S2],{alloc,_,{gc_bif,Op,{f,0}}}}, Ts0) ->
-    case arith_op(Op) of
-	no ->
-	    tdb_update([{D,kill}], Ts0);
-	{yes,_} ->
+    case op_type(Op) of
+	integer ->
+	    tdb_update([{D,integer}], Ts0);
+	{float,_} ->
 	    case {tdb_find(S1, Ts0),tdb_find(S2, Ts0)} of
 		{float,_} -> tdb_update([{D,float}], Ts0);
 		{_,float} -> tdb_update([{D,float}], Ts0);
 		{_,_} -> tdb_update([{D,kill}], Ts0)
-	    end
+	    end;
+	unknown ->
+	    tdb_update([{D,kill}], Ts0)
     end;
 update({set,[],_Src,_Op}, Ts0) -> Ts0;
 update({set,[D],_Src,_Op}, Ts0) ->
@@ -437,6 +505,8 @@ update({test,is_record,_Fail,[Src,Tag,{integer,Arity}]}, Ts) ->
     tdb_update([{Src,{tuple,Arity,[Tag]}}], Ts);
 update({test,_Test,_Fail,_Other}, Ts) ->
     Ts;
+update({test,bs_get_integer2,_,_,Args,Dst}, Ts) ->
+    tdb_update([{Dst,get_bs_integer_type(Args)}], Ts);
 update({call_ext,Ar,{extfunc,math,Math,Ar}}, Ts) ->
     case is_math_bif(Math, Ar) of
 	true -> tdb_update([{{x,0},float}], Ts);
@@ -453,10 +523,43 @@ update({call,_Arity,_Func}, Ts) -> tdb_kill_xregs(Ts);
 update({call_ext,_Arity,_Func}, Ts) -> tdb_kill_xregs(Ts);
 update({make_fun2,_,_,_,_}, Ts) -> tdb_kill_xregs(Ts);
 update({line,_}, Ts) -> Ts;
+update({bs_save2,_,_}, Ts) -> Ts;
+update({bs_restore2,_,_}, Ts) -> Ts;
 
 %% The instruction is unknown.  Kill all information.
 update(_I, _Ts) -> tdb_new().
 
+update_band(N, Reg, Ts) ->
+    Type = update_band_1(N, 0),
+    tdb_update([{Reg,Type}], Ts).
+
+update_band_1(N, Bits) when Bits < 64 ->
+    case 1 bsl Bits of
+	P when P =:= N + 1 ->
+	    {integer,{0,N}};
+	P when P > N + 1 ->
+	    integer;
+	_ ->
+	    update_band_1(N, Bits+1)
+    end;
+update_band_1(_, _) ->
+    %% Negative or large positive number. Give up.
+    integer.
+
+get_bs_integer_type([_,{integer,N},U,{field_flags,Fl}])
+  when N*U < 64 ->
+    NumBits = N*U,
+    case member(unsigned, Fl) of
+	true ->
+	    {integer,{0,(1 bsl NumBits)-1}};
+	false ->
+	    %% Signed integer. Don't bother.
+	    integer
+    end;
+get_bs_integer_type(_) ->
+    %% Avoid creating ranges with a huge upper limit.
+    integer.
+
 is_math_bif(cos, 1) -> true;
 is_math_bif(cosh, 1) -> true;
 is_math_bif(sin, 1) -> true;
@@ -545,11 +648,22 @@ load_reg(V, Ts, Rs0, Is0) ->
 	    {Rs,Is}
     end.
 
-arith_op('+') -> {yes,fadd};
-arith_op('-') -> {yes,fsub};
-arith_op('*') -> {yes,fmul};
-arith_op('/') -> {yes,fdiv};
-arith_op(_) -> no.
+arith_op(Op) ->
+    case op_type(Op) of
+	{float,Instr} -> {yes,Instr};
+	_ -> no
+    end.
+
+op_type('+') -> {float,fadd};
+op_type('-') -> {float,fsub};
+op_type('*') -> {float,fmul};
+%% '/' and 'band' are specially handled.
+op_type('bor') -> integer;
+op_type('bxor') -> integer;
+op_type('bsl') -> integer;
+op_type('bsr') -> integer;
+op_type('div') -> integer;
+op_type(_) -> unknown.
 
 flush(Rs, [{set,[_],[],{put_tuple,_}}|_]=Is0, Acc0) ->
     Acc = flush_all(Rs, Is0, Acc0),
@@ -618,7 +732,6 @@ checkerror(Is) ->
     checkerror_1(Is, Is).
 
 checkerror_1([{set,[],[],fcheckerror}|_], OrigIs) -> OrigIs;
-checkerror_1([{set,[],[],fclearerror}|_], OrigIs) -> OrigIs;
 checkerror_1([{set,_,_,{bif,fadd,_}}|_], OrigIs) -> checkerror_2(OrigIs);
 checkerror_1([{set,_,_,{bif,fsub,_}}|_], OrigIs) -> checkerror_2(OrigIs);
 checkerror_1([{set,_,_,{bif,fmul,_}}|_], OrigIs) -> checkerror_2(OrigIs);
@@ -640,6 +753,9 @@ checkerror_2(OrigIs) -> [{set,[],[],fcheckerror}|OrigIs].
 %%% of the first element).
 %%%
 %%% 'float' means that the register contains a float.
+%%%
+%%% 'integer' or {integer,{Min,Max}} means that the register contains an
+%%% integer (in the latter case, in the inclusive range Min..Max).
 
 %% tdb_new() -> EmptyDataBase
 %%  Creates a new, empty type database.
@@ -729,10 +845,20 @@ merge_type_info({tuple,Sz1,[]}, {tuple,_Sz2,First}=Tuple2) ->
     merge_type_info({tuple,Sz1,First}, Tuple2);
 merge_type_info({tuple,_Sz1,First}=Tuple1, {tuple,Sz2,_}) ->
     merge_type_info(Tuple1, {tuple,Sz2,First});
+merge_type_info(integer, {integer,_}=Int) ->
+    Int;
+merge_type_info({integer,_}=Int, integer) ->
+    Int;
+merge_type_info({integer,{Min1,Max1}}, {integer,{Min2,Max2}}) ->
+    {integer,{max(Min1, Min2),min(Max1, Max2)}};
 merge_type_info(NewType, _) ->
     verify_type(NewType),
     NewType.
 
+verify_type(boolean) -> ok;
+verify_type(integer) -> ok;
+verify_type({integer,{Min,Max}})
+  when is_integer(Min), is_integer(Max) -> ok;
 verify_type(map) -> ok;
 verify_type(nonempty_list) -> ok;
 verify_type({tuple,Sz,[]}) when is_integer(Sz) -> ok;
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index fbcd5de1bb..68d6105cfa 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -484,6 +484,15 @@ check_liveness(R, [{get_map_elements,{f,Fail},S,{list,L}}|Is], St0) ->
 		    Other
 	    end
     end;
+check_liveness(R, [{test_heap,N,Live}|Is], St) ->
+    I = {block,[{set,[],[],{alloc,Live,{nozero,nostack,N,[]}}}]},
+    check_liveness(R, [I|Is], St);
+check_liveness(R, [{allocate_zero,N,Live}|Is], St) ->
+    I = {block,[{set,[],[],{alloc,Live,{zero,N,0,[]}}}]},
+    check_liveness(R, [I|Is], St);
+check_liveness(R, [{get_list,S,D1,D2}|Is], St) ->
+    I = {block,[{set,[D1,D2],[S],get_list}]},
+    check_liveness(R, [I|Is], St);
 check_liveness(_R, Is, St) when is_list(Is) ->
 %%     case Is of
 %% 	[I|_] ->
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index 6004f1974e..fd38fc0095 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -31,15 +31,6 @@
 
 -import(lists, [reverse/1,foldl/3,foreach/2,dropwhile/2]).
 
--define(MAXREG, 1024).
-
-%%-define(DEBUG, 1).
--ifdef(DEBUG).
--define(DBG_FORMAT(F, D), (io:format((F), (D)))).
--else.
--define(DBG_FORMAT(F, D), ok).
--endif.
-
 %% To be called by the compiler.
 module({Mod,Exp,Attr,Fs,Lc}=Code, _Opts)
   when is_atom(Mod), is_list(Exp), is_list(Attr), is_integer(Lc) ->
@@ -170,29 +161,18 @@ validate_0(Module, [{function,Name,Ar,Entry,Code}|Fs], Ft) ->
 	 		% in the module (those that start with bs_start_match2).
 	}).
 
--ifdef(DEBUG).
-print_st(#st{x=Xs,y=Ys,numy=NumY,h=H,ct=Ct}) ->
-    io:format("  #st{x=~p~n"
-	      "      y=~p~n"
-	      "      numy=~p,h=~p,ct=~w~n",
-	      [gb_trees:to_list(Xs),gb_trees:to_list(Ys),NumY,H,Ct]).
--endif.
-
 validate_1(Is, Name, Arity, Entry, Ft) ->
     validate_2(labels(Is), Name, Arity, Entry, Ft).
 
 validate_2({Ls1,[{func_info,{atom,Mod},{atom,Name},Arity}=_F|Is]},
 	   Name, Arity, Entry, Ft) ->
-    lists:foreach(fun (_L) -> ?DBG_FORMAT("  ~p.~n", [{label,_L}]) end, Ls1),
-    ?DBG_FORMAT("  ~p.~n", [_F]),
     validate_3(labels(Is), Name, Arity, Entry, Mod, Ls1, Ft);
 validate_2({Ls1,Is}, Name, Arity, _Entry, _Ft) ->
     error({{'_',Name,Arity},{first(Is),length(Ls1),illegal_instruction}}).
 
 validate_3({Ls2,Is}, Name, Arity, Entry, Mod, Ls1, Ft) ->
-    lists:foreach(fun (_L) -> ?DBG_FORMAT("  ~p.~n", [{label,_L}]) end, Ls2),
     Offset = 1 + length(Ls1) + 1 + length(Ls2),
-    EntryOK = (Entry =:= undefined) orelse lists:member(Entry, Ls2),
+    EntryOK = lists:member(Entry, Ls2),
     if
 	EntryOK ->
 	    St = init_state(Arity),
@@ -260,7 +240,6 @@ valfun([], MFA, _Offset, #vst{branched=Targets0,labels=Labels0}=Vst) ->
 	    error({MFA,Error})
     end;
 valfun([I|Is], MFA, Offset, Vst0) ->
-    ?DBG_FORMAT("    ~p.\n", [I]),
     valfun(Is, MFA, Offset+1,
 	   try
 	       Vst = val_dsetel(I, Vst0),
@@ -278,7 +257,6 @@ valfun_1({label,Lbl}, #vst{current=St0,branched=B,labels=Lbls}=Vst) ->
 valfun_1(_I, #vst{current=none}=Vst) ->
     %% Ignore instructions after erlang:error/1,2, which
     %% the original R10B compiler thought would return.
-    ?DBG_FORMAT("Ignoring ~p\n", [_I]),
     Vst;
 valfun_1({badmatch,Src}, Vst) ->
     assert_term(Src, Vst),
@@ -980,9 +958,9 @@ get_fls(#vst{current=#st{fls=Fls}}) when is_atom(Fls) -> Fls.
 
 init_fregs() -> 0.
 
-set_freg({fr,Fr}, #vst{current=#st{f=Fregs0}=St}=Vst)
+set_freg({fr,Fr}=Freg, #vst{current=#st{f=Fregs0}=St}=Vst)
   when is_integer(Fr), 0 =< Fr ->
-    limit_check(Fr),
+    check_limit(Freg),
     Bit = 1 bsl Fr,
     if
 	Fregs0 band Bit =:= 0 ->
@@ -995,9 +973,10 @@ set_freg(Fr, _) -> error({bad_target,Fr}).
 assert_freg_set({fr,Fr}=Freg, #vst{current=#st{f=Fregs}})
   when is_integer(Fr), 0 =< Fr ->
     if
-	Fregs band (1 bsl Fr) =/= 0 ->
-	    limit_check(Fr);
-	true -> error({uninitialized_reg,Freg})
+	(Fregs bsr Fr) band 1 =:= 0 ->
+	    error({uninitialized_reg,Freg});
+	true ->
+	    ok
     end;
 assert_freg_set(Fr, _) -> error({bad_source,Fr}).
 
@@ -1076,16 +1055,16 @@ set_type(Type, {x,_}=Reg, Vst) -> set_type_reg(Type, Reg, Vst);
 set_type(Type, {y,_}=Reg, Vst) -> set_type_y(Type, Reg, Vst);
 set_type(_, _, #vst{}=Vst) -> Vst.
 
-set_type_reg(Type, {x,X}, #vst{current=#st{x=Xs}=St}=Vst) 
+set_type_reg(Type, {x,X}=Reg, #vst{current=#st{x=Xs}=St}=Vst)
   when is_integer(X), 0 =< X ->
-    limit_check(X),
+    check_limit(Reg),
     Vst#vst{current=St#st{x=gb_trees:enter(X, Type, Xs)}};
 set_type_reg(Type, Reg, Vst) ->
     set_type_y(Type, Reg, Vst).
 
 set_type_y(Type, {y,Y}=Reg, #vst{current=#st{y=Ys0}=St}=Vst)
   when is_integer(Y), 0 =< Y ->
-    limit_check(Y),
+    check_limit(Reg),
     Ys = case gb_trees:lookup(Y, Ys0) of
 	     none ->
 		 error({invalid_store,Reg,Type});
@@ -1612,17 +1591,19 @@ return_type_math(pow, 2) -> {float,[]};
 return_type_math(pi, 0) -> {float,[]};
 return_type_math(F, A) when is_atom(F), is_integer(A), A >= 0 -> term.
 
-limit_check(Num) when is_integer(Num), Num >= ?MAXREG ->
-    error(limit);
-limit_check(_) -> ok.
+check_limit({x,X}) when is_integer(X), X < 1023 ->
+    %% Note: x(1023) is reserved for use by the BEAM loader.
+    ok;
+check_limit({y,Y}) when is_integer(Y), Y < 1024 ->
+    ok;
+check_limit({fr,Fr}) when is_integer(Fr), Fr < 1024 ->
+    ok;
+check_limit(_) ->
+    error(limit).
 
 min(A, B) when is_integer(A), is_integer(B), A < B -> A;
 min(A, B) when is_integer(A), is_integer(B) -> B.
 
 gb_trees_from_list(L) -> gb_trees:from_orddict(lists:sort(L)).
 
--ifdef(DEBUG).
-error(Error) -> exit(Error).
--else.
 error(Error) -> throw(Error).
--endif.
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index e0a29fe9b1..a2a23a2b90 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -671,11 +671,16 @@ asm_passes() ->
     %% Assembly level optimisations.
     [{delay,
       [{pass,beam_a},
+       {iff,da,{listing,"a"}},
        {unless,no_postopt,
-	[{pass,beam_block},
+	[{unless,no_reorder,{pass,beam_reorder}},
+	 {iff,dre,{listing,"reorder"}},
+	 {pass,beam_block},
 	 {iff,dblk,{listing,"block"}},
 	 {unless,no_except,{pass,beam_except}},
 	 {iff,dexcept,{listing,"except"}},
+	 {unless,no_bs_opt,{pass,beam_bs}},
+	 {iff,dbs,{listing,"bs"}},
 	 {unless,no_bopt,{pass,beam_bool}},
 	 {iff,dbool,{listing,"bool"}},
 	 {unless,no_topt,{pass,beam_type}},
@@ -703,6 +708,7 @@ asm_passes() ->
        {iff,no_postopt,[{pass,beam_clean}]},
 
        {pass,beam_z},
+       {iff,dz,{listing,"z"}},
        {iff,dopt,{listing,"optimize"}},
        {iff,'S',{listing,"S"}},
        {iff,'to_asm',{done,"S"}}]},
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index afb85f4710..a2b2a1d277 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -25,6 +25,7 @@
 	     beam_asm,
 	     beam_block,
 	     beam_bool,
+	     beam_bs,
 	     beam_bsm,
 	     beam_clean,
 	     beam_dead,
@@ -37,6 +38,7 @@
 	     beam_opcodes,
 	     beam_peep,
 	     beam_receive,
+	     beam_reorder,
 	     beam_split,
 	     beam_trim,
 	     beam_type,
diff --git a/lib/compiler/src/core_lib.erl b/lib/compiler/src/core_lib.erl
index 3abb520485..839c736ff2 100644
--- a/lib/compiler/src/core_lib.erl
+++ b/lib/compiler/src/core_lib.erl
@@ -21,52 +21,16 @@
 
 -module(core_lib).
 
--deprecated({get_anno,1,next_major_release}).
--deprecated({set_anno,2,next_major_release}).
--deprecated({is_literal,1,next_major_release}).
--deprecated({is_literal_list,1,next_major_release}).
--deprecated({literal_value,1,next_major_release}).
-
--export([get_anno/1,set_anno/2]).
--export([is_literal/1,is_literal_list/1]).
--export([literal_value/1]).
 -export([make_values/1]).
 -export([is_var_used/2]).
 
 -include("core_parse.hrl").
 
-%%
-%% Generic get/set annotation that should be used only with cerl() structures.
-%%
--spec get_anno(cerl:cerl()) -> term().
-
-get_anno(C) -> cerl:get_ann(C).
-
--spec set_anno(cerl:cerl(), term()) -> cerl:cerl().
-
-set_anno(C, A) -> cerl:set_ann(C, A).
-
--spec is_literal(cerl:cerl()) -> boolean().
-
-is_literal(Cerl) ->
-    cerl:is_literal(cerl:fold_literal(Cerl)).
-
--spec is_literal_list([cerl:cerl()]) -> boolean().
-
-is_literal_list(Es) -> lists:all(fun is_literal/1, Es).
-
-%% Return the value of LitExpr.
--spec literal_value(cerl:c_literal() | cerl:c_binary() |
-		    cerl:c_map() | cerl:c_cons() | cerl:c_tuple()) -> term().
-
-literal_value(Cerl) ->
-    cerl:concrete(cerl:fold_literal(Cerl)).
-
 %% Make a suitable values structure, expr or values, depending on Expr.
 -spec make_values([cerl:cerl()] | cerl:cerl()) -> cerl:cerl().
 
 make_values([E]) -> E;
-make_values([H|_]=Es) -> #c_values{anno=get_anno(H),es=Es};
+make_values([H|_]=Es) -> #c_values{anno=cerl:get_ann(H),es=Es};
 make_values([]) -> #c_values{es=[]};
 make_values(E) -> E.
 
diff --git a/lib/compiler/src/core_lint.erl b/lib/compiler/src/core_lint.erl
index cc54f6e411..7d3513c0ba 100644
--- a/lib/compiler/src/core_lint.erl
+++ b/lib/compiler/src/core_lint.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %% 
-%% Copyright Ericsson AB 1999-2013. All Rights Reserved.
+%% Copyright Ericsson AB 1999-2015. All Rights Reserved.
 %% 
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -73,7 +73,7 @@
 %% Define the lint state record.
 
 -record(lint, {module       :: module(),		% Current module
-	       func         :: fa(),			% Current function
+	       func         :: fa() | 'undefined',	% Current function
 	       errors  = [] :: [error()],		% Errors
 	       warnings= [] :: [warning()]}).		% Warnings
 
diff --git a/lib/compiler/src/sys_core_dsetel.erl b/lib/compiler/src/sys_core_dsetel.erl
index ac32db10fe..c6cfdbae7e 100644
--- a/lib/compiler/src/sys_core_dsetel.erl
+++ b/lib/compiler/src/sys_core_dsetel.erl
@@ -72,7 +72,7 @@ module(M0, _Options) ->
     {ok,M}.
 
 visit_module(#c_module{defs=Ds0}=R) ->
-    Env = dict:new(),
+    Env = #{},
     Ds = visit_module_1(Ds0, Env, []),
     R#c_module{defs=Ds}.
 
@@ -95,9 +95,11 @@ visit(Env, #c_var{name={_,_}}=R) ->
     {R, Env};
 visit(Env0, #c_var{name=X}=R) ->
     %% There should not be any free variables. If there are,
-    %% the next line will cause an exception.
-    {ok, N} = dict:find(X, Env0),
-    {R, dict:store(X, N+1, Env0)};
+    %% the case will fail with an exception.
+    case Env0 of
+	#{X:=N} ->
+	    {R, Env0#{X:=N+1}}
+    end;
 visit(Env, #c_literal{}=R) ->
     {R, Env};
 visit(Env0, #c_tuple{es=Es0}=R) ->
@@ -203,7 +205,7 @@ bind_vars(Vs, Env) ->
     bind_vars(Vs, Env, []).
 
 bind_vars([#c_var{name=X}|Vs], Env0, Xs)->
-    bind_vars(Vs, dict:store(X, 0, Env0), [X|Xs]);
+    bind_vars(Vs, Env0#{X=>0}, [X|Xs]);
 bind_vars([], Env,Xs) ->
     {Xs, Env}.
 
@@ -217,7 +219,7 @@ visit_pats([], Env, Vs) ->
     {Vs, Env}.
 
 visit_pat(Env0, #c_var{name=V}, Vs) ->
-    {[V|Vs], dict:store(V, 0, Env0)};
+    {[V|Vs], Env0#{V=>0}};
 visit_pat(Env0, #c_tuple{es=Es}, Vs) ->
     visit_pats(Es, Env0, Vs);
 visit_pat(Env0, #c_map{es=Es}, Vs) ->
@@ -235,23 +237,25 @@ visit_pat(Env0, #c_bitstr{val=Val,size=Sz}, Vs0) ->
 	case Sz of
 	    #c_var{name=V} ->
 		%% We don't tolerate free variables.
-		{ok, N} = dict:find(V, Env0),
-		{Vs0, dict:store(V, N+1, Env0)};
+		case Env0 of
+		    #{V:=N} ->
+			{Vs0, Env0#{V:=N+1}}
+		end;
 	    _ ->
 		visit_pat(Env0, Sz, Vs0)
 	end,
     visit_pat(Env1, Val, Vs1);
 visit_pat(Env0, #c_alias{pat=P,var=#c_var{name=V}}, Vs) ->
-    visit_pat(dict:store(V, 0, Env0), P, [V|Vs]);
+    visit_pat(Env0#{V=>0}, P, [V|Vs]);
 visit_pat(Env, #c_literal{}, Vs) ->
     {Vs, Env}.
 
 restore_vars([V|Vs], Env0, Env1) ->
-    case dict:find(V, Env0) of
-	{ok, N} ->
-	    restore_vars(Vs, Env0, dict:store(V, N, Env1));
-	error ->
-	    restore_vars(Vs, Env0, dict:erase(V, Env1))
+    case Env0 of
+	#{V:=N} ->
+	    restore_vars(Vs, Env0, Env1#{V=>N});
+	_ ->
+	    restore_vars(Vs, Env0, maps:remove(V, Env1))
     end;
 restore_vars([], _, Env1) ->
     Env1.
@@ -349,8 +353,8 @@ is_safe(#c_literal{}) -> true;
 is_safe(_) -> false.
 
 is_single_use(V, Env) ->
-    case dict:find(V, Env) of
-	{ok, 1} ->
+    case Env of
+	#{V:=1} ->
 	    true;
 	_ ->
 	    false
diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl
index 65699ccda9..43ce9a7172 100644
--- a/lib/compiler/src/sys_core_fold.erl
+++ b/lib/compiler/src/sys_core_fold.erl
@@ -2793,12 +2793,18 @@ extract_type_1(Expr, Sub) ->
 	true -> bool
     end.
 
+returns_integer('band', [_,_]) -> true;
+returns_integer('bnot', [_]) -> true;
+returns_integer('bor', [_,_]) -> true;
+returns_integer('bxor', [_,_]) -> true;
 returns_integer(bit_size, [_]) -> true;
 returns_integer('bsl', [_,_]) -> true;
 returns_integer('bsr', [_,_]) -> true;
 returns_integer(byte_size, [_]) -> true;
+returns_integer('div', [_,_]) -> true;
 returns_integer(length, [_]) -> true;
 returns_integer('rem', [_,_]) -> true;
+returns_integer('round', [_]) -> true;
 returns_integer(size, [_]) -> true;
 returns_integer(tuple_size, [_]) -> true;
 returns_integer(trunc, [_]) -> true;
diff --git a/lib/compiler/src/sys_pre_expand.erl b/lib/compiler/src/sys_pre_expand.erl
index d9cc4b530c..7ab4e1845c 100644
--- a/lib/compiler/src/sys_pre_expand.erl
+++ b/lib/compiler/src/sys_pre_expand.erl
@@ -29,30 +29,26 @@
 %% Main entry point.
 -export([module/2]).
 
--import(ordsets, [from_list/1,union/2]).
 -import(lists,   [member/2,foldl/3,foldr/3]).
 
--include("../include/erl_bits.hrl").
-
 -type fa() :: {atom(), arity()}.
 
 -record(expand, {module=[],                     %Module name
                  exports=[],                    %Exports
-                 imports=[],                    %Imports
                  attributes=[],                 %Attributes
                  callbacks=[],                  %Callbacks
                  optional_callbacks=[] :: [fa()],  %Optional callbacks
-                 defined,			%Defined functions (gb_set)
                  vcount=0,                      %Variable counter
                  func=[],                       %Current function
                  arity=[],                      %Arity for current function
-                 fcount=0			%Local fun count
+                 fcount=0,			%Local fun count
+		 ctype				%Call type map
                 }).
 
 %% module(Forms, CompileOptions)
 %%      {ModuleName,Exports,TransformedForms,CompileOptions'}
-%%  Expand the forms in one module. N.B.: the lists of predefined
-%%  exports and imports are really ordsets!
+%%  Expand the forms in one module.
+%%
 %%  CompileOptions is augmented with options from -compile attributes.
 
 module(Fs0, Opts0) ->
@@ -65,19 +61,28 @@ module(Fs0, Opts0) ->
     %% Set pre-defined exported functions.
     PreExp = [{module_info,0},{module_info,1}],
 
+    %% Build the set of defined functions and the initial call
+    %% type map.
+    Defined = defined_functions(Fs, PreExp),
+    Ctype = maps:from_list([{K,local} || K <- Defined]),
+
     %% Build initial expand record.
     St0 = #expand{exports=PreExp,
-                  defined=PreExp
+		  ctype=Ctype
                  },
+
     %% Expand the functions.
-    {Tfs,St1} = forms(Fs, define_functions(Fs, St0)),
+    {Tfs,St1} = forms(Fs, St0),
+
     %% Get the correct list of exported functions.
     Exports = case member(export_all, Opts) of
-                  true -> gb_sets:to_list(St1#expand.defined);
+                  true -> Defined;
                   false -> St1#expand.exports
               end,
+    St2 = St1#expand{exports=Exports,ctype=undefined},
+
     %% Generate all functions from stored info.
-    {Ats,St3} = module_attrs(St1#expand{exports = Exports}),
+    {Ats,St3} = module_attrs(St2),
     {Mfs,St4} = module_predef_funcs(St3),
     {St4#expand.module, St4#expand.exports, Ats ++ Tfs ++ Mfs,
      Opts}.
@@ -85,14 +90,14 @@ module(Fs0, Opts0) ->
 compiler_options(Forms) ->
     lists:flatten([C || {attribute,_,compile,C} <- Forms]).
     
-%% define_function(Form, State) -> State.
+%% defined_functions(Forms, Predef) -> Functions.
 %%  Add function to defined if form is a function.
 
-define_functions(Forms, #expand{defined=Predef}=St) ->
+defined_functions(Forms, Predef) ->
     Fs = foldl(fun({function,_,N,A,_Cs}, Acc) -> [{N,A}|Acc];
                   (_, Acc) -> Acc
                end, Predef, Forms),
-    St#expand{defined=gb_sets:from_list(Fs)}.
+    ordsets:from_list(Fs).
 
 module_attrs(#expand{attributes=Attributes}=St) ->
     Attrs = [{attribute,Line,Name,Val} || {Name,Line,Val} <- Attributes],
@@ -113,23 +118,21 @@ is_fa_list([{FuncName, Arity}|L])
 is_fa_list([]) -> true;
 is_fa_list(_) -> false.
 
-module_predef_funcs(St) ->
-    {Mpf1,St1}=module_predef_func_beh_info(St),
-    {Mpf2,St2}=module_predef_funcs_mod_info(St1),
+module_predef_funcs(St0) ->
+    {Mpf1,St1} = module_predef_func_beh_info(St0),
+    Mpf2 = module_predef_funcs_mod_info(St1),
     Mpf = [erl_parse:new_anno(F) || F <- Mpf1++Mpf2],
-    {Mpf,St2}.
+    {Mpf,St1}.
 
 module_predef_func_beh_info(#expand{callbacks=[]}=St) ->
     {[], St};
 module_predef_func_beh_info(#expand{callbacks=Callbacks,
                                     optional_callbacks=OptionalCallbacks,
-                                    defined=Defined,
 				    exports=Exports}=St) ->
-    PreDef=[{behaviour_info,1}],
-    PreExp=PreDef,
+    PreDef0 = [{behaviour_info,1}],
+    PreDef = ordsets:from_list(PreDef0),
     {[gen_beh_info(Callbacks, OptionalCallbacks)],
-     St#expand{defined=gb_sets:union(gb_sets:from_list(PreDef), Defined),
-	       exports=union(from_list(PreExp), Exports)}}.
+     St#expand{exports=ordsets:union(PreDef, Exports)}}.
 
 gen_beh_info(Callbacks, OptionalCallbacks) ->
     List = make_list(Callbacks),
@@ -156,20 +159,16 @@ make_optional_list([{Name,Arity}|Rest]) ->
        {integer,0,Arity}]},
      make_optional_list(Rest)}.
 
-module_predef_funcs_mod_info(St) ->
-    PreDef = [{module_info,0},{module_info,1}],
-    PreExp = PreDef,
-    {[{function,0,module_info,0,
-       [{clause,0,[],[],
+module_predef_funcs_mod_info(#expand{module=Mod}) ->
+    ModAtom = {atom,0,Mod},
+    [{function,0,module_info,0,
+      [{clause,0,[],[],
         [{call,0,{remote,0,{atom,0,erlang},{atom,0,get_module_info}},
-          [{atom,0,St#expand.module}]}]}]},
-      {function,0,module_info,1,
-       [{clause,0,[{var,0,'X'}],[],
+          [ModAtom]}]}]},
+     {function,0,module_info,1,
+      [{clause,0,[{var,0,'X'}],[],
         [{call,0,{remote,0,{atom,0,erlang},{atom,0,get_module_info}},
-          [{atom,0,St#expand.module},{var,0,'X'}]}]}]}],
-     St#expand{defined=gb_sets:union(gb_sets:from_list(PreDef),
-				     St#expand.defined),
-               exports=union(from_list(PreExp), St#expand.exports)}}.
+          [ModAtom,{var,0,'X'}]}]}]}].
 
 %% forms(Forms, State) ->
 %%      {TransformedForms,State'}
@@ -196,7 +195,8 @@ attribute(module, Module, _L, St) ->
     true = is_atom(Module),
     St#expand{module=Module};
 attribute(export, Es, _L, St) ->
-    St#expand{exports=union(from_list(Es), St#expand.exports)};
+    St#expand{exports=ordsets:union(ordsets:from_list(Es),
+				    St#expand.exports)};
 attribute(import, Is, _L, St) ->
     import(Is, St);
 attribute(compile, _C, _L, St) ->
@@ -231,8 +231,6 @@ head(As, St) -> pattern_list(As, St).
 %%      {TransformedPattern,State'}
 %%
 
-pattern({var,_,'_'}=Var, St) ->                 %Ignore anonymous variable.
-    {Var,St};
 pattern({var,_,_}=Var, St) ->
     {Var,St};
 pattern({char,_,_}=Char, St) ->
@@ -385,19 +383,19 @@ expr({block,Line,Es0}, St0) ->
     {Es,St1} = exprs(Es0, St0),
     {{block,Line,Es},St1};
 expr({'if',Line,Cs0}, St0) ->
-    {Cs,St1} = icr_clauses(Cs0, St0),
+    {Cs,St1} = clauses(Cs0, St0),
     {{'if',Line,Cs},St1};
 expr({'case',Line,E0,Cs0}, St0) ->
     {E,St1} = expr(E0, St0),
-    {Cs,St2} = icr_clauses(Cs0, St1),
+    {Cs,St2} = clauses(Cs0, St1),
     {{'case',Line,E,Cs},St2};
 expr({'receive',Line,Cs0}, St0) ->
-    {Cs,St1} = icr_clauses(Cs0, St0),
+    {Cs,St1} = clauses(Cs0, St0),
     {{'receive',Line,Cs},St1};
 expr({'receive',Line,Cs0,To0,ToEs0}, St0) ->
     {To,St1} = expr(To0, St0),
     {ToEs,St2} = exprs(ToEs0, St1),
-    {Cs,St3} = icr_clauses(Cs0, St2),
+    {Cs,St3} = clauses(Cs0, St2),
     {{'receive',Line,Cs,To,ToEs},St3};
 expr({'fun',Line,Body}, St) ->
     fun_tq(Line, Body, St);
@@ -406,21 +404,15 @@ expr({named_fun,Line,Name,Cs}, St) ->
 expr({call,Line,{atom,La,N}=Atom,As0}, St0) ->
     {As,St1} = expr_list(As0, St0),
     Ar = length(As),
-    case defined(N,Ar,St1) of
-	true ->
+    Key = {N,Ar},
+    case St1#expand.ctype of
+	#{Key:=local} ->
 	    {{call,Line,Atom,As},St1};
+	#{Key:={imported,Mod}} ->
+	    {{call,Line,{remote,La,{atom,La,Mod},Atom},As},St1};
 	_ ->
-	    case imported(N, Ar, St1) of
-		{yes,Mod} ->
-		    {{call,Line,{remote,La,{atom,La,Mod},Atom},As},St1};
-		no ->
-		    case erl_internal:bif(N, Ar) of
-			true ->
-			    {{call,Line,{remote,La,{atom,La,erlang},Atom},As},St1};
-			false -> %% This should have been handled by erl_lint
-			    {{call,Line,Atom,As},St1}
-		    end
-	    end
+	    true = erl_internal:bif(N, Ar),
+	    {{call,Line,{remote,La,{atom,La,erlang},Atom},As},St1}
     end;
 expr({call,Line,{remote,Lr,M0,F},As0}, St0) ->
     {[M1,F1|As1],St1} = expr_list([M0,F|As0], St0),
@@ -430,12 +422,11 @@ expr({call,Line,F,As0}, St0) ->
     {{call,Line,Fun1,As1},St1};
 expr({'try',Line,Es0,Scs0,Ccs0,As0}, St0) ->
     {Es1,St1} = exprs(Es0, St0),
-    {Scs1,St2} = icr_clauses(Scs0, St1),
-    {Ccs1,St3} = icr_clauses(Ccs0, St2),
+    {Scs1,St2} = clauses(Scs0, St1),
+    {Ccs1,St3} = clauses(Ccs0, St2),
     {As1,St4} = exprs(As0, St3),
     {{'try',Line,Es1,Scs1,Ccs1,As1},St4};
 expr({'catch',Line,E0}, St0) ->
-    %% Catch exports no new variables.
     {E,St1} = expr(E0, St0),
     {{'catch',Line,E},St1};
 expr({match,Line,P0,E0}, St0) ->
@@ -456,21 +447,6 @@ expr_list([E0|Es0], St0) ->
     {[E|Es],St2};
 expr_list([], St) -> {[],St}.
 
-%% icr_clauses([Clause], State) -> {[TransformedClause],State'}
-%%  Be very careful here to return the variables that are really used
-%%  and really new.
-
-icr_clauses([], St) -> {[],St};
-icr_clauses(Clauses, St) -> icr_clauses2(Clauses, St).
-
-icr_clauses2([{clause,Line,H0,G0,B0}|Cs0], St0) ->
-    {H,St1} = head(H0, St0),
-    {G,St2} = guard(G0, St1),
-    {B,St3} = exprs(B0, St2),
-    {Cs,St4} = icr_clauses2(Cs0, St3),
-    {[{clause,Line,H,G,B}|Cs],St4};
-icr_clauses2([], St) -> {[],St}.
-
 %% lc_tq(Line, Qualifiers, State) ->
 %%      {[TransQual],State'}
 
@@ -486,16 +462,9 @@ lc_tq(Line, [{b_generate,Lg,P0,G0}|Qs0], St0) ->
     {Qs1,St3} = lc_tq(Line, Qs0, St2),
     {[{b_generate,Lg,P1,G1}|Qs1],St3};
 lc_tq(Line, [F0 | Qs0], St0) ->
-    case erl_lint:is_guard_test(F0) of
-        true ->
-            {F1,St1} = guard_test(F0, St0),
-            {Qs1,St2} = lc_tq(Line, Qs0, St1),
-            {[F1|Qs1],St2};
-        false ->
-            {F1,St1} = expr(F0, St0),
-            {Qs1,St2} = lc_tq(Line, Qs0, St1),
-            {[F1 | Qs1],St2}
-    end;
+    {F1,St1} = expr(F0, St0),
+    {Qs1,St2} = lc_tq(Line, Qs0, St1),
+    {[F1|Qs1],St2};
 lc_tq(_Line, [], St0) ->
     {[],St0}.
 
@@ -527,7 +496,7 @@ fun_tq(L, {function,M,F,A}, St) when is_atom(M), is_atom(F), is_integer(A) ->
 fun_tq(Lf, {function,_,_,_}=ExtFun, St) ->
     {{'fun',Lf,ExtFun},St};
 fun_tq(Lf, {clauses,Cs0}, St0) ->
-    {Cs1,St1} = fun_clauses(Cs0, St0),
+    {Cs1,St1} = clauses(Cs0, St0),
     {Fname,St2} = new_fun_name(St1),
     %% Set dummy values for Index and Uniq -- the real values will
     %% be assigned by beam_asm.
@@ -535,18 +504,10 @@ fun_tq(Lf, {clauses,Cs0}, St0) ->
     {{'fun',Lf,{clauses,Cs1},{Index,Uniq,Fname}},St2}.
 
 fun_tq(Line, Cs0, St0, Name) ->
-    {Cs1,St1} = fun_clauses(Cs0, St0),
+    {Cs1,St1} = clauses(Cs0, St0),
     {Fname,St2} = new_fun_name(St1, Name),
     {{named_fun,Line,Name,Cs1,{0,0,Fname}},St2}.
 
-fun_clauses([{clause,L,H0,G0,B0}|Cs0], St0) ->
-    {H,St1} = head(H0, St0),
-    {G,St2} = guard(G0, St1),
-    {B,St3} = exprs(B0, St2),
-    {Cs,St4} = fun_clauses(Cs0, St3),
-    {[{clause,L,H,G,B}|Cs],St4};
-fun_clauses([], St) -> {[],St}.
-
 %% new_fun_name(State) -> {FunName,State}.
 
 new_fun_name(St) ->
@@ -571,7 +532,6 @@ pattern_element({bin_element,Line,Expr0,Size0,Type0}, {Es,St0}) ->
     {[{bin_element,Line,Expr,Size,Type}|Es],St2}.
 
 pat_bit_size(default, St) -> {default,St};
-pat_bit_size({atom,_La,all}=All, St) -> {All,St};
 pat_bit_size({var,_Lv,_V}=Var, St) -> {Var,St};
 pat_bit_size(Size, St) ->
     Line = element(2, Size),
@@ -592,8 +552,7 @@ coerce_to_float({integer,L,I}=E, [float|_]) ->
     try
         {float,L,float(I)}
     catch
-        error:badarg -> E;
-        error:badarith -> E
+        error:badarg -> E
     end;
 coerce_to_float(E, _) -> E.
     
@@ -647,25 +606,11 @@ string_to_conses(Line, Cs, Tail) ->
 
 %% import(Line, Imports, State) ->
 %%      State'
-%% imported(Name, Arity, State) ->
-%%      {yes,Module} | no
-%%  Handle import declarations and test for imported functions. No need to
-%%  check when building imports as code is correct.
+%%  Handle import declarations.
 
-import({Mod,Fs}, St) ->
+import({Mod,Fs}, #expand{ctype=Ctype0}=St) ->
     true = is_atom(Mod),
-    Mfs = from_list(Fs),
-    St#expand{imports=add_imports(Mod, Mfs, St#expand.imports)}.
-
-add_imports(Mod, [F|Fs], Is) ->
-    add_imports(Mod, Fs, orddict:store(F, Mod, Is));
-add_imports(_, [], Is) -> Is.
-
-imported(F, A, St) ->
-    case orddict:find({F,A}, St#expand.imports) of
-        {ok,Mod} -> {yes,Mod};
-        error -> no
-    end.
-
-defined(F, A, St) ->
-    gb_sets:is_element({F,A}, St#expand.defined).
+    Ctype = foldl(fun(F, A) ->
+			  A#{F=>{imported,Mod}}
+		  end, Ctype0, Fs),
+    St#expand{ctype=Ctype}.
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index 34c67b16ca..5083995f30 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -827,21 +827,24 @@ select_extract_bin([{var,Hd},{var,Tl}], Size0, Unit, Type, Flags, Vf,
 		  {bs_save2,CtxReg,{Ctx,Tl}}],Int1}
 	end,
     {Es,clear_dead(Aft, I, Vdb),St};
-select_extract_bin([{var,Hd}], Size0, Unit, binary, Flags, Vf,
+select_extract_bin([{var,Hd}], Size, Unit, binary, Flags, Vf,
 		   I, Vdb, Bef, Ctx, Body, St) ->
-    SizeReg = get_bin_size_reg(Size0, Bef),
+    %% Match the last segment of a binary. We KNOW that the size
+    %% must be 'all'.
+    Size = {atom,all},				%Assertion.
     {Es,Aft} =
 	case vdb_find(Hd, Vdb) of
 	    {_,_,Lhd} when Lhd =< I ->
+		%% The result will not be used. Furthermore, since we
+		%% are at the end of the binary, the position will
+		%% not be used again; thus, it is safe to do a cheaper
+		%% test of the unit.
 		CtxReg = fetch_var(Ctx, Bef),
-		{case SizeReg =:= {atom,all} andalso is_context_unused(Body) of
-		     true when Unit =:= 1 ->
+		{case Unit of
+		     1 ->
 			 [];
-		     true ->
-			 [{test,bs_test_unit,{f,Vf},[CtxReg,Unit]}];
-		     false ->
-			 [{test,bs_skip_bits2,{f,Vf},
-			   [CtxReg,SizeReg,Unit,{field_flags,Flags}]}]
+		     _ ->
+			 [{test,bs_test_unit,{f,Vf},[CtxReg,Unit]}]
 		 end,Bef};
 	    {_,_,_} ->
 		case is_context_unused(Body) of
@@ -853,7 +856,7 @@ select_extract_bin([{var,Hd}], Size0, Unit, binary, Flags, Vf,
 			Name = bs_get_binary2,
 			Live = max_reg(Bef#sr.reg),
 			{[{test,Name,{f,Vf},Live,
-			   [CtxReg,SizeReg,Unit,{field_flags,Flags}],Rhd}],
+			   [CtxReg,Size,Unit,{field_flags,Flags}],Rhd}],
 			 Int1};
 		    true ->
 			%% Since the matching context will not be used again,
@@ -868,7 +871,7 @@ select_extract_bin([{var,Hd}], Size0, Unit, binary, Flags, Vf,
 			Name = bs_get_binary2,
 			Live = max_reg(Int1#sr.reg),
 			{[{test,Name,{f,Vf},Live,
-			   [CtxReg,SizeReg,Unit,{field_flags,Flags}],CtxReg}],
+			   [CtxReg,Size,Unit,{field_flags,Flags}],CtxReg}],
 			 Int1}
 		end
 	end,
diff --git a/lib/compiler/src/v3_core.erl b/lib/compiler/src/v3_core.erl
index 0941ad5dd5..7c229210a0 100644
--- a/lib/compiler/src/v3_core.erl
+++ b/lib/compiler/src/v3_core.erl
@@ -469,7 +469,8 @@ unforce_tree([#iset{var=#c_var{name=V},arg=Arg0}|Es], D0) ->
     unforce_tree(Es, D);
 unforce_tree([#icall{}=Call], D) ->
     unforce_tree_subst(Call, D);
-unforce_tree([Top], _) -> Top.
+unforce_tree([#c_var{name=V}], D) ->
+    gb_trees:get(V, D).
 
 unforce_tree_subst(#icall{module=#c_literal{val=erlang},
 			  name=#c_literal{val='=:='},
diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl
index 7ee564683b..011748df3a 100644
--- a/lib/compiler/src/v3_kernel.erl
+++ b/lib/compiler/src/v3_kernel.erl
@@ -117,7 +117,7 @@ copy_anno(Kdst, Ksrc) ->
 	       fcount=0,			%Fun counter
                ds=cerl_sets:new() :: cerl_sets:set(), %Defined variables
 	       funs=[],				%Fun functions
-	       free=[],				%Free variables
+	       free=#{},			%Free variables
 	       ws=[]   :: [warning()],		%Warnings.
 	       guard_refc=0}).			%> 0 means in guard
 
@@ -1837,14 +1837,17 @@ handle_reuse_anno_1(V, _St) -> V.
 %% get_free(Name, Arity, State) -> [Free].
 %% store_free(Name, Arity, [Free], State) -> State.
 
-get_free(F, A, St) ->
-    case orddict:find({F,A}, St#kern.free) of
-	{ok,Val} -> Val;
-	error -> []
+get_free(F, A, #kern{free=FreeMap}) ->
+    Key = {F,A},
+    case FreeMap of
+	#{Key:=Val} -> Val;
+	_ -> []
     end.
 
-store_free(F, A, Free, St) ->
-    St#kern{free=orddict:store({F,A}, Free, St#kern.free)}.
+store_free(F, A, Free, #kern{free=FreeMap0}=St) ->
+    Key = {F,A},
+    FreeMap = FreeMap0#{Key=>Free},
+    St#kern{free=FreeMap}.
 
 break_rets({break,Rs}) -> Rs;
 break_rets(return) -> [].