5 files changed, 123 insertions, 62 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 85d332c56e..ec41925beb 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -159,14 +159,43 @@ find_fixpoint(OptFun, Is0) ->
     end.
 
 %% move_allocates(Is0) -> Is
-%%  Move allocate instructions upwards in the instruction stream, in the
-%%  hope of getting more possibilities for optimizing away moves later.
+%%  Move allocate instructions upwards in the instruction stream
+%%  (within the same block), in the hope of getting more possibilities
+%%  for optimizing away moves later.
 %%
-%%  NOTE: Moving allocation instructions is only safe because it is done
-%%  immediately after code generation so that we KNOW that if {x,X} is
-%%  initialized, all x registers with lower numbers are also initialized.
-%%  That assumption may not be true after other optimizations, such as
-%%  the beam_utils:live_opt/1 optimization.
+%%  For example, we can transform the following instructions:
+%%
+%%     get_tuple_element x(1) Element => x(2)
+%%     allocate_zero StackSize 3    %% x(0), x(1), x(2) are live
+%%
+%%  to the following instructions:
+%%
+%%     allocate_zero StackSize 2    %% x(0) and x(1) are live
+%%     get_tuple_element x(1) Element => x(2)
+%%
+%%  NOTE: Since the beam_reorder pass has been run, it is no longer
+%%  safe to assume that if x(N) is initialized, then all lower-numbered
+%%  x registers are also initialized.
+%%
+%%  For example, in general it is not safe to transform the following
+%%  instructions:
+%%
+%%     get_tuple_element x(0) Element => x(1)
+%%     allocate_zero StackSize 3    %% x(0), x(1), x(2) are live
+%%
+%%  to the following instructions:
+%%
+%%     allocate_zero StackSize 3
+%%     get_tuple_element x(0) Element => x(1)
+%%
+%%  The transformation is safe if and only if x(1) has been
+%%  initialized previously. Unfortunately, beam_reorder may have moved
+%%  a get_tuple_element instruction so that x(1) is not always
+%%  initialized when this code is reached. To find whether or not x(1)
+%%  is initialized, we would need to analyze all code preceding these
+%%  two instructions (across branches). Since we currently don't have
+%%  any practical mechanism for doing that, we will have to
+%%  conservatively assume that the transformation is unsafe.
 
 move_allocates([{block,Bl0}|Is]) ->
     Bl = move_allocates_1(reverse(Bl0), []),
@@ -175,27 +204,19 @@ move_allocates([I|Is]) ->
     [I|move_allocates(Is)];
 move_allocates([]) -> [].
 
-move_allocates_1([{set,[],[],{alloc,_,_}=Alloc}|Is0], Acc0) ->
-    {Is,Acc} = move_allocates_2(Alloc, Is0, Acc0),
-    move_allocates_1(Is, Acc);
+move_allocates_1([I|Is], [{set,[],[],{alloc,Live0,Info}}|Acc]=Acc0) ->
+    case {alloc_may_pass(I),alloc_live_regs(I, Live0)} of
+	{false,_} ->
+	    move_allocates_1(Is, [I|Acc0]);
+	{true,not_possible} ->
+	    move_allocates_1(Is, [I|Acc0]);
+	{true,Live} when is_integer(Live) ->
+	    A = {set,[],[],{alloc,Live,Info}},
+	    move_allocates_1(Is, [A,I|Acc])
+    end;
 move_allocates_1([I|Is], Acc) ->
     move_allocates_1(Is, [I|Acc]);
-move_allocates_1([], Is) -> Is.
-
-move_allocates_2({alloc,Live,Info}, [{set,[],[],{alloc,Live0,Info0}}|Is], Acc) ->
-    Live = Live0,				% Assertion.
-    Alloc = {alloc,Live,combine_alloc(Info0, Info)},
-    move_allocates_2(Alloc, Is, Acc);
-move_allocates_2({alloc,Live,Info}=Alloc0, [I|Is]=Is0, Acc) ->
-    case alloc_may_pass(I) of
-	false ->
-	    {Is0,[{set,[],[],Alloc0}|Acc]};
-	true ->
-	    Alloc = {alloc,alloc_live_regs(I, Live),Info},
-	    move_allocates_2(Alloc, Is, [I|Acc])
-    end;
-move_allocates_2(Alloc, [], Acc) ->
-    {[],[{set,[],[],Alloc}|Acc]}.
+move_allocates_1([], Acc) -> Acc.
 
 alloc_may_pass({set,_,_,{alloc,_,_}}) -> false;
 alloc_may_pass({set,_,_,{set_tuple_element,_}}) -> false;
@@ -204,9 +225,6 @@ alloc_may_pass({set,_,_,put_list}) -> false;
 alloc_may_pass({set,_,_,put}) -> false;
 alloc_may_pass({set,_,_,_}) -> true.
     
-combine_alloc({_,Ns,Nh1,Init}, {_,nostack,Nh2,[]})  ->
-    {zero,Ns,beam_utils:combine_heap_needs(Nh1, Nh2),Init}.
-
 %% opt([Instruction]) -> [Instruction]
 %%  Optimize the instruction stream inside a basic block.
 
@@ -393,10 +411,19 @@ eliminate_use_of_from_reg([I]=Is, From, _To, Acc) ->
 %% opt_alloc(Instructions) -> Instructions'
 %%  Optimises all allocate instructions.
 
+opt_alloc([{set,[],[],{alloc,Live0,Info0}},
+	   {set,[],[],{alloc,Live,Info}}|Is]) ->
+    Live = Live0,				%Assertion.
+    Alloc = combine_alloc(Info0, Info),
+    I = {set,[],[],{alloc,Live,Alloc}},
+    opt_alloc([I|Is]);
 opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is]) ->
     [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|Is];
 opt_alloc([I|Is]) -> [I|opt_alloc(Is)];
 opt_alloc([]) -> [].
+
+combine_alloc({_,Ns,Nh1,Init}, {_,nostack,Nh2,[]})  ->
+    {zero,Ns,beam_utils:combine_heap_needs(Nh1, Nh2),Init}.
 	
 %% opt_alloc(Instructions, FrameSize, HeapNeed, LivingRegs) -> [Instr]
 %%  Generates the optimal sequence of instructions for
@@ -445,13 +472,14 @@ count_ones(Bits, Acc) ->
 
 alloc_live_regs({set,Ds,Ss,_}, Regs0) ->
     Rset = x_live(Ss, x_dead(Ds, (1 bsl Regs0)-1)),
-    live_regs(Rset).
+    live_regs(0, Rset).
 
-live_regs(Regs) ->
-    live_regs_1(0, Regs).
-
-live_regs_1(N, 0) -> N;
-live_regs_1(N, Regs) -> live_regs_1(N+1, Regs bsr 1).
+live_regs(N, 0) ->
+    N;
+live_regs(N, Regs) when Regs band 1 =:= 1 ->
+    live_regs(N+1, Regs bsr 1);
+live_regs(_, _) ->
+    not_possible.
 
 x_dead([{x,N}|Rs], Regs) -> x_dead(Rs, Regs band (bnot (1 bsl N)));
 x_dead([_|Rs], Regs) -> x_dead(Rs, Regs);
diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl
index 6f6d742293..3606af9d75 100644
--- a/lib/compiler/src/beam_dead.erl
+++ b/lib/compiler/src/beam_dead.erl
@@ -272,14 +272,18 @@ backward([{jump,{f,To}}=J|[{bif,Op,_,Ops,Reg}|Is]=Is0], D, Acc) ->
     catch
 	throw:not_possible -> backward(Is0, D, [J|Acc])
     end;
-backward([{test,bs_start_match2,F,_,[R,_],Ctxt}=I|Is], D,
+backward([{test,bs_start_match2,F,Live,[R,_]=Args,Ctxt}|Is], D,
 	 [{test,bs_match_string,F,[Ctxt,Bs]},
 	  {test,bs_test_tail2,F,[Ctxt,0]}|Acc0]=Acc) ->
+    {f,To0} = F,
     case beam_utils:is_killed(Ctxt, Acc0, D) of
 	true ->
-	    Eq = {test,is_eq_exact,F,[R,{literal,Bs}]},
+	    To = shortcut_bs_context_to_binary(To0, R, D),
+	    Eq = {test,is_eq_exact,{f,To},[R,{literal,Bs}]},
 	    backward(Is, D, [Eq|Acc0]);
 	false ->
+	    To = shortcut_bs_start_match(To0, R, D),
+	    I = {test,bs_start_match2,{f,To},Live,Args,Ctxt},
 	    backward(Is, D, [I|Acc])
     end;
 backward([{test,bs_start_match2,{f,To0},Live,[Src|_]=Info,Dst}|Is], D, Acc) ->
@@ -551,6 +555,21 @@ shortcut_bs_start_match_1([{test,bs_start_match2,{f,To},_,[Reg|_],_}|_],
 shortcut_bs_start_match_1(_, _, To, _) ->
     To.
 
+%% shortcut_bs_context_to_binary(TargetLabel, Reg, D) -> TargetLabel
+%%  If a bs_start_match2 instruction has been eliminated, the
+%%  bs_context_to_binary instruction can be eliminated too.
+
+shortcut_bs_context_to_binary(To, Reg, D) ->
+    shortcut_bs_ctb_1(beam_utils:code_at(To, D), Reg, To, D).
+
+shortcut_bs_ctb_1([{bs_context_to_binary,Reg}|Is], Reg, To, D) ->
+    shortcut_bs_ctb_1(Is, Reg, To, D);
+shortcut_bs_ctb_1([{jump,{f,To}}|_], Reg, _, D) ->
+    Code = beam_utils:code_at(To, D),
+    shortcut_bs_ctb_1(Code, Reg, To, D);
+shortcut_bs_ctb_1(_, _, To, _) ->
+    To.
+
 %% shortcut_rel_op(FailLabel, Operator, [Operand], D) -> FailLabel'
 %%  Try to shortcut the given test instruction. Example:
 %%
diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index 09cd3aa2d4..48b5a32814 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -167,12 +167,18 @@ share_1([{label,L}=Lbl|Is], Dict0, Seq, Acc) ->
     end;
 share_1([{func_info,_,_,_}=I|Is], _, [], Acc) ->
     reverse(Is, [I|Acc]);
+share_1([{'catch',_,_}=I|Is], Dict0, Seq, Acc) ->
+    Dict = clean_non_sharable(Dict0),
+    share_1(Is, Dict, [I|Seq], Acc);
 share_1([{'try',_,_}=I|Is], Dict0, Seq, Acc) ->
     Dict = clean_non_sharable(Dict0),
     share_1(Is, Dict, [I|Seq], Acc);
 share_1([{try_case,_}=I|Is], Dict0, Seq, Acc) ->
     Dict = clean_non_sharable(Dict0),
     share_1(Is, Dict, [I|Seq], Acc);
+share_1([{catch_end,_}=I|Is], Dict0, Seq, Acc) ->
+    Dict = clean_non_sharable(Dict0),
+    share_1(Is, Dict, [I|Seq], Acc);
 share_1([I|Is], Dict, Seq, Acc) ->
     case is_unreachable_after(I) of
 	false ->
@@ -182,18 +188,18 @@ share_1([I|Is], Dict, Seq, Acc) ->
     end.
 
 clean_non_sharable(Dict) ->
-    %% We are passing in or out of a 'try' block. Remove
-    %% sequences that should not shared over the boundaries
-    %% of a 'try' block. Since the end of the sequence must match,
-    %% the only possible match between a sequence outside and
-    %% a sequence inside the 'try' block is a sequence that ends
-    %% with an instruction that causes an exception. Any sequence
-    %% that causes an exception must contain a line/1 instruction.
+    %% We are passing in or out of a 'catch' or 'try' block. Remove
+    %% sequences that should not be shared over the boundaries of the
+    %% block. Since the end of the sequence must match, the only
+    %% possible match between a sequence outside and a sequence inside
+    %% the 'catch'/'try' block is a sequence that ends with an
+    %% instruction that causes an exception. Any sequence that causes
+    %% an exception must contain a line/1 instruction.
     maps:filter(fun(K, _V) -> sharable_with_try(K) end, Dict).
 
 sharable_with_try([{line,_}|_]) ->
     %% This sequence may cause an exception and may potentially
-    %% match a sequence on the other side of the 'try' block
+    %% match a sequence on the other side of the 'catch'/'try' block
     %% boundary.
     false;
 sharable_with_try([_|Is]) ->
diff --git a/lib/compiler/src/sys_pre_expand.erl b/lib/compiler/src/sys_pre_expand.erl
index 7ab4e1845c..f996a2d2d7 100644
--- a/lib/compiler/src/sys_pre_expand.erl
+++ b/lib/compiler/src/sys_pre_expand.erl
@@ -520,9 +520,8 @@ new_fun_name(#expand{func=F,arity=A,fcount=I}=St, FName) ->
 
 %% pattern_bin([Element], State) -> {[Element],[Variable],[UsedVar],State}.
 
-pattern_bin(Es0, St) ->
-    Es1 = bin_expand_strings(Es0),
-    foldr(fun (E, Acc) -> pattern_element(E, Acc) end, {[],St}, Es1).
+pattern_bin(Es, St) ->
+    foldr(fun (E, Acc) -> pattern_element(E, Acc) end, {[],St}, Es).
 
 pattern_element({bin_element,Line,Expr0,Size0,Type0}, {Es,St0}) ->
     {Expr1,St1} = pattern(Expr0, St0),
@@ -558,9 +557,8 @@ coerce_to_float(E, _) -> E.
     
 %% expr_bin([Element], State) -> {[Element],State}.
 
-expr_bin(Es0, St) ->
-    Es1 = bin_expand_strings(Es0),
-    foldr(fun (E, Acc) -> bin_element(E, Acc) end, {[],St}, Es1).
+expr_bin(Es, St) ->
+    foldr(fun (E, Acc) -> bin_element(E, Acc) end, {[],St}, Es).
 
 bin_element({bin_element,Line,Expr,Size,Type}, {Es,St0}) ->
     {Expr1,St1} = expr(Expr, St0),
@@ -570,14 +568,6 @@ bin_element({bin_element,Line,Expr,Size,Type}, {Es,St0}) ->
     {Size2,Type1} = make_bit_type(Line, Size1, Type),
     {[{bin_element,Line,Expr1,Size2,Type1}|Es],St2}.
 
-bin_expand_strings(Es) ->
-    foldr(fun ({bin_element,Line,{string,_,S},Sz,Ts}, Es1) ->
-                  foldr(fun (C, Es2) ->
-                                [{bin_element,Line,{char,Line,C},Sz,Ts}|Es2]
-                        end, Es1, S);
-              (E, Es1) -> [E|Es1]
-          end, [], Es).
-
 %% new_var_name(State) -> {VarName,State}.
 
 new_var_name(St) ->
diff --git a/lib/compiler/src/v3_core.erl b/lib/compiler/src/v3_core.erl
index d71411de80..634ec68736 100644
--- a/lib/compiler/src/v3_core.erl
+++ b/lib/compiler/src/v3_core.erl
@@ -901,7 +901,7 @@ try_after(As, St0) ->
 expr_bin(Es0, Anno, St0) ->
     case constant_bin(Es0) of
 	error ->
-	    {Es,Eps,St} = expr_bin_1(Es0, St0),
+	    {Es,Eps,St} = expr_bin_1(bin_expand_strings(Es0), St0),
 	    {#ibinary{anno=#a{anno=Anno},segments=Es},Eps,St};
 	Bin ->
 	    {#c_literal{anno=Anno,val=Bin},[],St0}
@@ -923,7 +923,8 @@ constant_bin(Es) ->
 constant_bin_1(Es) ->
     verify_suitable_fields(Es),
     EmptyBindings = erl_eval:new_bindings(),
-    EvalFun = fun({integer,_,I}, B) -> {value,I,B};
+    EvalFun = fun({string,_,S}, B) -> {value,S,B};
+		 ({integer,_,I}, B) -> {value,I,B};
 		 ({char,_,C}, B) -> {value,C,B};
 		 ({float,_,F}, B) -> {value,F,B};
 		 ({atom,_,undefined}, B) -> {value,undefined,B}
@@ -944,6 +945,9 @@ verify_suitable_fields([{bin_element,_,Val,SzTerm,Opts}|Es]) ->
     end,
     {unit,Unit} = keyfind(unit, 1, Opts),
     case {SzTerm,Val} of
+	{{atom,_,undefined},{string,_,_}} ->
+	    %% UTF-8/16/32.
+	    ok;
 	{{atom,_,undefined},{char,_,_}} ->
 	    %% UTF-8/16/32.
 	    ok;
@@ -983,6 +987,14 @@ count_bits(Int) ->
 count_bits_1(0, Bits) -> Bits;
 count_bits_1(Int, Bits) -> count_bits_1(Int bsr 64, Bits+64).
 
+bin_expand_strings(Es) ->
+    foldr(fun ({bin_element,Line,{string,_,S},Sz,Ts}, Es1) ->
+		  foldr(fun (C, Es2) ->
+				[{bin_element,Line,{char,Line,C},Sz,Ts}|Es2]
+			end, Es1, S);
+	      (E, Es1) -> [E|Es1]
+	  end, [], Es).
+
 expr_bin_1(Es, St) ->
     foldr(fun (E, {Ces,Esp,St0}) ->
 		  {Ce,Ep,St1} = bitstr(E, St0),
@@ -1394,6 +1406,9 @@ bc_elem_size({bin,_,El}, St0) ->
 bc_elem_size(_, _) ->
     throw(impossible).
 
+bc_elem_size_1([{bin_element,_,{string,_,String},{integer,_,N},Flags}|Es], Bits, Vars) ->
+    {unit,U} = keyfind(unit, 1, Flags),
+    bc_elem_size_1(Es, Bits+U*N*length(String), Vars);
 bc_elem_size_1([{bin_element,_,_,{integer,_,N},Flags}|Es], Bits, Vars) ->
     {unit,U} = keyfind(unit, 1, Flags),
     bc_elem_size_1(Es, Bits+U*N, Vars);
@@ -1513,6 +1528,9 @@ bc_list_length(_, _) ->
 bc_bin_size({bin,_,Els}) ->
     bc_bin_size_1(Els, 0).
 
+bc_bin_size_1([{bin_element,_,{string,_,String},{integer,_,Sz},Flags}|Els], N) ->
+    {unit,U} = keyfind(unit, 1, Flags),
+    bc_bin_size_1(Els, N+U*Sz*length(String));
 bc_bin_size_1([{bin_element,_,_,{integer,_,Sz},Flags}|Els], N) ->
     {unit,U} = keyfind(unit, 1, Flags),
     bc_bin_size_1(Els, N+U*Sz);
@@ -1736,7 +1754,7 @@ pat_alias_map_pairs_1([]) -> [].
 
 %% pat_bin([BinElement], State) -> [BinSeg].
 
-pat_bin(Ps, St) -> [pat_segment(P, St) || P <- Ps].
+pat_bin(Ps, St) -> [pat_segment(P, St) || P <- bin_expand_strings(Ps)].
 
 pat_segment({bin_element,L,Val,Size,[Type,{unit,Unit}|Flags]}, St) ->
     Anno = lineno_anno(L, St),