1 files changed, 51 insertions, 381 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 6543e05e20..707974b2c1 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 1999-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1999-2018. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -17,36 +17,27 @@
 %%
 %% %CopyrightEnd%
 %%
-%% Purpose : Partitions assembly instructions into basic blocks and
-%% optimizes them.
+%% Purpose: Partition BEAM instructions into basic blocks.
 
 -module(beam_block).
 
 -export([module/2]).
--import(lists, [reverse/1,reverse/2,foldl/3,member/2]).
+-import(lists, [keysort/2,reverse/1,splitwith/2]).
 
 -spec module(beam_utils:module_code(), [compile:option()]) ->
                     {'ok',beam_utils:module_code()}.
 
-module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
+module({Mod,Exp,Attr,Fs0,Lc}, _Opts) ->
     Fs = [function(F) || F <- Fs0],
     {ok,{Mod,Exp,Attr,Fs,Lc}}.
 
 function({function,Name,Arity,CLabel,Is0}) ->
     try
-	%% Collect basic blocks and optimize them.
-	Is1 = blockify(Is0),
-	Is2 = embed_lines(Is1),
-	Is3 = move_allocates(Is2),
-	Is4 = beam_utils:live_opt(Is3),
-	Is5 = opt_blocks(Is4),
-	Is6 = beam_utils:delete_live_annos(Is5),
-
-	%% Done.
-	{function,Name,Arity,CLabel,Is6}
+        Is1 = blockify(Is0),
+        Is = embed_lines(Is1),
+        {function,Name,Arity,CLabel,Is}
     catch
-	Class:Error ->
-	    Stack = erlang:get_stacktrace(),
+        Class:Error:Stack ->
 	    io:fwrite("Function: ~w/~w\n", [Name,Arity]),
 	    erlang:raise(Class, Error, Stack)
     end.
@@ -58,14 +49,12 @@ function({function,Name,Arity,CLabel,Is0}) ->
 blockify(Is) ->
     blockify(Is, []).
 
-blockify([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_Lbl},{label,Fail}|Is], Acc) ->
-    %% Useless instruction sequence.
-    blockify(Is, Acc);
 blockify([I|Is0]=IsAll, Acc) ->
     case collect(I) of
 	error -> blockify(Is0, [I|Acc]);
 	Instr when is_tuple(Instr) ->
-	    {Block,Is} = collect_block(IsAll),
+            {Block0,Is} = collect_block(IsAll),
+            Block = sort_moves(Block0),
 	    blockify(Is, [{block,Block}|Acc])
     end;
 blockify([], Acc) -> reverse(Acc).
@@ -74,12 +63,10 @@ collect_block(Is) ->
     collect_block(Is, []).
 
 collect_block([{allocate,N,R}|Is0], Acc) ->
-    {Inits,Is} = lists:splitwith(fun ({init,{y,_}}) -> true;
-                                     (_) -> false
-                                 end, Is0),
+    {Inits,Is} = splitwith(fun ({init,{y,_}}) -> true;
+                               (_) -> false
+                           end, Is0),
     collect_block(Is, [{set,[],[],{alloc,R,{nozero,N,0,Inits}}}|Acc]);
-collect_block([{allocate_zero,Ns,R},{test_heap,Nh,R}|Is], Acc) ->
-    collect_block(Is, [{set,[],[],{alloc,R,{zero,Ns,Nh,[]}}}|Acc]);
 collect_block([I|Is]=Is0, Acc) ->
     case collect(I) of
 	error -> {reverse(Acc),Is0};
@@ -94,22 +81,20 @@ collect({allocate_heap,Ns,Nh,R}) -> {set,[],[],{alloc,R,{nozero,Ns,Nh,[]}}};
 collect({allocate_heap_zero,Ns,Nh,R}) -> {set,[],[],{alloc,R,{zero,Ns,Nh,[]}}};
 collect({init,D})            -> {set,[D],[],init};
 collect({test_heap,N,R})     -> {set,[],[],{alloc,R,{nozero,nostack,N,[]}}};
-collect({bif,N,F,As,D})      -> {set,[D],As,{bif,N,F}};
-collect({gc_bif,N,F,R,As,D}) -> {set,[D],As,{alloc,R,{gc_bif,N,F}}};
+collect({bif,N,{f,0},As,D})  -> {set,[D],As,{bif,N,{f,0}}};
+collect({gc_bif,N,{f,0},R,As,D}) ->   {set,[D],As,{alloc,R,{gc_bif,N,{f,0}}}};
 collect({move,S,D})          -> {set,[D],[S],move};
 collect({put_list,S1,S2,D})  -> {set,[D],[S1,S2],put_list};
 collect({put_tuple,A,D})     -> {set,[D],[],{put_tuple,A}};
 collect({put,S})             -> {set,[],[S],put};
+collect({put_tuple2,D,{list,Els}}) -> {set,[D],Els,put_tuple2};
 collect({get_tuple_element,S,I,D}) -> {set,[D],[S],{get_tuple_element,I}};
 collect({set_tuple_element,S,D,I}) -> {set,[],[S,D],{set_tuple_element,I}};
-collect({get_list,S,D1,D2})  -> {set,[D1,D2],[S],get_list};
+collect({get_hd,S,D})  ->       {set,[D],[S],get_hd};
+collect({get_tl,S,D})  ->       {set,[D],[S],get_tl};
 collect(remove_message)      -> {set,[],[],remove_message};
-collect({put_map,F,Op,S,D,R,{list,Puts}}) ->
-    {set,[D],[S|Puts],{alloc,R,{put_map,Op,F}}};
-collect({'catch'=Op,R,L}) ->
-    {set,[R],[],{try_catch,Op,L}};
-collect({'try'=Op,R,L}) ->
-    {set,[R],[],{try_catch,Op,L}};
+collect({put_map,{f,0},Op,S,D,R,{list,Puts}}) ->
+    {set,[D],[S|Puts],{alloc,R,{put_map,Op,{f,0}}}};
 collect(fclearerror)         -> {set,[],[],fclearerror};
 collect({fcheckerror,{f,0}}) -> {set,[],[],fcheckerror};
 collect({fmove,S,D})         -> {set,[D],[S],fmove};
@@ -134,350 +119,35 @@ embed_lines([I|Is], Acc) ->
     embed_lines(Is, [I|Acc]);
 embed_lines([], Acc) -> Acc.
 
-opt_blocks([{block,Bl0}|Is]) ->
-    %% The live annotation at the beginning is not useful.
-    [{'%live',_,_}|Bl] = Bl0,
-    [{block,opt_block(Bl)}|opt_blocks(Is)];
-opt_blocks([I|Is]) ->
-    [I|opt_blocks(Is)];
-opt_blocks([]) -> [].
-
-opt_block(Is0) ->
-    Is = find_fixpoint(fun(Is) ->
-			       opt_tuple_element(opt(Is))
-		       end, Is0),
-    opt_alloc(Is).
-
-find_fixpoint(OptFun, Is0) ->
-    case OptFun(Is0) of
-	Is0 -> Is0;
-	Is1 -> find_fixpoint(OptFun, Is1)
-    end.
-
-%% move_allocates(Is0) -> Is
-%%  Move allocate instructions upwards in the instruction stream
-%%  (within the same block), in the hope of getting more possibilities
-%%  for optimizing away moves later.
-%%
-%%  For example, we can transform the following instructions:
-%%
-%%     get_tuple_element x(1) Element => x(2)
-%%     allocate_zero StackSize 3    %% x(0), x(1), x(2) are live
-%%
-%%  to the following instructions:
-%%
-%%     allocate_zero StackSize 2    %% x(0) and x(1) are live
-%%     get_tuple_element x(1) Element => x(2)
-%%
-%%  NOTE: Since the beam_reorder pass has been run, it is no longer
-%%  safe to assume that if x(N) is initialized, then all lower-numbered
-%%  x registers are also initialized.
-%%
-%%  For example, in general it is not safe to transform the following
-%%  instructions:
-%%
-%%     get_tuple_element x(0) Element => x(1)
-%%     allocate_zero StackSize 3    %x(0), x(1), x(2) are live
-%%
-%%  to the following instructions:
-%%
-%%     allocate_zero StackSize 3
-%%     get_tuple_element x(0) Element => x(1)
-%%
-%%  The transformation is safe if and only if x(1) has been
-%%  initialized previously. Unfortunately, beam_reorder may have moved
-%%  a get_tuple_element instruction so that x(1) is not always
-%%  initialized when this code is reached. To find whether or not x(1)
-%%  is initialized, we would need to analyze all code preceding these
-%%  two instructions (across branches). Since we currently don't have
-%%  any practical mechanism for doing that, we will have to
-%%  conservatively assume that the transformation is unsafe.
-
-move_allocates([{block,Bl0}|Is]) ->
-    Bl = move_allocates_1(reverse(Bl0), []),
-    [{block,Bl}|move_allocates(Is)];
-move_allocates([I|Is]) ->
-    [I|move_allocates(Is)];
-move_allocates([]) -> [].
-
-move_allocates_1([I|Is], [{set,[],[],{alloc,Live0,Info}}|Acc]=Acc0) ->
-    case {alloc_may_pass(I),alloc_live_regs(I, Live0)} of
-	{false,_} ->
-	    move_allocates_1(Is, [I|Acc0]);
-	{true,not_possible} ->
-	    move_allocates_1(Is, [I|Acc0]);
-	{true,Live} when is_integer(Live) ->
-	    A = {set,[],[],{alloc,Live,Info}},
-	    move_allocates_1(Is, [A,I|Acc])
-    end;
-move_allocates_1([I|Is], Acc) ->
-    move_allocates_1(Is, [I|Acc]);
-move_allocates_1([], Acc) -> Acc.
-
-alloc_may_pass({set,_,_,{alloc,_,_}}) -> false;
-alloc_may_pass({set,_,_,{set_tuple_element,_}}) -> false;
-alloc_may_pass({set,_,_,put_list}) -> false;
-alloc_may_pass({set,_,_,put}) -> false;
-alloc_may_pass({set,_,_,_}) -> true.
-    
-%% opt([Instruction]) -> [Instruction]
-%%  Optimize the instruction stream inside a basic block.
-
-opt([{set,[X],[X],move}|Is]) -> opt(Is);
-opt([{set,_,_,{line,_}}=Line1,
-     {set,[D1],[{integer,Idx1},Reg],{bif,element,{f,0}}}=I1,
-     {set,_,_,{line,_}}=Line2,
-     {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,0}}}=I2|Is])
-  when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg ->
-    opt([Line2,I2,Line1,I1|Is]);
-opt([{set,Ds0,Ss,Op}|Is0]) ->
-    {Ds,Is} = opt_moves(Ds0, Is0),
-    [{set,Ds,Ss,Op}|opt(Is)];
-opt([{'%live',_,_}=I|Is]) ->
-    [I|opt(Is)];
-opt([]) -> [].
-
-%% opt_moves([Dest], [Instruction]) -> {[Dest],[Instruction]}
-%%  For each Dest, does the optimization described in opt_move/2.
-
-opt_moves([], Is0) -> {[],Is0};
-opt_moves([D0]=Ds, Is0) ->
-    case opt_move(D0, Is0) of
-	not_possible -> {Ds,Is0};
-	{D1,Is} -> {[D1],Is}
-    end;
-opt_moves([X0,Y0], Is0) ->
-    {X,Is2} = case opt_move(X0, Is0) of
-		  not_possible -> {X0,Is0};
-		  {Y0,_} -> {X0,Is0};
-		  {_X1,_Is1} = XIs1 -> XIs1
-	      end,
-    case opt_move(Y0, Is2) of
-	not_possible -> {[X,Y0],Is2};
-	{X,_} -> {[X,Y0],Is2};
-	{Y,Is} -> {[X,Y],Is}
-    end.
-
-%% opt_move(Dest, [Instruction]) -> {UpdatedDest,[Instruction]} | not_possible
-%%  If there is a {move,Dest,FinalDest} instruction
-%%  in the instruction stream, remove the move instruction
-%%  and let FinalDest be the destination.
-
-opt_move(Dest, Is) ->
-    opt_move_1(Dest, Is, []).
-
-opt_move_1(R, [{set,[D],[R],move}|Is0], Acc) ->
-    %% Provided that the source register is killed by instructions
-    %% that follow, the optimization is safe.
-    case eliminate_use_of_from_reg(Is0, R, D, []) of
-	{yes,Is} -> opt_move_rev(D, Acc, Is);
-	no -> not_possible
-    end;
-opt_move_1(_R, [{set,_,_,{alloc,_,_}}|_], _) ->
-    %% The optimization is either not possible or not safe.
-    %%
-    %% If R is an X register killed by allocation, the optimization is
-    %% not safe. On the other hand, if the X register is killed, there
-    %% will not follow a 'move' instruction with this X register as
-    %% the source.
-    %%
-    %% If R is a Y register, the optimization is still not safe
-    %% because the new target register is an X register that cannot
-    %% safely pass the alloc instruction.
-    not_possible;
-opt_move_1(R, [{set,_,_,_}=I|Is], Acc) ->
-    %% If the source register is either killed or used by this
-    %% instruction, the optimimization is not possible.
-    case is_killed_or_used(R, I) of
-	true -> not_possible;
-	false -> opt_move_1(R, Is, [I|Acc])
-    end;
-opt_move_1(_, _, _) ->
-    not_possible.
-
-%% opt_tuple_element([Instruction]) -> [Instruction]
-%%  If possible, move get_tuple_element instructions forward
-%%  in the instruction stream to a move instruction, eliminating
-%%  the move instruction. Example:
-%%
-%%    get_tuple_element Tuple Pos Dst1
-%%    ...
-%%    move Dst1 Dst2
-%%
-%%  This code may be possible to rewrite to:
-%%
-%%    %%(Moved get_tuple_element instruction)
-%%    ...
-%%    get_tuple_element Tuple Pos Dst2
-%%
-
-opt_tuple_element([{set,[D],[S],{get_tuple_element,_}}=I|Is0]) ->
-    case opt_tuple_element_1(Is0, I, {S,D}, []) of
-	no ->
-	    [I|opt_tuple_element(Is0)];
-	{yes,Is} ->
-	    opt_tuple_element(Is)
-    end;
-opt_tuple_element([I|Is]) ->
-    [I|opt_tuple_element(Is)];
-opt_tuple_element([]) -> [].
-
-opt_tuple_element_1([{set,_,_,{alloc,_,_}}|_], _, _, _) ->
-    no;
-opt_tuple_element_1([{set,_,_,{try_catch,_,_}}|_], _, _, _) ->
-    no;
-opt_tuple_element_1([{set,[D],[S],move}|Is0], I0, {_,S}, Acc) ->
-    case eliminate_use_of_from_reg(Is0, S, D, []) of
-	no ->
-	    no;
-	{yes,Is} ->
-	    {set,[S],Ss,Op} = I0,
-	    I = {set,[D],Ss,Op},
-	    {yes,reverse(Acc, [I|Is])}
-    end;
-opt_tuple_element_1([{set,Ds,Ss,_}=I|Is], MovedI, {S,D}=Regs, Acc) ->
-    case member(S, Ds) orelse member(D, Ss) of
-	true ->
-	    no;
-	false ->
-	    opt_tuple_element_1(Is, MovedI, Regs, [I|Acc])
-    end;
-opt_tuple_element_1(_, _, _, _) -> no.
-
-%% Reverse the instructions, while checking that there are no
-%% instructions that would interfere with using the new destination
-%% register (D).
-
-opt_move_rev(D, [I|Is], Acc) ->
-    case is_killed_or_used(D, I) of
-	true -> not_possible;
-	false -> opt_move_rev(D, Is, [I|Acc])
-    end;
-opt_move_rev(D, [], Acc) -> {D,Acc}.
-
-%% is_killed_or_used(Register, {set,_,_,_}) -> bool()
-%%  Test whether the register is used by the instruction.
-
-is_killed_or_used(R, {set,Ss,Ds,_}) ->
-    member(R, Ds) orelse member(R, Ss).
-
-%% eliminate_use_of_from_reg([Instruction], FromRegister, ToRegister, Acc) ->
-%%       {yes,Is} | no
-%%  Eliminate any use of FromRegister in the instruction sequence
-%%  by replacing uses of FromRegister with ToRegister. If FromRegister
-%%  is referenced by an allocation instruction, return 'no' to indicate
-%%  that FromRegister is still used and that the optimization is not
-%%  possible.
-
-eliminate_use_of_from_reg([{set,_,_,{alloc,Live,_}}|_]=Is0, {x,X}, _, Acc) ->
-    if
-	X < Live ->
-	    no;
-	true ->
-	    {yes,reverse(Acc, Is0)}
-    end;
-eliminate_use_of_from_reg([{set,Ds,Ss0,Op}=I0|Is], From, To, Acc) ->
-    I = case member(From, Ss0) of
-	    true ->
-		Ss = [case S of
-			  From -> To;
-			  _ -> S
-		      end || S <- Ss0],
-		{set,Ds,Ss,Op};
-	    false ->
-		I0
-	end,
-    case member(From, Ds) of
-	true ->
-	    {yes,reverse(Acc, [I|Is])};
-	false ->
-	    eliminate_use_of_from_reg(Is, From, To, [I|Acc])
-    end;
-eliminate_use_of_from_reg([I]=Is, From, _To, Acc) ->
-    case beam_utils:is_killed_block(From, [I]) of
-	true ->
-	    {yes,reverse(Acc, Is)};
-	false ->
-	    no
-    end.
-
-%% opt_alloc(Instructions) -> Instructions'
-%%  Optimises all allocate instructions.
-
-opt_alloc([{set,[],[],{alloc,Live0,Info0}},
-	   {set,[],[],{alloc,Live,Info}}|Is]) ->
-    Live = Live0,				%Assertion.
-    Alloc = combine_alloc(Info0, Info),
-    I = {set,[],[],{alloc,Live,Alloc}},
-    opt_alloc([I|Is]);
-opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is]) ->
-    [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|Is];
-opt_alloc([I|Is]) -> [I|opt_alloc(Is)];
-opt_alloc([]) -> [].
-
-combine_alloc({_,Ns,Nh1,Init}, {_,nostack,Nh2,[]})  ->
-    {zero,Ns,beam_utils:combine_heap_needs(Nh1, Nh2),Init}.
-	
-%% opt_alloc(Instructions, FrameSize, HeapNeed, LivingRegs) -> [Instr]
-%%  Generates the optimal sequence of instructions for
-%%  allocating and initalizing the stack frame and needed heap.
-
-opt_alloc(_Is, nostack, Nh, LivingRegs) ->
-    {alloc,LivingRegs,{nozero,nostack,Nh,[]}};
-opt_alloc(Is, Ns, Nh, LivingRegs) ->
-    InitRegs = init_yreg(Is, 0),
-    case count_ones(InitRegs) of
-	N when N*2 > Ns ->
-	    {alloc,LivingRegs,{nozero,Ns,Nh,gen_init(Ns, InitRegs)}};
-	_ ->
-	    {alloc,LivingRegs,{zero,Ns,Nh,[]}}
+%% sort_moves([Instruction]) -> [Instruction].
+%%  Sort move instructions on the Y register to give the loader
+%%  more opportunities for combining instructions.
+
+sort_moves([{set,[{x,_}],[{y,_}],move}=I|Is0]) ->
+    {Moves,Is} = sort_moves_1(Is0, x, y, [I]),
+    Moves ++ sort_moves(Is);
+sort_moves([{set,[{y,_}],[{x,_}],move}=I|Is0]) ->
+    {Moves,Is} = sort_moves_1(Is0, y, x, [I]),
+    Moves ++ sort_moves(Is);
+sort_moves([I|Is]) ->
+    [I|sort_moves(Is)];
+sort_moves([]) -> [].
+
+sort_moves_1([{set,[{x,0}],[_],move}=I|Is], _DTag, _STag, Acc) ->
+    %% The loader sometimes combines a move to x0 with the
+    %% instruction that follows, producing, for example, a move_call
+    %% instruction. Therefore, we don't want include this move
+    %% instruction in the sorting.
+    {sort_on_yreg(Acc)++[I],Is};
+sort_moves_1([{set,[{DTag,_}],[{STag,_}],move}=I|Is], DTag, STag, Acc) ->
+    sort_moves_1(Is, DTag, STag, [I|Acc]);
+sort_moves_1(Is, _DTag, _STag, Acc) ->
+    {sort_on_yreg(Acc),Is}.
+
+sort_on_yreg([{set,[Dst],[Src],move}|_]=Moves) ->
+    case {Dst,Src} of
+        {{y,_},{x,_}} ->
+            keysort(2, Moves);
+        {{x,_},{y,_}} ->
+            keysort(3, Moves)
     end.
-
-gen_init(Fs, Regs) -> gen_init(Fs, Regs, 0, []).
-
-gen_init(SameFs, _Regs, SameFs, Acc) -> reverse(Acc);
-gen_init(Fs, Regs, Y, Acc) when Regs band 1 =:= 0 ->
-    gen_init(Fs, Regs bsr 1, Y+1, [{init,{y,Y}}|Acc]);
-gen_init(Fs, Regs, Y, Acc) ->
-    gen_init(Fs, Regs bsr 1, Y+1, Acc).
-
-%% init_yreg(Instructions, RegSet) -> RegSetInitialized
-%%  Calculate the set of initialized y registers.
-
-init_yreg([{set,_,_,{bif,_,_}}|_], Reg) -> Reg;
-init_yreg([{set,_,_,{alloc,_,{gc_bif,_,_}}}|_], Reg) -> Reg;
-init_yreg([{set,_,_,{alloc,_,{put_map,_,_}}}|_], Reg) -> Reg;
-init_yreg([{set,Ds,_,_}|Is], Reg) -> init_yreg(Is, add_yregs(Ds, Reg));
-init_yreg(_Is, Reg) -> Reg.
-
-add_yregs(Ys, Reg) -> foldl(fun(Y, R0) -> add_yreg(Y, R0) end, Reg, Ys).
-    
-add_yreg({y,Y}, Reg) -> Reg bor (1 bsl Y);
-add_yreg(_, Reg)     -> Reg.
-
-count_ones(Bits) -> count_ones(Bits, 0).
-count_ones(0, Acc) -> Acc;
-count_ones(Bits, Acc) ->
-    count_ones(Bits bsr 1, Acc + (Bits band 1)).
-
-%% Calculate the new number of live registers when we move an allocate
-%% instruction upwards, passing a 'set' instruction.
-
-alloc_live_regs({set,Ds,Ss,_}, Regs0) ->
-    Rset = x_live(Ss, x_dead(Ds, (1 bsl Regs0)-1)),
-    live_regs(0, Rset).
-
-live_regs(N, 0) ->
-    N;
-live_regs(N, Regs) when Regs band 1 =:= 1 ->
-    live_regs(N+1, Regs bsr 1);
-live_regs(_, _) ->
-    not_possible.
-
-x_dead([{x,N}|Rs], Regs) -> x_dead(Rs, Regs band (bnot (1 bsl N)));
-x_dead([_|Rs], Regs) -> x_dead(Rs, Regs);
-x_dead([], Regs) -> Regs.
-
-x_live([{x,N}|Rs], Regs) -> x_live(Rs, Regs bor (1 bsl N));
-x_live([_|Rs], Regs) -> x_live(Rs, Regs);
-x_live([], Regs) -> Regs.