Merge pull request #2033 from bjorng/bjorn/compiler/sharing

Add a code sharing optimization pass
author: Björn Gustavsson <[email protected]> 2018-11-28 11:53:43 +0100
committer: GitHub <[email protected]> 2018-11-28 11:53:43 +0100
commit: 489b5eddf4b9b432e4e854bad603bb7d1d5678dc (patch)
tree: 325f9d30635d34257bd8e7d199e381705da549fe
parent: da90cf437e07f0f21b15e0d0ea8ff38cbc0bb1ea (diff)
parent: 96ac99d3e9b12295b7e8a70888fd1134a78e63a8 (diff)
download: otp-489b5eddf4b9b432e4e854bad603bb7d1d5678dc.tar.gz
otp-489b5eddf4b9b432e4e854bad603bb7d1d5678dc.tar.bz2
otp-489b5eddf4b9b432e4e854bad603bb7d1d5678dc.zip
12 files changed, 555 insertions, 39 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index d475e5a19a..961dacc6c9 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -69,6 +69,7 @@ MODULES =  \
 	beam_ssa_pp \
 	beam_ssa_pre_codegen \
 	beam_ssa_recv \
+	beam_ssa_share \
 	beam_ssa_type \
 	beam_kernel_to_ssa \
 	beam_trim \
diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index d3a618d211..8b0e3e32f8 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -101,6 +101,10 @@
 %%%      always keep the label. (beam_clean will remove any unused
 %%%      labels.)
 %%%
+%%% (7)  Replace a jump to a return instruction with a return instruction.
+%%%      Similarly, replace a jump to deallocate + return with those
+%%%      instructions.
+%%%
 %%% Note: This modules depends on (almost) all branches and jumps only
 %%% going forward, so that we can remove instructions (including definition
 %%% of labels) after any label that has not been referenced by the code
@@ -150,7 +154,8 @@ function({function,Name,Arity,CLabel,Asm0}, Lc0) ->
         Asm3 = share(Asm2),
         Asm4 = move(Asm3),
         Asm5 = opt(Asm4, CLabel),
-        Asm = remove_unused_labels(Asm5),
+        Asm6 = unshare(Asm5),
+        Asm = remove_unused_labels(Asm6),
         {{function,Name,Arity,CLabel,Asm},Lc}
     catch
         Class:Error:Stack ->
@@ -393,14 +398,13 @@ extract_seq_1(_, _) -> no.
 	{
 	  entry :: beam_asm:label(), %Entry label (must not be moved).
 	  replace :: #{beam_asm:label() := beam_asm:label()}, %Labels to replace.
-	  labels :: cerl_sets:set(),         %Set of referenced labels.
-          index :: beam_utils:code_index() | {lazy,[beam_utils:instruction()]} %Index built lazily only if needed
+	  labels :: cerl_sets:set()         %Set of referenced labels.
 	}).
 
 opt(Is0, CLabel) ->
     find_fixpoint(fun(Is) ->
 			  Lbls = initial_labels(Is),
-			  St = #st{entry=CLabel,replace=#{},labels=Lbls,index={lazy,Is}},
+			  St = #st{entry=CLabel,replace=#{},labels=Lbls},
 			  opt(Is, [], St)
 		  end, Is0).
 
@@ -425,16 +429,6 @@ opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) ->
 	    %% as in the jump that follows -- thus it is not needed.
 	    opt(Is, Acc, St)
     end;
-opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc, St) ->
-    %% Similar to the above, except we have a fall-through rather than jump
-    %%    Test Label Ops
-    %%    label Label
-    case beam_utils:is_pure_test(I) of
-	false ->
-	    opt(Is, [I|Acc], label_used(Lbl, St));
-	true ->
-	    opt(Is, Acc, St)
-    end;
 opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) ->
     case is_label_defined(Is, L) of
 	false ->
@@ -625,6 +619,39 @@ drop_upto_label([{label,_}|_]=Is) -> Is;
 drop_upto_label([_|Is]) -> drop_upto_label(Is);
 drop_upto_label([]) -> [].
 
+%% unshare([Instruction]) -> [Instruction].
+%%  Replace a jump to a return sequence (a `return` instruction
+%%  optionally preced by a `deallocate` instruction) with the return
+%%  sequence. This always saves execution time and may also save code
+%%  space (depending on the architecture). Eliminating `jump`
+%%  instructions also gives beam_trim more opportunities to trim the
+%%  stack.
+
+unshare(Is) ->
+    Short = unshare_collect_short(Is, #{}),
+    unshare_short(Is, Short).
+
+unshare_collect_short([{label,L},return|Is], Map) ->
+    unshare_collect_short(Is, Map#{L=>[return]});
+unshare_collect_short([{label,L},{deallocate,_}=D,return|Is], Map) ->
+    %% `deallocate` and `return` are combined into one instruction by
+    %% the loader.
+    unshare_collect_short(Is, Map#{L=>[D,return]});
+unshare_collect_short([_|Is], Map) ->
+    unshare_collect_short(Is, Map);
+unshare_collect_short([], Map) -> Map.
+
+unshare_short([{jump,{f,F}}=I|Is], Map) ->
+    case Map of
+        #{F:=Seq} ->
+            Seq ++ unshare_short(Is, Map);
+        #{} ->
+            [I|unshare_short(Is, Map)]
+    end;
+unshare_short([I|Is], Map) ->
+    [I|unshare_short(Is, Map)];
+unshare_short([], _Map) -> [].
+
 %% ulbl(Instruction, UsedCerlSet) -> UsedCerlSet'
 %%  Update the cerl_set UsedCerlSet with any function-local labels
 %%  (i.e. not with labels in call instructions) referenced by
diff --git a/lib/compiler/src/beam_peep.erl b/lib/compiler/src/beam_peep.erl
index 2323a439e9..5730e9704e 100644
--- a/lib/compiler/src/beam_peep.erl
+++ b/lib/compiler/src/beam_peep.erl
@@ -94,30 +94,26 @@ peep([{gc_bif,_,_,_,_,Dst}=I|Is], SeenTests0, Acc) ->
 peep([{jump,{f,L}},{label,L}=I|Is], _, Acc) ->
     %% Sometimes beam_jump has missed this optimization.
     peep(Is, gb_sets:empty(), [I|Acc]);
-peep([{select,Op,R,F,Vls0}|Is], SeenTests0, Acc0) ->
+peep([{select,select_val,R,F,Vls0}|Is], SeenTests0, Acc0) ->
     case prune_redundant_values(Vls0, F) of
 	[] ->
 	    %% No values left. Must convert to plain jump.
 	    I = {jump,F},
 	    peep([I|Is], gb_sets:empty(), Acc0);
-        [{atom,_}=Value,Lbl] when Op =:= select_val ->
+        [{atom,_}=Value,Lbl] ->
             %% Single value left. Convert to regular test.
             Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
             peep(Is1, SeenTests0, Acc0);
-        [{integer,_}=Value,Lbl] when Op =:= select_val ->
+        [{integer,_}=Value,Lbl] ->
             %% Single value left. Convert to regular test.
             Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
             peep(Is1, SeenTests0, Acc0);
-        [Arity,Lbl] when Op =:= select_tuple_arity ->
-            %% Single value left. Convert to regular test
-            Is1 = [{test,test_arity,F,[R,Arity]},{jump,Lbl}|Is],
-            peep(Is1, SeenTests0, Acc0);
 	[{atom,B1},Lbl,{atom,B2},Lbl] when B1 =:= not B2 ->
             %% Replace with is_boolean test.
             Is1 = [{test,is_boolean,F,[R]},{jump,Lbl}|Is],
             peep(Is1, SeenTests0, Acc0);
 	[_|_]=Vls ->
-	    I = {select,Op,R,F,Vls},
+	    I = {select,select_val,R,F,Vls},
 	    peep(Is, gb_sets:empty(), [I|Acc0])
     end;
 peep([{get_map_elements,Fail,Src,List}=I|Is], _SeenTests, Acc0) ->
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index ac2d943fef..2dda67eac6 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -22,7 +22,8 @@
 -export([module/2]).
 
 -include("beam_ssa.hrl").
--import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,reverse/1,reverse/2,
+-import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,
+                reverse/1,reverse/2,
                 splitwith/2,takewhile/2,unzip/1]).
 
 -spec module(beam_ssa:b_module(), [compile:option()]) ->
@@ -787,15 +788,20 @@ float_flush_regs(#fs{regs=Rs}) ->
 %%% with a cheaper instructions
 %%%
 
-ssa_opt_live(#st{ssa=Linear}=St) ->
-    St#st{ssa=live_opt(reverse(Linear), #{}, [])}.
+ssa_opt_live(#st{ssa=Linear0}=St) ->
+    RevLinear = reverse(Linear0),
+    Blocks0 = maps:from_list(RevLinear),
+    Blocks = live_opt(RevLinear, #{}, Blocks0),
+    Linear = beam_ssa:linearize(Blocks),
+    St#st{ssa=Linear}.
 
-live_opt([{L,Blk0}|Bs], LiveMap0, Acc) ->
-    Successors = beam_ssa:successors(Blk0),
+live_opt([{L,Blk0}|Bs], LiveMap0, Blocks) ->
+    Blk1 = beam_ssa_share:block(Blk0, Blocks),
+    Successors = beam_ssa:successors(Blk1),
     Live0 = live_opt_succ(Successors, L, LiveMap0),
-    {Blk,Live} = live_opt_blk(Blk0, Live0),
+    {Blk,Live} = live_opt_blk(Blk1, Live0),
     LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0),
-    live_opt(Bs, LiveMap, [{L,Blk}|Acc]);
+    live_opt(Bs, LiveMap, Blocks#{L:=Blk});
 live_opt([], _, Acc) -> Acc.
 
 live_opt_succ([S|Ss], L, LiveMap) ->
diff --git a/lib/compiler/src/beam_ssa_share.erl b/lib/compiler/src/beam_ssa_share.erl
new file mode 100644
index 0000000000..426efa2cc9
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_share.erl
@@ -0,0 +1,370 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% Share code for semantically equivalent blocks referred to
+%% to by `br` and `switch` instructions.
+%%
+%% A similar optimization is done in beam_jump, but doing it here as
+%% well is beneficial as it may enable other optimizations. If there
+%% are many semantically equivalent clauses, this optimization can
+%% substanstially decrease compilation times.
+%%
+%% block/2 is called from the liveness optimization pass in
+%% beam_ssa_opt, as code sharing helps the liveness pass and vice
+%% versa.
+%%
+
+-module(beam_ssa_share).
+-export([module/2,block/2]).
+
+-include("beam_ssa.hrl").
+
+-import(lists, [keyfind/3,reverse/1,sort/1]).
+
+-spec module(beam_ssa:b_module(), [compile:option()]) ->
+                    {'ok',beam_ssa:b_module()}.
+
+module(#b_module{body=Fs0}=Module, _Opts) ->
+    Fs = [function(F) || F <- Fs0],
+    {ok,Module#b_module{body=Fs}}.
+
+-spec block(Blk0, Blocks0) -> Blk when
+      Blk0 :: beam_ssa:b_blk(),
+      Blocks0 :: beam_ssa:block_map(),
+      Blk :: beam_ssa:b_blk().
+
+block(#b_blk{last=Last0}=Blk, Blocks) ->
+    case share_terminator(Last0, Blocks) of
+        none -> Blk;
+        Last -> Blk#b_blk{last=beam_ssa:normalize(Last)}
+    end.
+
+%%%
+%%% Local functions.
+%%%
+
+function(#b_function{anno=Anno,bs=Blocks0}=F) ->
+    try
+        PO = reverse(beam_ssa:rpo(Blocks0)),
+        {Blocks1,Changed} = blocks(PO, Blocks0, false),
+        Blocks = case Changed of
+                     true ->
+                         beam_ssa:trim_unreachable(Blocks1);
+                     false ->
+                         Blocks0
+                 end,
+        F#b_function{bs=Blocks}
+    catch
+        Class:Error:Stack ->
+            #{func_info:={_,Name,Arity}} = Anno,
+            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+            erlang:raise(Class, Error, Stack)
+    end.
+
+blocks([L|Ls], Blocks, Changed) ->
+    #b_blk{last=Last0} = Blk0 = map_get(L, Blocks),
+    case block(Blk0, Blocks) of
+        #b_blk{last=Last0} ->
+            blocks(Ls, Blocks, Changed);
+        #b_blk{}=Blk ->
+            blocks(Ls, Blocks#{L:=Blk}, true)
+    end;
+blocks([], Blocks, Changed) ->
+    {Blocks,Changed}.
+
+share_terminator(#b_br{bool=#b_var{},succ=Succ0,fail=Fail0}=Br, Blocks) ->
+    {Succ,SuccBlk} = shortcut_nonempty_block(Succ0, Blocks),
+    {Fail,FailBlk} = shortcut_nonempty_block(Fail0, Blocks),
+    case are_equivalent(Succ, SuccBlk, Fail, FailBlk, Blocks) of
+        true ->
+            %% The blocks are semantically equivalent.
+            Br#b_br{succ=Succ,fail=Succ};
+        false ->
+            if
+                Succ =:= Succ0, Fail =:= Fail0 ->
+                    %% None of blocks were cut short.
+                    none;
+                true ->
+                    %% One or both labels were cut short
+                    %% to avoid jumping to an empty block.
+                    Br#b_br{succ=Succ,fail=Fail}
+            end
+    end;
+share_terminator(#b_switch{}=Sw, Blocks) ->
+    share_switch(Sw, Blocks);
+share_terminator(_Last, _Blocks) -> none.
+
+%% Test whether the two blocks are semantically equivalent.  This
+%% function is specially optimized to return `false` as fast as
+%% possible if the blocks are not equivalent, as that is the common
+%% case.
+
+are_equivalent(_Succ, _, ?BADARG_BLOCK, _, _Blocks) ->
+    %% ?BADARG_BLOCK is special. Sharing could be incorrect.
+    false;
+are_equivalent(_Succ, #b_blk{is=Is1,last=#b_ret{arg=RetVal1}=Ret1},
+         _Fail, #b_blk{is=Is2,last=#b_ret{arg=RetVal2}=Ret2}, _Blocks) ->
+    case {RetVal1,RetVal2} of
+        {#b_literal{},#b_literal{}} ->
+            case RetVal1 =:= RetVal2 of
+                true ->
+                    %% The return values are identical literals. We
+                    %% only need to compare the canonicalized bodies.
+                    Can1 = canonical_is(Is1),
+                    Can2 = canonical_is(Is2),
+                    Can1 =:= Can2;
+                false ->
+                    %% Non-equal literals.
+                    false
+            end;
+        {#b_var{},#b_var{}} ->
+            %% The return values are varibles. We must canonicalize
+            %% the blocks (including returns) and compare them.
+            Can1 = canonical_is(Is1 ++ [Ret1]),
+            Can2 = canonical_is(Is2 ++ [Ret2]),
+            Can1 =:= Can2;
+        {_,_} ->
+            %% One literal and one variable.
+            false
+    end;
+are_equivalent(Succ,
+         #b_blk{is=Is1,
+                last=#b_br{bool=#b_literal{val=true},
+                           succ=Target}},
+         Fail,
+         #b_blk{is=Is2,
+                last=#b_br{bool=#b_literal{val=true},
+                           succ=Target}},
+         Blocks) ->
+    %% Both blocks end with an unconditional branch to the
+    %% same target block. If the target block has phi nodes,
+    %% we must pick up the values from the phi nodes and
+    %% compare them.
+    #b_blk{is=Is} = map_get(Target, Blocks),
+    Phis1 = canonical_terminator_phis(Is, Succ),
+    Phis2 = canonical_terminator_phis(Is, Fail),
+    case {Phis1,Phis2} of
+        {[#b_set{args=[#b_literal{}]}|_],_} when Phis1 =/= Phis2 ->
+            %% Different values are used in the phi nodes.
+            false;
+        {_,[#b_set{args=[#b_literal{}]}|_]} when Phis1 =/= Phis2 ->
+            %% Different values are used in the phi nodes.
+            false;
+        {_,_} ->
+            %% The values in the phi nodes are variables or identical
+            %% literals. We must canonicalize the blocks and compare
+            %% them.
+            Can1 = canonical_is(Is1 ++ Phis1),
+            Can2 = canonical_is(Is2 ++ Phis2),
+            Can1 =:= Can2
+    end;
+are_equivalent(Succ0, #b_blk{is=Is1,last=#b_br{bool=#b_var{},fail=Same}},
+         Fail0, #b_blk{is=Is2,last=#b_br{bool=#b_var{},fail=Same}},
+         Blocks) ->
+    %% Two-way branches with identical failure labels. First compare the
+    %% canonicalized bodies of the blocks.
+    case canonical_is(Is1) =:= canonical_is(Is2) of
+        false ->
+            %% Different bodies.
+            false;
+        true ->
+            %% Bodies were equal. That is fairly uncommon, so to keep
+            %% the code simple we will rewrite the `br` to a `switch`
+            %% and let share_switch/2 do the work of following the
+            %% branches.
+            Sw = #b_switch{arg=#b_var{name=not_used},fail=Fail0,
+                           list=[{#b_literal{},Succ0}]},
+            #b_switch{fail=Fail,list=[{_,Succ}]} = share_switch(Sw, Blocks),
+            Fail =:= Succ
+    end;
+are_equivalent(_, _, _, _, _) -> false.
+
+share_switch(#b_switch{fail=Fail0,list=List0}=Sw, Blocks) ->
+    Prep = share_prepare_sw([{value,Fail0}|List0], Blocks, 0, []),
+    Res = do_share_switch(Prep, Blocks, []),
+    [{_,Fail}|List] = [VL || {_,VL} <- sort(Res)],
+    Sw#b_switch{fail=Fail,list=List}.
+
+share_prepare_sw([{V,L0}|T], Blocks, N, Acc) ->
+    {L,_Blk} = shortcut_nonempty_block(L0, Blocks),
+    share_prepare_sw(T, Blocks, N+1, [{{L,#{}},{N,{V,L}}}|Acc]);
+share_prepare_sw([], _, _, Acc) -> Acc.
+
+do_share_switch(Prep, Blocks, Acc) ->
+    Map = share_switch_1(Prep, Blocks, #{}),
+    share_switch_2(maps:values(Map), Blocks, Acc).
+
+share_switch_1([{Next0,Res}|T], Blocks, Map) ->
+    {Can,Next} = canonical_block(Next0, Blocks),
+    case Map of
+        #{Can:=Ls} ->
+            share_switch_1(T, Blocks, Map#{Can:=[{Next,Res}|Ls]});
+        #{} ->
+            share_switch_1(T, Blocks, Map#{Can=>[{Next,Res}]})
+    end;
+share_switch_1([], _Blocks, Map) -> Map.
+
+share_switch_2([[{_,{N,Res}}]|T], Blocks, Acc) ->
+    %% This block is not equivalent to any other block.
+    share_switch_2(T, Blocks, [{N,Res}|Acc]);
+share_switch_2([[{done,{_,{_,Common}}}|_]=Eqs|T], Blocks, Acc0) ->
+    %% Two or more blocks are semantically equivalent, and all blocks
+    %% are either terminated with a `ret` or a `br` to the same target
+    %% block. Replace the labels in the `switch` for all of those
+    %% blocks with the label for the first of the blocks.
+    Acc = [{N,{V,Common}} || {done,{N,{V,_}}} <- Eqs] ++ Acc0,
+    share_switch_2(T, Blocks, Acc);
+share_switch_2([[{_,_}|_]=Prep|T], Blocks, Acc0) ->
+    %% Two or more blocks are semantically equivalent, but they have
+    %% different successful successor blocks. Now we must check
+    %% recursively whether the successor blocks are equivalent too.
+    Acc = do_share_switch(Prep, Blocks, Acc0),
+    share_switch_2(T, Blocks, Acc);
+share_switch_2([], _, Acc) -> Acc.
+
+canonical_block({L,VarMap0}, Blocks) ->
+    #b_blk{is=Is,last=Last0} = map_get(L, Blocks),
+    case canonical_terminator(L, Last0, Blocks) of
+        none ->
+            %% The block has a terminator that we don't handle.
+            {{none,L},done};
+        {Last,done} ->
+            %% The block ends with a `ret` or an unconditional `br` to
+            %% another block.
+            {Can,_VarMap} = canonical_is(Is ++ Last, VarMap0, []),
+            {Can,done};
+        {Last,Next} ->
+            %% The block ends with a conditional branch.
+            {Can,VarMap} = canonical_is(Is ++ Last, VarMap0, []),
+            {Can,{Next,VarMap}}
+    end.
+
+%% Translate a sequence of instructions to a canonical representation. If the
+%% canonical representation of two blocks compare equal, the blocks are
+%% semantically equivalent. The following translations are done:
+%%
+%%    * Variables defined in the instruction sequence are replaced with
+%%    {var,0}, {var,1}, and so on. Free variables are not changed.
+%%
+%%    * `location` annotations that would produce a `line` instruction are
+%%    kept. All other annotations are cleared.
+%%
+%%    * Instructions are repackaged into tuples instead of into the
+%%    usual records. The main reason is to avoid violating the types for
+%%    the SSA records. We can simplify things a little by linking the
+%%    instructions directly instead of putting them into a list.
+
+canonical_is(Is) ->
+    {Can,_} = canonical_is(Is, #{}, []),
+    Can.
+
+canonical_is([#b_set{op=Op,dst=Dst,args=Args0}=I|Is], VarMap0, Acc) ->
+    Args = [canonical_arg(Arg, VarMap0) || Arg <-Args0],
+    Var = {var,map_size(VarMap0)},
+    VarMap = VarMap0#{Dst=>Var},
+    LineAnno = case Op of
+                   bs_match ->
+                       %% The location annotation for a bs_match instruction
+                       %% is only used in warnings, never to emit a `line`
+                       %% instruction. Therefore, it should not be included.
+                       [];
+                   _ ->
+                       %% The location annotation will be used in a `line`
+                       %% instruction. It must be included.
+                       beam_ssa:get_anno(location, I, none)
+               end,
+    canonical_is(Is, VarMap, {Op,LineAnno,Var,Args,Acc});
+canonical_is([#b_ret{arg=Arg}], VarMap, Acc0) ->
+    Acc1 = case Acc0 of
+               {call,_Anno,Var,[#b_local{}|_]=Args,PrevAcc} ->
+                   %% This is a tail-recursive call to a local function.
+                   %% There will be no line instruction generated;
+                   %% thus, the annotation is not significant.
+                   {call,[],Var,Args,PrevAcc};
+               _ ->
+                   Acc0
+           end,
+    {{ret,canonical_arg(Arg, VarMap),Acc1},VarMap};
+canonical_is([#b_br{bool=#b_var{},fail=Fail}], VarMap, Acc) ->
+    {{br,succ,Fail,Acc},VarMap};
+canonical_is([#b_br{succ=Succ}], VarMap, Acc) ->
+    {{br,Succ,Acc},VarMap};
+canonical_is([], VarMap, Acc) ->
+    {Acc,VarMap}.
+
+canonical_terminator(_L, #b_ret{}=Ret, _Blocks) ->
+    {[Ret],done};
+canonical_terminator(L, #b_br{bool=#b_literal{val=true},succ=Succ}=Br, Blocks) ->
+    #b_blk{is=Is} = map_get(Succ, Blocks),
+    case canonical_terminator_phis(Is, L) of
+        [] ->
+            {[],Succ};
+        [_|_]=Phis ->
+            {Phis ++ [Br],done}
+    end;
+canonical_terminator(_L, #b_br{bool=#b_var{},succ=Succ}=Br, _Blocks) ->
+    {[Br],Succ};
+canonical_terminator(_, _, _) -> none.
+
+canonical_terminator_phis([#b_set{op=phi,args=PhiArgs}=Phi|Is], L) ->
+    {Value,L} = keyfind(L, 2, PhiArgs),
+    [Phi#b_set{op=copy,args=[Value]}|canonical_terminator_phis(Is, L)];
+canonical_terminator_phis([#b_set{op=peek_message}=I|_], L) ->
+    %% We could get stuck into an infinite loop if we allowed the
+    %% comparisons to continue into this block. Force an unequal
+    %% compare with all other predecessors of this block.
+    [I#b_set{op=copy,args=[#b_literal{val=L}]}];
+canonical_terminator_phis(_, _) -> [].
+
+canonical_arg(#b_var{}=Var, VarMap) ->
+    case VarMap of
+        #{Var:=CanonicalVar} ->
+            CanonicalVar;
+        #{} ->
+            Var
+    end;
+canonical_arg(#b_remote{mod=Mod,name=Name}, VarMap) ->
+    {remote,canonical_arg(Mod, VarMap),
+     canonical_arg(Name, VarMap)};
+canonical_arg(Other, _VarMap) -> Other.
+
+%% Shortcut branches to empty blocks if safe.
+
+shortcut_nonempty_block(L, Blocks) ->
+    case map_get(L, Blocks) of
+        #b_blk{is=[],last=#b_br{bool=#b_literal{val=true},succ=Succ}}=Blk ->
+            %% This block is empty.
+            case is_forbidden(Succ, Blocks) of
+                false ->
+                    shortcut_nonempty_block(Succ, Blocks);
+                true ->
+                    {L,Blk}
+            end;
+        #b_blk{}=Blk ->
+            {L,Blk}
+    end.
+
+is_forbidden(L, Blocks) ->
+    case map_get(L, Blocks) of
+        #b_blk{is=[#b_set{op=phi}|_]} -> true;
+        #b_blk{is=[#b_set{op=peek_message}|_]} -> true;
+        #b_blk{} -> false
+    end.
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 65c4f140c9..14c8c5b4ab 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -823,6 +823,9 @@ kernel_passes() ->
      {pass,beam_kernel_to_ssa},
      {iff,dssa,{listing,"ssa"}},
      {iff,ssalint,{pass,beam_ssa_lint}},
+     {unless,no_share_opt,{pass,beam_ssa_share}},
+     {iff,dssashare,{listing,"ssashare"}},
+     {iff,ssalint,{pass,beam_ssa_lint}},
      {unless,no_bsm_opt,{pass,beam_ssa_bsm}},
      {iff,dssabsm,{listing,"ssabsm"}},
      {iff,ssalint,{pass,beam_ssa_lint}},
@@ -2098,6 +2101,7 @@ pre_load() ->
 	 beam_ssa_opt,
 	 beam_ssa_pre_codegen,
 	 beam_ssa_recv,
+	 beam_ssa_share,
 	 beam_ssa_type,
 	 beam_trim,
 	 beam_utils,
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index 86259270bd..1472e3fde1 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -45,6 +45,7 @@
              beam_ssa_pp,
              beam_ssa_pre_codegen,
              beam_ssa_recv,
+             beam_ssa_share,
              beam_ssa_type,
 	     beam_trim,
 	     beam_utils,
diff --git a/lib/compiler/test/beam_jump_SUITE.erl b/lib/compiler/test/beam_jump_SUITE.erl
index 40eb6f06c3..759d884dc4 100644
--- a/lib/compiler/test/beam_jump_SUITE.erl
+++ b/lib/compiler/test/beam_jump_SUITE.erl
@@ -22,7 +22,8 @@
 -export([all/0,suite/0,groups/0,init_per_suite/1,end_per_suite/1,
 	 init_per_group/2,end_per_group/2,
 	 undefined_label/1,ambiguous_catch_try_state/1,
-         unsafe_move_elimination/1,build_tuple/1]).
+         unsafe_move_elimination/1,build_tuple/1,
+         coverage/1]).
 
 suite() ->
     [{ct_hooks,[ts_install_cth]}].
@@ -35,7 +36,8 @@ groups() ->
       [undefined_label,
        ambiguous_catch_try_state,
        unsafe_move_elimination,
-       build_tuple
+       build_tuple,
+       coverage
       ]}].
 
 init_per_suite(Config) ->
@@ -126,6 +128,44 @@ do_build_tuple(Message) ->
 	    {Message#message3.id, Res}
     end.
 
+coverage(_Config) ->
+    ok = coverage_1(ok),
+    {error,badarg} = coverage_1({error,badarg}),
+
+    gt = coverage_2(100, 42),
+    le = coverage_2(100, 999),
+    le = coverage_2([], []),
+    gt = coverage_2([], xxx),
+
+    ok.
+
+coverage_1(Var) ->
+    case id(Var) of
+	ok -> ok;
+	Error -> Error
+    end.
+
+%% Cover beam_jump:invert_test(is_ne_exact).
+coverage_2(Pre1, Pre2) ->
+    case
+        case Pre1 == [] of
+            false ->
+                false;
+            true ->
+                Pre2 /= []
+        end
+    of
+        true ->
+            gt;
+        false ->
+            case Pre1 > Pre2 of
+                true ->
+                    gt;
+                false ->
+                    le
+            end
+    end.
+
 
 id(I) ->
     I.
diff --git a/lib/compiler/test/beam_ssa_SUITE.erl b/lib/compiler/test/beam_ssa_SUITE.erl
index 5536abbdde..e32e3eebfc 100644
--- a/lib/compiler/test/beam_ssa_SUITE.erl
+++ b/lib/compiler/test/beam_ssa_SUITE.erl
@@ -22,7 +22,7 @@
 -export([all/0,suite/0,groups/0,init_per_suite/1,end_per_suite/1,
 	 init_per_group/2,end_per_group/2,
          calls/1,tuple_matching/1,recv/1,maps/1,
-         cover_ssa_dead/1,combine_sw/1]).
+         cover_ssa_dead/1,combine_sw/1,share_opt/1]).
 
 suite() -> [{ct_hooks,[ts_install_cth]}].
 
@@ -36,7 +36,8 @@ groups() ->
        recv,
        maps,
        cover_ssa_dead,
-       combine_sw
+       combine_sw,
+       share_opt
       ]}].
 
 init_per_suite(Config) ->
@@ -467,5 +468,18 @@ do_comb_sw_2(X) ->
     end,
     erase(?MODULE).
 
+share_opt(_Config) ->
+    ok = do_share_opt(0).
+
+do_share_opt(A) ->
+    %% The compiler would be stuck in an infinite loop in beam_ssa_share.
+    case A of
+        0 -> a;
+        1 -> b;
+        2 -> c
+    end,
+    receive after 1 -> ok end.
+
+
 %% The identity function.
 id(I) -> I.
diff --git a/lib/compiler/test/compile_SUITE.erl b/lib/compiler/test/compile_SUITE.erl
index 8d8bbe9543..6eae7b1404 100644
--- a/lib/compiler/test/compile_SUITE.erl
+++ b/lib/compiler/test/compile_SUITE.erl
@@ -1250,7 +1250,6 @@ do_opt_guards_fun([]) -> [].
 is_exception(guard_SUITE, {'-complex_not/1-fun-4-',1}) -> true;
 is_exception(guard_SUITE, {'-complex_not/1-fun-5-',1}) -> true;
 is_exception(guard_SUITE, {bad_guards,1}) -> true;
-is_exception(guard_SUITE, {bad_guards_3,2}) -> true;
 is_exception(guard_SUITE, {nested_not_2b,4}) -> true;
 is_exception(_, _) -> false.
 
diff --git a/lib/compiler/test/match_SUITE.erl b/lib/compiler/test/match_SUITE.erl
index 229c3093d7..eed2a31f70 100644
--- a/lib/compiler/test/match_SUITE.erl
+++ b/lib/compiler/test/match_SUITE.erl
@@ -483,9 +483,8 @@ sel_same_value2(V) when V =:= 42; V =:= 43 ->
 sel_same_value2(_) ->
     error.
 
-%% Test deconstruction of select_val instructions in beam_peep into
-%% regular tests with just one possible value left. Hitting proper cases
-%% in beam_peep relies on unification of labels by beam_jump.
+%% Test deconstruction of select_val instructions to regular tests
+%% with zero or one values left.
 
 deselectify(Config) when is_list(Config) ->
     one_or_other = desel_tuple_arity({1}),
@@ -506,7 +505,31 @@ deselectify(Config) when is_list(Config) ->
 
     one_or_other = dsel_atom_typecheck(one),
     two = dsel_atom_typecheck(two),
-    one_or_other = dsel_atom_typecheck(three).
+    one_or_other = dsel_atom_typecheck(three),
+
+    %% Cover deconstruction of select_val instructions in
+    %% beam_peep.
+
+    stop = dsel_peek_0(stop),
+    ignore = dsel_peek_0(ignore),
+    Config = dsel_peek_0(Config),
+
+    stop = dsel_peek_1(stop, any),
+    Config = dsel_peek_1(ignore, Config),
+    other = dsel_peek_1(other, ignored),
+
+    0 = dsel_peek_2(0, any),
+    Config = dsel_peek_2(1, Config),
+    2 = dsel_peek_2(2, ignored),
+
+    true = dsel_peek_3(true),
+    false = dsel_peek_3(false),
+    {error,Config} = dsel_peek_3(Config),
+
+    ok.
+
+%% The following will be optimized by the sharing optimizations
+%% in beam_ssa_opt.
 
 desel_tuple_arity(Tuple) when is_tuple(Tuple) ->
     case Tuple of
@@ -543,6 +566,39 @@ dsel_atom_typecheck(Val) when is_atom(Val) ->
         _ -> one_or_other
     end.
 
+%% The following functions are carefully crafted so that the sharing
+%% optimizations in beam_ssa_opt can't be applied. After applying the
+%% beam_jump:eliminate_moves/1 optimization and beam_clean:clean_labels/1
+%% has unified labels, beam_peep is able to optimize these functions.
+
+dsel_peek_0(A0) ->
+    case id(A0) of
+        stop ->   stop;
+        ignore -> ignore;
+        A ->      A
+    end.
+
+dsel_peek_1(A0, B) ->
+    case id(A0) of
+        stop ->   stop;
+        ignore -> B;
+        A ->      A
+    end.
+
+dsel_peek_2(A0, B) ->
+    case id(A0) of
+        0 -> 0;
+        1 -> B;
+        A -> A
+    end.
+
+dsel_peek_3(A0) ->
+    case id(A0) of
+        true ->  true;
+        false -> false;
+        Other -> {error,Other}
+    end.
+
 underscore(Config) when is_list(Config) ->
     case Config of
 	[] ->
diff --git a/lib/compiler/test/misc_SUITE.erl b/lib/compiler/test/misc_SUITE.erl
index d6fc51448f..2a6303ece8 100644
--- a/lib/compiler/test/misc_SUITE.erl
+++ b/lib/compiler/test/misc_SUITE.erl
@@ -183,6 +183,7 @@ silly_coverage(Config) when is_list(Config) ->
 
     %% beam_ssa_lint
     %% beam_ssa_recv
+    %% beam_ssa_share
     %% beam_ssa_pre_codegen
     %% beam_ssa_opt
     %% beam_ssa_codegen
@@ -190,6 +191,7 @@ silly_coverage(Config) when is_list(Config) ->
               [{b_function,#{func_info=>{mod,foo,0}},args,bad_blocks,0}]},
     expect_error(fun() -> beam_ssa_lint:module(BadSSA, []) end),
     expect_error(fun() -> beam_ssa_recv:module(BadSSA, []) end),
+    expect_error(fun() -> beam_ssa_share:module(BadSSA, []) end),
     expect_error(fun() -> beam_ssa_pre_codegen:module(BadSSA, []) end),
     expect_error(fun() -> beam_ssa_opt:module(BadSSA, []) end),
     expect_error(fun() -> beam_ssa_codegen:module(BadSSA, []) end),
@@ -258,7 +260,7 @@ silly_coverage(Config) when is_list(Config) ->
 		 [{function,foo,0,2,
 		   [{label,1},
 		    {func_info,{atom,?MODULE},{atom,foo},0},
-		    {label,2},{select,op,r,{f,2},[{f,2}]}]}],
+		    {label,2},{select,select_val,r,{f,2},[{f,2}]}]}],
 		 2},
     expect_error(fun() -> beam_peep:module(PeepInput, []) end),
author	Björn Gustavsson <[email protected]>	2018-11-28 11:53:43 +0100
committer	GitHub <[email protected]>	2018-11-28 11:53:43 +0100
commit	489b5eddf4b9b432e4e854bad603bb7d1d5678dc (patch)
tree	325f9d30635d34257bd8e7d199e381705da549fe
parent	da90cf437e07f0f21b15e0d0ea8ff38cbc0bb1ea (diff)
parent	96ac99d3e9b12295b7e8a70888fd1134a78e63a8 (diff)
download	otp-489b5eddf4b9b432e4e854bad603bb7d1d5678dc.tar.gz otp-489b5eddf4b9b432e4e854bad603bb7d1d5678dc.tar.bz2 otp-489b5eddf4b9b432e4e854bad603bb7d1d5678dc.zip