aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2018-11-28 11:53:43 +0100
committerGitHub <[email protected]>2018-11-28 11:53:43 +0100
commit489b5eddf4b9b432e4e854bad603bb7d1d5678dc (patch)
tree325f9d30635d34257bd8e7d199e381705da549fe
parentda90cf437e07f0f21b15e0d0ea8ff38cbc0bb1ea (diff)
parent96ac99d3e9b12295b7e8a70888fd1134a78e63a8 (diff)
downloadotp-489b5eddf4b9b432e4e854bad603bb7d1d5678dc.tar.gz
otp-489b5eddf4b9b432e4e854bad603bb7d1d5678dc.tar.bz2
otp-489b5eddf4b9b432e4e854bad603bb7d1d5678dc.zip
Merge pull request #2033 from bjorng/bjorn/compiler/sharing
Add a code sharing optimization pass
-rw-r--r--lib/compiler/src/Makefile1
-rw-r--r--lib/compiler/src/beam_jump.erl55
-rw-r--r--lib/compiler/src/beam_peep.erl12
-rw-r--r--lib/compiler/src/beam_ssa_opt.erl20
-rw-r--r--lib/compiler/src/beam_ssa_share.erl370
-rw-r--r--lib/compiler/src/compile.erl4
-rw-r--r--lib/compiler/src/compiler.app.src1
-rw-r--r--lib/compiler/test/beam_jump_SUITE.erl44
-rw-r--r--lib/compiler/test/beam_ssa_SUITE.erl18
-rw-r--r--lib/compiler/test/compile_SUITE.erl1
-rw-r--r--lib/compiler/test/match_SUITE.erl64
-rw-r--r--lib/compiler/test/misc_SUITE.erl4
12 files changed, 555 insertions, 39 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index d475e5a19a..961dacc6c9 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -69,6 +69,7 @@ MODULES = \
beam_ssa_pp \
beam_ssa_pre_codegen \
beam_ssa_recv \
+ beam_ssa_share \
beam_ssa_type \
beam_kernel_to_ssa \
beam_trim \
diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index d3a618d211..8b0e3e32f8 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -101,6 +101,10 @@
%%% always keep the label. (beam_clean will remove any unused
%%% labels.)
%%%
+%%% (7) Replace a jump to a return instruction with a return instruction.
+%%% Similarly, replace a jump to deallocate + return with those
+%%% instructions.
+%%%
%%% Note: This modules depends on (almost) all branches and jumps only
%%% going forward, so that we can remove instructions (including definition
%%% of labels) after any label that has not been referenced by the code
@@ -150,7 +154,8 @@ function({function,Name,Arity,CLabel,Asm0}, Lc0) ->
Asm3 = share(Asm2),
Asm4 = move(Asm3),
Asm5 = opt(Asm4, CLabel),
- Asm = remove_unused_labels(Asm5),
+ Asm6 = unshare(Asm5),
+ Asm = remove_unused_labels(Asm6),
{{function,Name,Arity,CLabel,Asm},Lc}
catch
Class:Error:Stack ->
@@ -393,14 +398,13 @@ extract_seq_1(_, _) -> no.
{
entry :: beam_asm:label(), %Entry label (must not be moved).
replace :: #{beam_asm:label() := beam_asm:label()}, %Labels to replace.
- labels :: cerl_sets:set(), %Set of referenced labels.
- index :: beam_utils:code_index() | {lazy,[beam_utils:instruction()]} %Index built lazily only if needed
+ labels :: cerl_sets:set() %Set of referenced labels.
}).
opt(Is0, CLabel) ->
find_fixpoint(fun(Is) ->
Lbls = initial_labels(Is),
- St = #st{entry=CLabel,replace=#{},labels=Lbls,index={lazy,Is}},
+ St = #st{entry=CLabel,replace=#{},labels=Lbls},
opt(Is, [], St)
end, Is0).
@@ -425,16 +429,6 @@ opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) ->
%% as in the jump that follows -- thus it is not needed.
opt(Is, Acc, St)
end;
-opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc, St) ->
- %% Similar to the above, except we have a fall-through rather than jump
- %% Test Label Ops
- %% label Label
- case beam_utils:is_pure_test(I) of
- false ->
- opt(Is, [I|Acc], label_used(Lbl, St));
- true ->
- opt(Is, Acc, St)
- end;
opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) ->
case is_label_defined(Is, L) of
false ->
@@ -625,6 +619,39 @@ drop_upto_label([{label,_}|_]=Is) -> Is;
drop_upto_label([_|Is]) -> drop_upto_label(Is);
drop_upto_label([]) -> [].
+%% unshare([Instruction]) -> [Instruction].
+%% Replace a jump to a return sequence (a `return` instruction
+%% optionally preced by a `deallocate` instruction) with the return
+%% sequence. This always saves execution time and may also save code
+%% space (depending on the architecture). Eliminating `jump`
+%% instructions also gives beam_trim more opportunities to trim the
+%% stack.
+
+unshare(Is) ->
+ Short = unshare_collect_short(Is, #{}),
+ unshare_short(Is, Short).
+
+unshare_collect_short([{label,L},return|Is], Map) ->
+ unshare_collect_short(Is, Map#{L=>[return]});
+unshare_collect_short([{label,L},{deallocate,_}=D,return|Is], Map) ->
+ %% `deallocate` and `return` are combined into one instruction by
+ %% the loader.
+ unshare_collect_short(Is, Map#{L=>[D,return]});
+unshare_collect_short([_|Is], Map) ->
+ unshare_collect_short(Is, Map);
+unshare_collect_short([], Map) -> Map.
+
+unshare_short([{jump,{f,F}}=I|Is], Map) ->
+ case Map of
+ #{F:=Seq} ->
+ Seq ++ unshare_short(Is, Map);
+ #{} ->
+ [I|unshare_short(Is, Map)]
+ end;
+unshare_short([I|Is], Map) ->
+ [I|unshare_short(Is, Map)];
+unshare_short([], _Map) -> [].
+
%% ulbl(Instruction, UsedCerlSet) -> UsedCerlSet'
%% Update the cerl_set UsedCerlSet with any function-local labels
%% (i.e. not with labels in call instructions) referenced by
diff --git a/lib/compiler/src/beam_peep.erl b/lib/compiler/src/beam_peep.erl
index 2323a439e9..5730e9704e 100644
--- a/lib/compiler/src/beam_peep.erl
+++ b/lib/compiler/src/beam_peep.erl
@@ -94,30 +94,26 @@ peep([{gc_bif,_,_,_,_,Dst}=I|Is], SeenTests0, Acc) ->
peep([{jump,{f,L}},{label,L}=I|Is], _, Acc) ->
%% Sometimes beam_jump has missed this optimization.
peep(Is, gb_sets:empty(), [I|Acc]);
-peep([{select,Op,R,F,Vls0}|Is], SeenTests0, Acc0) ->
+peep([{select,select_val,R,F,Vls0}|Is], SeenTests0, Acc0) ->
case prune_redundant_values(Vls0, F) of
[] ->
%% No values left. Must convert to plain jump.
I = {jump,F},
peep([I|Is], gb_sets:empty(), Acc0);
- [{atom,_}=Value,Lbl] when Op =:= select_val ->
+ [{atom,_}=Value,Lbl] ->
%% Single value left. Convert to regular test.
Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
peep(Is1, SeenTests0, Acc0);
- [{integer,_}=Value,Lbl] when Op =:= select_val ->
+ [{integer,_}=Value,Lbl] ->
%% Single value left. Convert to regular test.
Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
peep(Is1, SeenTests0, Acc0);
- [Arity,Lbl] when Op =:= select_tuple_arity ->
- %% Single value left. Convert to regular test
- Is1 = [{test,test_arity,F,[R,Arity]},{jump,Lbl}|Is],
- peep(Is1, SeenTests0, Acc0);
[{atom,B1},Lbl,{atom,B2},Lbl] when B1 =:= not B2 ->
%% Replace with is_boolean test.
Is1 = [{test,is_boolean,F,[R]},{jump,Lbl}|Is],
peep(Is1, SeenTests0, Acc0);
[_|_]=Vls ->
- I = {select,Op,R,F,Vls},
+ I = {select,select_val,R,F,Vls},
peep(Is, gb_sets:empty(), [I|Acc0])
end;
peep([{get_map_elements,Fail,Src,List}=I|Is], _SeenTests, Acc0) ->
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index ac2d943fef..2dda67eac6 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -22,7 +22,8 @@
-export([module/2]).
-include("beam_ssa.hrl").
--import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,reverse/1,reverse/2,
+-import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,
+ reverse/1,reverse/2,
splitwith/2,takewhile/2,unzip/1]).
-spec module(beam_ssa:b_module(), [compile:option()]) ->
@@ -787,15 +788,20 @@ float_flush_regs(#fs{regs=Rs}) ->
%%% with a cheaper instructions
%%%
-ssa_opt_live(#st{ssa=Linear}=St) ->
- St#st{ssa=live_opt(reverse(Linear), #{}, [])}.
+ssa_opt_live(#st{ssa=Linear0}=St) ->
+ RevLinear = reverse(Linear0),
+ Blocks0 = maps:from_list(RevLinear),
+ Blocks = live_opt(RevLinear, #{}, Blocks0),
+ Linear = beam_ssa:linearize(Blocks),
+ St#st{ssa=Linear}.
-live_opt([{L,Blk0}|Bs], LiveMap0, Acc) ->
- Successors = beam_ssa:successors(Blk0),
+live_opt([{L,Blk0}|Bs], LiveMap0, Blocks) ->
+ Blk1 = beam_ssa_share:block(Blk0, Blocks),
+ Successors = beam_ssa:successors(Blk1),
Live0 = live_opt_succ(Successors, L, LiveMap0),
- {Blk,Live} = live_opt_blk(Blk0, Live0),
+ {Blk,Live} = live_opt_blk(Blk1, Live0),
LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0),
- live_opt(Bs, LiveMap, [{L,Blk}|Acc]);
+ live_opt(Bs, LiveMap, Blocks#{L:=Blk});
live_opt([], _, Acc) -> Acc.
live_opt_succ([S|Ss], L, LiveMap) ->
diff --git a/lib/compiler/src/beam_ssa_share.erl b/lib/compiler/src/beam_ssa_share.erl
new file mode 100644
index 0000000000..426efa2cc9
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_share.erl
@@ -0,0 +1,370 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% Share code for semantically equivalent blocks referred to
+%% to by `br` and `switch` instructions.
+%%
+%% A similar optimization is done in beam_jump, but doing it here as
+%% well is beneficial as it may enable other optimizations. If there
+%% are many semantically equivalent clauses, this optimization can
+%% substanstially decrease compilation times.
+%%
+%% block/2 is called from the liveness optimization pass in
+%% beam_ssa_opt, as code sharing helps the liveness pass and vice
+%% versa.
+%%
+
+-module(beam_ssa_share).
+-export([module/2,block/2]).
+
+-include("beam_ssa.hrl").
+
+-import(lists, [keyfind/3,reverse/1,sort/1]).
+
+-spec module(beam_ssa:b_module(), [compile:option()]) ->
+ {'ok',beam_ssa:b_module()}.
+
+module(#b_module{body=Fs0}=Module, _Opts) ->
+ Fs = [function(F) || F <- Fs0],
+ {ok,Module#b_module{body=Fs}}.
+
+-spec block(Blk0, Blocks0) -> Blk when
+ Blk0 :: beam_ssa:b_blk(),
+ Blocks0 :: beam_ssa:block_map(),
+ Blk :: beam_ssa:b_blk().
+
+block(#b_blk{last=Last0}=Blk, Blocks) ->
+ case share_terminator(Last0, Blocks) of
+ none -> Blk;
+ Last -> Blk#b_blk{last=beam_ssa:normalize(Last)}
+ end.
+
+%%%
+%%% Local functions.
+%%%
+
+function(#b_function{anno=Anno,bs=Blocks0}=F) ->
+ try
+ PO = reverse(beam_ssa:rpo(Blocks0)),
+ {Blocks1,Changed} = blocks(PO, Blocks0, false),
+ Blocks = case Changed of
+ true ->
+ beam_ssa:trim_unreachable(Blocks1);
+ false ->
+ Blocks0
+ end,
+ F#b_function{bs=Blocks}
+ catch
+ Class:Error:Stack ->
+ #{func_info:={_,Name,Arity}} = Anno,
+ io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+ erlang:raise(Class, Error, Stack)
+ end.
+
+blocks([L|Ls], Blocks, Changed) ->
+ #b_blk{last=Last0} = Blk0 = map_get(L, Blocks),
+ case block(Blk0, Blocks) of
+ #b_blk{last=Last0} ->
+ blocks(Ls, Blocks, Changed);
+ #b_blk{}=Blk ->
+ blocks(Ls, Blocks#{L:=Blk}, true)
+ end;
+blocks([], Blocks, Changed) ->
+ {Blocks,Changed}.
+
+share_terminator(#b_br{bool=#b_var{},succ=Succ0,fail=Fail0}=Br, Blocks) ->
+ {Succ,SuccBlk} = shortcut_nonempty_block(Succ0, Blocks),
+ {Fail,FailBlk} = shortcut_nonempty_block(Fail0, Blocks),
+ case are_equivalent(Succ, SuccBlk, Fail, FailBlk, Blocks) of
+ true ->
+ %% The blocks are semantically equivalent.
+ Br#b_br{succ=Succ,fail=Succ};
+ false ->
+ if
+ Succ =:= Succ0, Fail =:= Fail0 ->
+ %% None of blocks were cut short.
+ none;
+ true ->
+ %% One or both labels were cut short
+ %% to avoid jumping to an empty block.
+ Br#b_br{succ=Succ,fail=Fail}
+ end
+ end;
+share_terminator(#b_switch{}=Sw, Blocks) ->
+ share_switch(Sw, Blocks);
+share_terminator(_Last, _Blocks) -> none.
+
+%% Test whether the two blocks are semantically equivalent. This
+%% function is specially optimized to return `false` as fast as
+%% possible if the blocks are not equivalent, as that is the common
+%% case.
+
+are_equivalent(_Succ, _, ?BADARG_BLOCK, _, _Blocks) ->
+ %% ?BADARG_BLOCK is special. Sharing could be incorrect.
+ false;
+are_equivalent(_Succ, #b_blk{is=Is1,last=#b_ret{arg=RetVal1}=Ret1},
+ _Fail, #b_blk{is=Is2,last=#b_ret{arg=RetVal2}=Ret2}, _Blocks) ->
+ case {RetVal1,RetVal2} of
+ {#b_literal{},#b_literal{}} ->
+ case RetVal1 =:= RetVal2 of
+ true ->
+ %% The return values are identical literals. We
+ %% only need to compare the canonicalized bodies.
+ Can1 = canonical_is(Is1),
+ Can2 = canonical_is(Is2),
+ Can1 =:= Can2;
+ false ->
+ %% Non-equal literals.
+ false
+ end;
+ {#b_var{},#b_var{}} ->
+ %% The return values are varibles. We must canonicalize
+ %% the blocks (including returns) and compare them.
+ Can1 = canonical_is(Is1 ++ [Ret1]),
+ Can2 = canonical_is(Is2 ++ [Ret2]),
+ Can1 =:= Can2;
+ {_,_} ->
+ %% One literal and one variable.
+ false
+ end;
+are_equivalent(Succ,
+ #b_blk{is=Is1,
+ last=#b_br{bool=#b_literal{val=true},
+ succ=Target}},
+ Fail,
+ #b_blk{is=Is2,
+ last=#b_br{bool=#b_literal{val=true},
+ succ=Target}},
+ Blocks) ->
+ %% Both blocks end with an unconditional branch to the
+ %% same target block. If the target block has phi nodes,
+ %% we must pick up the values from the phi nodes and
+ %% compare them.
+ #b_blk{is=Is} = map_get(Target, Blocks),
+ Phis1 = canonical_terminator_phis(Is, Succ),
+ Phis2 = canonical_terminator_phis(Is, Fail),
+ case {Phis1,Phis2} of
+ {[#b_set{args=[#b_literal{}]}|_],_} when Phis1 =/= Phis2 ->
+ %% Different values are used in the phi nodes.
+ false;
+ {_,[#b_set{args=[#b_literal{}]}|_]} when Phis1 =/= Phis2 ->
+ %% Different values are used in the phi nodes.
+ false;
+ {_,_} ->
+ %% The values in the phi nodes are variables or identical
+ %% literals. We must canonicalize the blocks and compare
+ %% them.
+ Can1 = canonical_is(Is1 ++ Phis1),
+ Can2 = canonical_is(Is2 ++ Phis2),
+ Can1 =:= Can2
+ end;
+are_equivalent(Succ0, #b_blk{is=Is1,last=#b_br{bool=#b_var{},fail=Same}},
+ Fail0, #b_blk{is=Is2,last=#b_br{bool=#b_var{},fail=Same}},
+ Blocks) ->
+ %% Two-way branches with identical failure labels. First compare the
+ %% canonicalized bodies of the blocks.
+ case canonical_is(Is1) =:= canonical_is(Is2) of
+ false ->
+ %% Different bodies.
+ false;
+ true ->
+ %% Bodies were equal. That is fairly uncommon, so to keep
+ %% the code simple we will rewrite the `br` to a `switch`
+ %% and let share_switch/2 do the work of following the
+ %% branches.
+ Sw = #b_switch{arg=#b_var{name=not_used},fail=Fail0,
+ list=[{#b_literal{},Succ0}]},
+ #b_switch{fail=Fail,list=[{_,Succ}]} = share_switch(Sw, Blocks),
+ Fail =:= Succ
+ end;
+are_equivalent(_, _, _, _, _) -> false.
+
+share_switch(#b_switch{fail=Fail0,list=List0}=Sw, Blocks) ->
+ Prep = share_prepare_sw([{value,Fail0}|List0], Blocks, 0, []),
+ Res = do_share_switch(Prep, Blocks, []),
+ [{_,Fail}|List] = [VL || {_,VL} <- sort(Res)],
+ Sw#b_switch{fail=Fail,list=List}.
+
+share_prepare_sw([{V,L0}|T], Blocks, N, Acc) ->
+ {L,_Blk} = shortcut_nonempty_block(L0, Blocks),
+ share_prepare_sw(T, Blocks, N+1, [{{L,#{}},{N,{V,L}}}|Acc]);
+share_prepare_sw([], _, _, Acc) -> Acc.
+
+do_share_switch(Prep, Blocks, Acc) ->
+ Map = share_switch_1(Prep, Blocks, #{}),
+ share_switch_2(maps:values(Map), Blocks, Acc).
+
+share_switch_1([{Next0,Res}|T], Blocks, Map) ->
+ {Can,Next} = canonical_block(Next0, Blocks),
+ case Map of
+ #{Can:=Ls} ->
+ share_switch_1(T, Blocks, Map#{Can:=[{Next,Res}|Ls]});
+ #{} ->
+ share_switch_1(T, Blocks, Map#{Can=>[{Next,Res}]})
+ end;
+share_switch_1([], _Blocks, Map) -> Map.
+
+share_switch_2([[{_,{N,Res}}]|T], Blocks, Acc) ->
+ %% This block is not equivalent to any other block.
+ share_switch_2(T, Blocks, [{N,Res}|Acc]);
+share_switch_2([[{done,{_,{_,Common}}}|_]=Eqs|T], Blocks, Acc0) ->
+ %% Two or more blocks are semantically equivalent, and all blocks
+ %% are either terminated with a `ret` or a `br` to the same target
+ %% block. Replace the labels in the `switch` for all of those
+ %% blocks with the label for the first of the blocks.
+ Acc = [{N,{V,Common}} || {done,{N,{V,_}}} <- Eqs] ++ Acc0,
+ share_switch_2(T, Blocks, Acc);
+share_switch_2([[{_,_}|_]=Prep|T], Blocks, Acc0) ->
+ %% Two or more blocks are semantically equivalent, but they have
+ %% different successful successor blocks. Now we must check
+ %% recursively whether the successor blocks are equivalent too.
+ Acc = do_share_switch(Prep, Blocks, Acc0),
+ share_switch_2(T, Blocks, Acc);
+share_switch_2([], _, Acc) -> Acc.
+
+canonical_block({L,VarMap0}, Blocks) ->
+ #b_blk{is=Is,last=Last0} = map_get(L, Blocks),
+ case canonical_terminator(L, Last0, Blocks) of
+ none ->
+ %% The block has a terminator that we don't handle.
+ {{none,L},done};
+ {Last,done} ->
+ %% The block ends with a `ret` or an unconditional `br` to
+ %% another block.
+ {Can,_VarMap} = canonical_is(Is ++ Last, VarMap0, []),
+ {Can,done};
+ {Last,Next} ->
+ %% The block ends with a conditional branch.
+ {Can,VarMap} = canonical_is(Is ++ Last, VarMap0, []),
+ {Can,{Next,VarMap}}
+ end.
+
+%% Translate a sequence of instructions to a canonical representation. If the
+%% canonical representation of two blocks compare equal, the blocks are
+%% semantically equivalent. The following translations are done:
+%%
+%% * Variables defined in the instruction sequence are replaced with
+%% {var,0}, {var,1}, and so on. Free variables are not changed.
+%%
+%% * `location` annotations that would produce a `line` instruction are
+%% kept. All other annotations are cleared.
+%%
+%% * Instructions are repackaged into tuples instead of into the
+%% usual records. The main reason is to avoid violating the types for
+%% the SSA records. We can simplify things a little by linking the
+%% instructions directly instead of putting them into a list.
+
+canonical_is(Is) ->
+ {Can,_} = canonical_is(Is, #{}, []),
+ Can.
+
+canonical_is([#b_set{op=Op,dst=Dst,args=Args0}=I|Is], VarMap0, Acc) ->
+ Args = [canonical_arg(Arg, VarMap0) || Arg <-Args0],
+ Var = {var,map_size(VarMap0)},
+ VarMap = VarMap0#{Dst=>Var},
+ LineAnno = case Op of
+ bs_match ->
+ %% The location annotation for a bs_match instruction
+ %% is only used in warnings, never to emit a `line`
+ %% instruction. Therefore, it should not be included.
+ [];
+ _ ->
+ %% The location annotation will be used in a `line`
+ %% instruction. It must be included.
+ beam_ssa:get_anno(location, I, none)
+ end,
+ canonical_is(Is, VarMap, {Op,LineAnno,Var,Args,Acc});
+canonical_is([#b_ret{arg=Arg}], VarMap, Acc0) ->
+ Acc1 = case Acc0 of
+ {call,_Anno,Var,[#b_local{}|_]=Args,PrevAcc} ->
+ %% This is a tail-recursive call to a local function.
+ %% There will be no line instruction generated;
+ %% thus, the annotation is not significant.
+ {call,[],Var,Args,PrevAcc};
+ _ ->
+ Acc0
+ end,
+ {{ret,canonical_arg(Arg, VarMap),Acc1},VarMap};
+canonical_is([#b_br{bool=#b_var{},fail=Fail}], VarMap, Acc) ->
+ {{br,succ,Fail,Acc},VarMap};
+canonical_is([#b_br{succ=Succ}], VarMap, Acc) ->
+ {{br,Succ,Acc},VarMap};
+canonical_is([], VarMap, Acc) ->
+ {Acc,VarMap}.
+
+canonical_terminator(_L, #b_ret{}=Ret, _Blocks) ->
+ {[Ret],done};
+canonical_terminator(L, #b_br{bool=#b_literal{val=true},succ=Succ}=Br, Blocks) ->
+ #b_blk{is=Is} = map_get(Succ, Blocks),
+ case canonical_terminator_phis(Is, L) of
+ [] ->
+ {[],Succ};
+ [_|_]=Phis ->
+ {Phis ++ [Br],done}
+ end;
+canonical_terminator(_L, #b_br{bool=#b_var{},succ=Succ}=Br, _Blocks) ->
+ {[Br],Succ};
+canonical_terminator(_, _, _) -> none.
+
+canonical_terminator_phis([#b_set{op=phi,args=PhiArgs}=Phi|Is], L) ->
+ {Value,L} = keyfind(L, 2, PhiArgs),
+ [Phi#b_set{op=copy,args=[Value]}|canonical_terminator_phis(Is, L)];
+canonical_terminator_phis([#b_set{op=peek_message}=I|_], L) ->
+ %% We could get stuck into an infinite loop if we allowed the
+ %% comparisons to continue into this block. Force an unequal
+ %% compare with all other predecessors of this block.
+ [I#b_set{op=copy,args=[#b_literal{val=L}]}];
+canonical_terminator_phis(_, _) -> [].
+
+canonical_arg(#b_var{}=Var, VarMap) ->
+ case VarMap of
+ #{Var:=CanonicalVar} ->
+ CanonicalVar;
+ #{} ->
+ Var
+ end;
+canonical_arg(#b_remote{mod=Mod,name=Name}, VarMap) ->
+ {remote,canonical_arg(Mod, VarMap),
+ canonical_arg(Name, VarMap)};
+canonical_arg(Other, _VarMap) -> Other.
+
+%% Shortcut branches to empty blocks if safe.
+
+shortcut_nonempty_block(L, Blocks) ->
+ case map_get(L, Blocks) of
+ #b_blk{is=[],last=#b_br{bool=#b_literal{val=true},succ=Succ}}=Blk ->
+ %% This block is empty.
+ case is_forbidden(Succ, Blocks) of
+ false ->
+ shortcut_nonempty_block(Succ, Blocks);
+ true ->
+ {L,Blk}
+ end;
+ #b_blk{}=Blk ->
+ {L,Blk}
+ end.
+
+is_forbidden(L, Blocks) ->
+ case map_get(L, Blocks) of
+ #b_blk{is=[#b_set{op=phi}|_]} -> true;
+ #b_blk{is=[#b_set{op=peek_message}|_]} -> true;
+ #b_blk{} -> false
+ end.
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 65c4f140c9..14c8c5b4ab 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -823,6 +823,9 @@ kernel_passes() ->
{pass,beam_kernel_to_ssa},
{iff,dssa,{listing,"ssa"}},
{iff,ssalint,{pass,beam_ssa_lint}},
+ {unless,no_share_opt,{pass,beam_ssa_share}},
+ {iff,dssashare,{listing,"ssashare"}},
+ {iff,ssalint,{pass,beam_ssa_lint}},
{unless,no_bsm_opt,{pass,beam_ssa_bsm}},
{iff,dssabsm,{listing,"ssabsm"}},
{iff,ssalint,{pass,beam_ssa_lint}},
@@ -2098,6 +2101,7 @@ pre_load() ->
beam_ssa_opt,
beam_ssa_pre_codegen,
beam_ssa_recv,
+ beam_ssa_share,
beam_ssa_type,
beam_trim,
beam_utils,
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index 86259270bd..1472e3fde1 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -45,6 +45,7 @@
beam_ssa_pp,
beam_ssa_pre_codegen,
beam_ssa_recv,
+ beam_ssa_share,
beam_ssa_type,
beam_trim,
beam_utils,
diff --git a/lib/compiler/test/beam_jump_SUITE.erl b/lib/compiler/test/beam_jump_SUITE.erl
index 40eb6f06c3..759d884dc4 100644
--- a/lib/compiler/test/beam_jump_SUITE.erl
+++ b/lib/compiler/test/beam_jump_SUITE.erl
@@ -22,7 +22,8 @@
-export([all/0,suite/0,groups/0,init_per_suite/1,end_per_suite/1,
init_per_group/2,end_per_group/2,
undefined_label/1,ambiguous_catch_try_state/1,
- unsafe_move_elimination/1,build_tuple/1]).
+ unsafe_move_elimination/1,build_tuple/1,
+ coverage/1]).
suite() ->
[{ct_hooks,[ts_install_cth]}].
@@ -35,7 +36,8 @@ groups() ->
[undefined_label,
ambiguous_catch_try_state,
unsafe_move_elimination,
- build_tuple
+ build_tuple,
+ coverage
]}].
init_per_suite(Config) ->
@@ -126,6 +128,44 @@ do_build_tuple(Message) ->
{Message#message3.id, Res}
end.
+coverage(_Config) ->
+ ok = coverage_1(ok),
+ {error,badarg} = coverage_1({error,badarg}),
+
+ gt = coverage_2(100, 42),
+ le = coverage_2(100, 999),
+ le = coverage_2([], []),
+ gt = coverage_2([], xxx),
+
+ ok.
+
+coverage_1(Var) ->
+ case id(Var) of
+ ok -> ok;
+ Error -> Error
+ end.
+
+%% Cover beam_jump:invert_test(is_ne_exact).
+coverage_2(Pre1, Pre2) ->
+ case
+ case Pre1 == [] of
+ false ->
+ false;
+ true ->
+ Pre2 /= []
+ end
+ of
+ true ->
+ gt;
+ false ->
+ case Pre1 > Pre2 of
+ true ->
+ gt;
+ false ->
+ le
+ end
+ end.
+
id(I) ->
I.
diff --git a/lib/compiler/test/beam_ssa_SUITE.erl b/lib/compiler/test/beam_ssa_SUITE.erl
index 5536abbdde..e32e3eebfc 100644
--- a/lib/compiler/test/beam_ssa_SUITE.erl
+++ b/lib/compiler/test/beam_ssa_SUITE.erl
@@ -22,7 +22,7 @@
-export([all/0,suite/0,groups/0,init_per_suite/1,end_per_suite/1,
init_per_group/2,end_per_group/2,
calls/1,tuple_matching/1,recv/1,maps/1,
- cover_ssa_dead/1,combine_sw/1]).
+ cover_ssa_dead/1,combine_sw/1,share_opt/1]).
suite() -> [{ct_hooks,[ts_install_cth]}].
@@ -36,7 +36,8 @@ groups() ->
recv,
maps,
cover_ssa_dead,
- combine_sw
+ combine_sw,
+ share_opt
]}].
init_per_suite(Config) ->
@@ -467,5 +468,18 @@ do_comb_sw_2(X) ->
end,
erase(?MODULE).
+share_opt(_Config) ->
+ ok = do_share_opt(0).
+
+do_share_opt(A) ->
+ %% The compiler would be stuck in an infinite loop in beam_ssa_share.
+ case A of
+ 0 -> a;
+ 1 -> b;
+ 2 -> c
+ end,
+ receive after 1 -> ok end.
+
+
%% The identity function.
id(I) -> I.
diff --git a/lib/compiler/test/compile_SUITE.erl b/lib/compiler/test/compile_SUITE.erl
index 8d8bbe9543..6eae7b1404 100644
--- a/lib/compiler/test/compile_SUITE.erl
+++ b/lib/compiler/test/compile_SUITE.erl
@@ -1250,7 +1250,6 @@ do_opt_guards_fun([]) -> [].
is_exception(guard_SUITE, {'-complex_not/1-fun-4-',1}) -> true;
is_exception(guard_SUITE, {'-complex_not/1-fun-5-',1}) -> true;
is_exception(guard_SUITE, {bad_guards,1}) -> true;
-is_exception(guard_SUITE, {bad_guards_3,2}) -> true;
is_exception(guard_SUITE, {nested_not_2b,4}) -> true;
is_exception(_, _) -> false.
diff --git a/lib/compiler/test/match_SUITE.erl b/lib/compiler/test/match_SUITE.erl
index 229c3093d7..eed2a31f70 100644
--- a/lib/compiler/test/match_SUITE.erl
+++ b/lib/compiler/test/match_SUITE.erl
@@ -483,9 +483,8 @@ sel_same_value2(V) when V =:= 42; V =:= 43 ->
sel_same_value2(_) ->
error.
-%% Test deconstruction of select_val instructions in beam_peep into
-%% regular tests with just one possible value left. Hitting proper cases
-%% in beam_peep relies on unification of labels by beam_jump.
+%% Test deconstruction of select_val instructions to regular tests
+%% with zero or one values left.
deselectify(Config) when is_list(Config) ->
one_or_other = desel_tuple_arity({1}),
@@ -506,7 +505,31 @@ deselectify(Config) when is_list(Config) ->
one_or_other = dsel_atom_typecheck(one),
two = dsel_atom_typecheck(two),
- one_or_other = dsel_atom_typecheck(three).
+ one_or_other = dsel_atom_typecheck(three),
+
+ %% Cover deconstruction of select_val instructions in
+ %% beam_peep.
+
+ stop = dsel_peek_0(stop),
+ ignore = dsel_peek_0(ignore),
+ Config = dsel_peek_0(Config),
+
+ stop = dsel_peek_1(stop, any),
+ Config = dsel_peek_1(ignore, Config),
+ other = dsel_peek_1(other, ignored),
+
+ 0 = dsel_peek_2(0, any),
+ Config = dsel_peek_2(1, Config),
+ 2 = dsel_peek_2(2, ignored),
+
+ true = dsel_peek_3(true),
+ false = dsel_peek_3(false),
+ {error,Config} = dsel_peek_3(Config),
+
+ ok.
+
+%% The following will be optimized by the sharing optimizations
+%% in beam_ssa_opt.
desel_tuple_arity(Tuple) when is_tuple(Tuple) ->
case Tuple of
@@ -543,6 +566,39 @@ dsel_atom_typecheck(Val) when is_atom(Val) ->
_ -> one_or_other
end.
+%% The following functions are carefully crafted so that the sharing
+%% optimizations in beam_ssa_opt can't be applied. After applying the
+%% beam_jump:eliminate_moves/1 optimization and beam_clean:clean_labels/1
+%% has unified labels, beam_peep is able to optimize these functions.
+
+dsel_peek_0(A0) ->
+ case id(A0) of
+ stop -> stop;
+ ignore -> ignore;
+ A -> A
+ end.
+
+dsel_peek_1(A0, B) ->
+ case id(A0) of
+ stop -> stop;
+ ignore -> B;
+ A -> A
+ end.
+
+dsel_peek_2(A0, B) ->
+ case id(A0) of
+ 0 -> 0;
+ 1 -> B;
+ A -> A
+ end.
+
+dsel_peek_3(A0) ->
+ case id(A0) of
+ true -> true;
+ false -> false;
+ Other -> {error,Other}
+ end.
+
underscore(Config) when is_list(Config) ->
case Config of
[] ->
diff --git a/lib/compiler/test/misc_SUITE.erl b/lib/compiler/test/misc_SUITE.erl
index d6fc51448f..2a6303ece8 100644
--- a/lib/compiler/test/misc_SUITE.erl
+++ b/lib/compiler/test/misc_SUITE.erl
@@ -183,6 +183,7 @@ silly_coverage(Config) when is_list(Config) ->
%% beam_ssa_lint
%% beam_ssa_recv
+ %% beam_ssa_share
%% beam_ssa_pre_codegen
%% beam_ssa_opt
%% beam_ssa_codegen
@@ -190,6 +191,7 @@ silly_coverage(Config) when is_list(Config) ->
[{b_function,#{func_info=>{mod,foo,0}},args,bad_blocks,0}]},
expect_error(fun() -> beam_ssa_lint:module(BadSSA, []) end),
expect_error(fun() -> beam_ssa_recv:module(BadSSA, []) end),
+ expect_error(fun() -> beam_ssa_share:module(BadSSA, []) end),
expect_error(fun() -> beam_ssa_pre_codegen:module(BadSSA, []) end),
expect_error(fun() -> beam_ssa_opt:module(BadSSA, []) end),
expect_error(fun() -> beam_ssa_codegen:module(BadSSA, []) end),
@@ -258,7 +260,7 @@ silly_coverage(Config) when is_list(Config) ->
[{function,foo,0,2,
[{label,1},
{func_info,{atom,?MODULE},{atom,foo},0},
- {label,2},{select,op,r,{f,2},[{f,2}]}]}],
+ {label,2},{select,select_val,r,{f,2},[{f,2}]}]}],
2},
expect_error(fun() -> beam_peep:module(PeepInput, []) end),