diff options
author | Björn Gustavsson <[email protected]> | 2018-11-28 11:53:43 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2018-11-28 11:53:43 +0100 |
commit | 489b5eddf4b9b432e4e854bad603bb7d1d5678dc (patch) | |
tree | 325f9d30635d34257bd8e7d199e381705da549fe | |
parent | da90cf437e07f0f21b15e0d0ea8ff38cbc0bb1ea (diff) | |
parent | 96ac99d3e9b12295b7e8a70888fd1134a78e63a8 (diff) | |
download | otp-489b5eddf4b9b432e4e854bad603bb7d1d5678dc.tar.gz otp-489b5eddf4b9b432e4e854bad603bb7d1d5678dc.tar.bz2 otp-489b5eddf4b9b432e4e854bad603bb7d1d5678dc.zip |
Merge pull request #2033 from bjorng/bjorn/compiler/sharing
Add a code sharing optimization pass
-rw-r--r-- | lib/compiler/src/Makefile | 1 | ||||
-rw-r--r-- | lib/compiler/src/beam_jump.erl | 55 | ||||
-rw-r--r-- | lib/compiler/src/beam_peep.erl | 12 | ||||
-rw-r--r-- | lib/compiler/src/beam_ssa_opt.erl | 20 | ||||
-rw-r--r-- | lib/compiler/src/beam_ssa_share.erl | 370 | ||||
-rw-r--r-- | lib/compiler/src/compile.erl | 4 | ||||
-rw-r--r-- | lib/compiler/src/compiler.app.src | 1 | ||||
-rw-r--r-- | lib/compiler/test/beam_jump_SUITE.erl | 44 | ||||
-rw-r--r-- | lib/compiler/test/beam_ssa_SUITE.erl | 18 | ||||
-rw-r--r-- | lib/compiler/test/compile_SUITE.erl | 1 | ||||
-rw-r--r-- | lib/compiler/test/match_SUITE.erl | 64 | ||||
-rw-r--r-- | lib/compiler/test/misc_SUITE.erl | 4 |
12 files changed, 555 insertions, 39 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile index d475e5a19a..961dacc6c9 100644 --- a/lib/compiler/src/Makefile +++ b/lib/compiler/src/Makefile @@ -69,6 +69,7 @@ MODULES = \ beam_ssa_pp \ beam_ssa_pre_codegen \ beam_ssa_recv \ + beam_ssa_share \ beam_ssa_type \ beam_kernel_to_ssa \ beam_trim \ diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl index d3a618d211..8b0e3e32f8 100644 --- a/lib/compiler/src/beam_jump.erl +++ b/lib/compiler/src/beam_jump.erl @@ -101,6 +101,10 @@ %%% always keep the label. (beam_clean will remove any unused %%% labels.) %%% +%%% (7) Replace a jump to a return instruction with a return instruction. +%%% Similarly, replace a jump to deallocate + return with those +%%% instructions. +%%% %%% Note: This modules depends on (almost) all branches and jumps only %%% going forward, so that we can remove instructions (including definition %%% of labels) after any label that has not been referenced by the code @@ -150,7 +154,8 @@ function({function,Name,Arity,CLabel,Asm0}, Lc0) -> Asm3 = share(Asm2), Asm4 = move(Asm3), Asm5 = opt(Asm4, CLabel), - Asm = remove_unused_labels(Asm5), + Asm6 = unshare(Asm5), + Asm = remove_unused_labels(Asm6), {{function,Name,Arity,CLabel,Asm},Lc} catch Class:Error:Stack -> @@ -393,14 +398,13 @@ extract_seq_1(_, _) -> no. { entry :: beam_asm:label(), %Entry label (must not be moved). replace :: #{beam_asm:label() := beam_asm:label()}, %Labels to replace. - labels :: cerl_sets:set(), %Set of referenced labels. - index :: beam_utils:code_index() | {lazy,[beam_utils:instruction()]} %Index built lazily only if needed + labels :: cerl_sets:set() %Set of referenced labels. }). opt(Is0, CLabel) -> find_fixpoint(fun(Is) -> Lbls = initial_labels(Is), - St = #st{entry=CLabel,replace=#{},labels=Lbls,index={lazy,Is}}, + St = #st{entry=CLabel,replace=#{},labels=Lbls}, opt(Is, [], St) end, Is0). @@ -425,16 +429,6 @@ opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) -> %% as in the jump that follows -- thus it is not needed. opt(Is, Acc, St) end; -opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc, St) -> - %% Similar to the above, except we have a fall-through rather than jump - %% Test Label Ops - %% label Label - case beam_utils:is_pure_test(I) of - false -> - opt(Is, [I|Acc], label_used(Lbl, St)); - true -> - opt(Is, Acc, St) - end; opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) -> case is_label_defined(Is, L) of false -> @@ -625,6 +619,39 @@ drop_upto_label([{label,_}|_]=Is) -> Is; drop_upto_label([_|Is]) -> drop_upto_label(Is); drop_upto_label([]) -> []. +%% unshare([Instruction]) -> [Instruction]. +%% Replace a jump to a return sequence (a `return` instruction +%% optionally preced by a `deallocate` instruction) with the return +%% sequence. This always saves execution time and may also save code +%% space (depending on the architecture). Eliminating `jump` +%% instructions also gives beam_trim more opportunities to trim the +%% stack. + +unshare(Is) -> + Short = unshare_collect_short(Is, #{}), + unshare_short(Is, Short). + +unshare_collect_short([{label,L},return|Is], Map) -> + unshare_collect_short(Is, Map#{L=>[return]}); +unshare_collect_short([{label,L},{deallocate,_}=D,return|Is], Map) -> + %% `deallocate` and `return` are combined into one instruction by + %% the loader. + unshare_collect_short(Is, Map#{L=>[D,return]}); +unshare_collect_short([_|Is], Map) -> + unshare_collect_short(Is, Map); +unshare_collect_short([], Map) -> Map. + +unshare_short([{jump,{f,F}}=I|Is], Map) -> + case Map of + #{F:=Seq} -> + Seq ++ unshare_short(Is, Map); + #{} -> + [I|unshare_short(Is, Map)] + end; +unshare_short([I|Is], Map) -> + [I|unshare_short(Is, Map)]; +unshare_short([], _Map) -> []. + %% ulbl(Instruction, UsedCerlSet) -> UsedCerlSet' %% Update the cerl_set UsedCerlSet with any function-local labels %% (i.e. not with labels in call instructions) referenced by diff --git a/lib/compiler/src/beam_peep.erl b/lib/compiler/src/beam_peep.erl index 2323a439e9..5730e9704e 100644 --- a/lib/compiler/src/beam_peep.erl +++ b/lib/compiler/src/beam_peep.erl @@ -94,30 +94,26 @@ peep([{gc_bif,_,_,_,_,Dst}=I|Is], SeenTests0, Acc) -> peep([{jump,{f,L}},{label,L}=I|Is], _, Acc) -> %% Sometimes beam_jump has missed this optimization. peep(Is, gb_sets:empty(), [I|Acc]); -peep([{select,Op,R,F,Vls0}|Is], SeenTests0, Acc0) -> +peep([{select,select_val,R,F,Vls0}|Is], SeenTests0, Acc0) -> case prune_redundant_values(Vls0, F) of [] -> %% No values left. Must convert to plain jump. I = {jump,F}, peep([I|Is], gb_sets:empty(), Acc0); - [{atom,_}=Value,Lbl] when Op =:= select_val -> + [{atom,_}=Value,Lbl] -> %% Single value left. Convert to regular test. Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is], peep(Is1, SeenTests0, Acc0); - [{integer,_}=Value,Lbl] when Op =:= select_val -> + [{integer,_}=Value,Lbl] -> %% Single value left. Convert to regular test. Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is], peep(Is1, SeenTests0, Acc0); - [Arity,Lbl] when Op =:= select_tuple_arity -> - %% Single value left. Convert to regular test - Is1 = [{test,test_arity,F,[R,Arity]},{jump,Lbl}|Is], - peep(Is1, SeenTests0, Acc0); [{atom,B1},Lbl,{atom,B2},Lbl] when B1 =:= not B2 -> %% Replace with is_boolean test. Is1 = [{test,is_boolean,F,[R]},{jump,Lbl}|Is], peep(Is1, SeenTests0, Acc0); [_|_]=Vls -> - I = {select,Op,R,F,Vls}, + I = {select,select_val,R,F,Vls}, peep(Is, gb_sets:empty(), [I|Acc0]) end; peep([{get_map_elements,Fail,Src,List}=I|Is], _SeenTests, Acc0) -> diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl index ac2d943fef..2dda67eac6 100644 --- a/lib/compiler/src/beam_ssa_opt.erl +++ b/lib/compiler/src/beam_ssa_opt.erl @@ -22,7 +22,8 @@ -export([module/2]). -include("beam_ssa.hrl"). --import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,reverse/1,reverse/2, +-import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2, + reverse/1,reverse/2, splitwith/2,takewhile/2,unzip/1]). -spec module(beam_ssa:b_module(), [compile:option()]) -> @@ -787,15 +788,20 @@ float_flush_regs(#fs{regs=Rs}) -> %%% with a cheaper instructions %%% -ssa_opt_live(#st{ssa=Linear}=St) -> - St#st{ssa=live_opt(reverse(Linear), #{}, [])}. +ssa_opt_live(#st{ssa=Linear0}=St) -> + RevLinear = reverse(Linear0), + Blocks0 = maps:from_list(RevLinear), + Blocks = live_opt(RevLinear, #{}, Blocks0), + Linear = beam_ssa:linearize(Blocks), + St#st{ssa=Linear}. -live_opt([{L,Blk0}|Bs], LiveMap0, Acc) -> - Successors = beam_ssa:successors(Blk0), +live_opt([{L,Blk0}|Bs], LiveMap0, Blocks) -> + Blk1 = beam_ssa_share:block(Blk0, Blocks), + Successors = beam_ssa:successors(Blk1), Live0 = live_opt_succ(Successors, L, LiveMap0), - {Blk,Live} = live_opt_blk(Blk0, Live0), + {Blk,Live} = live_opt_blk(Blk1, Live0), LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0), - live_opt(Bs, LiveMap, [{L,Blk}|Acc]); + live_opt(Bs, LiveMap, Blocks#{L:=Blk}); live_opt([], _, Acc) -> Acc. live_opt_succ([S|Ss], L, LiveMap) -> diff --git a/lib/compiler/src/beam_ssa_share.erl b/lib/compiler/src/beam_ssa_share.erl new file mode 100644 index 0000000000..426efa2cc9 --- /dev/null +++ b/lib/compiler/src/beam_ssa_share.erl @@ -0,0 +1,370 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +%% +%% Share code for semantically equivalent blocks referred to +%% to by `br` and `switch` instructions. +%% +%% A similar optimization is done in beam_jump, but doing it here as +%% well is beneficial as it may enable other optimizations. If there +%% are many semantically equivalent clauses, this optimization can +%% substanstially decrease compilation times. +%% +%% block/2 is called from the liveness optimization pass in +%% beam_ssa_opt, as code sharing helps the liveness pass and vice +%% versa. +%% + +-module(beam_ssa_share). +-export([module/2,block/2]). + +-include("beam_ssa.hrl"). + +-import(lists, [keyfind/3,reverse/1,sort/1]). + +-spec module(beam_ssa:b_module(), [compile:option()]) -> + {'ok',beam_ssa:b_module()}. + +module(#b_module{body=Fs0}=Module, _Opts) -> + Fs = [function(F) || F <- Fs0], + {ok,Module#b_module{body=Fs}}. + +-spec block(Blk0, Blocks0) -> Blk when + Blk0 :: beam_ssa:b_blk(), + Blocks0 :: beam_ssa:block_map(), + Blk :: beam_ssa:b_blk(). + +block(#b_blk{last=Last0}=Blk, Blocks) -> + case share_terminator(Last0, Blocks) of + none -> Blk; + Last -> Blk#b_blk{last=beam_ssa:normalize(Last)} + end. + +%%% +%%% Local functions. +%%% + +function(#b_function{anno=Anno,bs=Blocks0}=F) -> + try + PO = reverse(beam_ssa:rpo(Blocks0)), + {Blocks1,Changed} = blocks(PO, Blocks0, false), + Blocks = case Changed of + true -> + beam_ssa:trim_unreachable(Blocks1); + false -> + Blocks0 + end, + F#b_function{bs=Blocks} + catch + Class:Error:Stack -> + #{func_info:={_,Name,Arity}} = Anno, + io:fwrite("Function: ~w/~w\n", [Name,Arity]), + erlang:raise(Class, Error, Stack) + end. + +blocks([L|Ls], Blocks, Changed) -> + #b_blk{last=Last0} = Blk0 = map_get(L, Blocks), + case block(Blk0, Blocks) of + #b_blk{last=Last0} -> + blocks(Ls, Blocks, Changed); + #b_blk{}=Blk -> + blocks(Ls, Blocks#{L:=Blk}, true) + end; +blocks([], Blocks, Changed) -> + {Blocks,Changed}. + +share_terminator(#b_br{bool=#b_var{},succ=Succ0,fail=Fail0}=Br, Blocks) -> + {Succ,SuccBlk} = shortcut_nonempty_block(Succ0, Blocks), + {Fail,FailBlk} = shortcut_nonempty_block(Fail0, Blocks), + case are_equivalent(Succ, SuccBlk, Fail, FailBlk, Blocks) of + true -> + %% The blocks are semantically equivalent. + Br#b_br{succ=Succ,fail=Succ}; + false -> + if + Succ =:= Succ0, Fail =:= Fail0 -> + %% None of blocks were cut short. + none; + true -> + %% One or both labels were cut short + %% to avoid jumping to an empty block. + Br#b_br{succ=Succ,fail=Fail} + end + end; +share_terminator(#b_switch{}=Sw, Blocks) -> + share_switch(Sw, Blocks); +share_terminator(_Last, _Blocks) -> none. + +%% Test whether the two blocks are semantically equivalent. This +%% function is specially optimized to return `false` as fast as +%% possible if the blocks are not equivalent, as that is the common +%% case. + +are_equivalent(_Succ, _, ?BADARG_BLOCK, _, _Blocks) -> + %% ?BADARG_BLOCK is special. Sharing could be incorrect. + false; +are_equivalent(_Succ, #b_blk{is=Is1,last=#b_ret{arg=RetVal1}=Ret1}, + _Fail, #b_blk{is=Is2,last=#b_ret{arg=RetVal2}=Ret2}, _Blocks) -> + case {RetVal1,RetVal2} of + {#b_literal{},#b_literal{}} -> + case RetVal1 =:= RetVal2 of + true -> + %% The return values are identical literals. We + %% only need to compare the canonicalized bodies. + Can1 = canonical_is(Is1), + Can2 = canonical_is(Is2), + Can1 =:= Can2; + false -> + %% Non-equal literals. + false + end; + {#b_var{},#b_var{}} -> + %% The return values are varibles. We must canonicalize + %% the blocks (including returns) and compare them. + Can1 = canonical_is(Is1 ++ [Ret1]), + Can2 = canonical_is(Is2 ++ [Ret2]), + Can1 =:= Can2; + {_,_} -> + %% One literal and one variable. + false + end; +are_equivalent(Succ, + #b_blk{is=Is1, + last=#b_br{bool=#b_literal{val=true}, + succ=Target}}, + Fail, + #b_blk{is=Is2, + last=#b_br{bool=#b_literal{val=true}, + succ=Target}}, + Blocks) -> + %% Both blocks end with an unconditional branch to the + %% same target block. If the target block has phi nodes, + %% we must pick up the values from the phi nodes and + %% compare them. + #b_blk{is=Is} = map_get(Target, Blocks), + Phis1 = canonical_terminator_phis(Is, Succ), + Phis2 = canonical_terminator_phis(Is, Fail), + case {Phis1,Phis2} of + {[#b_set{args=[#b_literal{}]}|_],_} when Phis1 =/= Phis2 -> + %% Different values are used in the phi nodes. + false; + {_,[#b_set{args=[#b_literal{}]}|_]} when Phis1 =/= Phis2 -> + %% Different values are used in the phi nodes. + false; + {_,_} -> + %% The values in the phi nodes are variables or identical + %% literals. We must canonicalize the blocks and compare + %% them. + Can1 = canonical_is(Is1 ++ Phis1), + Can2 = canonical_is(Is2 ++ Phis2), + Can1 =:= Can2 + end; +are_equivalent(Succ0, #b_blk{is=Is1,last=#b_br{bool=#b_var{},fail=Same}}, + Fail0, #b_blk{is=Is2,last=#b_br{bool=#b_var{},fail=Same}}, + Blocks) -> + %% Two-way branches with identical failure labels. First compare the + %% canonicalized bodies of the blocks. + case canonical_is(Is1) =:= canonical_is(Is2) of + false -> + %% Different bodies. + false; + true -> + %% Bodies were equal. That is fairly uncommon, so to keep + %% the code simple we will rewrite the `br` to a `switch` + %% and let share_switch/2 do the work of following the + %% branches. + Sw = #b_switch{arg=#b_var{name=not_used},fail=Fail0, + list=[{#b_literal{},Succ0}]}, + #b_switch{fail=Fail,list=[{_,Succ}]} = share_switch(Sw, Blocks), + Fail =:= Succ + end; +are_equivalent(_, _, _, _, _) -> false. + +share_switch(#b_switch{fail=Fail0,list=List0}=Sw, Blocks) -> + Prep = share_prepare_sw([{value,Fail0}|List0], Blocks, 0, []), + Res = do_share_switch(Prep, Blocks, []), + [{_,Fail}|List] = [VL || {_,VL} <- sort(Res)], + Sw#b_switch{fail=Fail,list=List}. + +share_prepare_sw([{V,L0}|T], Blocks, N, Acc) -> + {L,_Blk} = shortcut_nonempty_block(L0, Blocks), + share_prepare_sw(T, Blocks, N+1, [{{L,#{}},{N,{V,L}}}|Acc]); +share_prepare_sw([], _, _, Acc) -> Acc. + +do_share_switch(Prep, Blocks, Acc) -> + Map = share_switch_1(Prep, Blocks, #{}), + share_switch_2(maps:values(Map), Blocks, Acc). + +share_switch_1([{Next0,Res}|T], Blocks, Map) -> + {Can,Next} = canonical_block(Next0, Blocks), + case Map of + #{Can:=Ls} -> + share_switch_1(T, Blocks, Map#{Can:=[{Next,Res}|Ls]}); + #{} -> + share_switch_1(T, Blocks, Map#{Can=>[{Next,Res}]}) + end; +share_switch_1([], _Blocks, Map) -> Map. + +share_switch_2([[{_,{N,Res}}]|T], Blocks, Acc) -> + %% This block is not equivalent to any other block. + share_switch_2(T, Blocks, [{N,Res}|Acc]); +share_switch_2([[{done,{_,{_,Common}}}|_]=Eqs|T], Blocks, Acc0) -> + %% Two or more blocks are semantically equivalent, and all blocks + %% are either terminated with a `ret` or a `br` to the same target + %% block. Replace the labels in the `switch` for all of those + %% blocks with the label for the first of the blocks. + Acc = [{N,{V,Common}} || {done,{N,{V,_}}} <- Eqs] ++ Acc0, + share_switch_2(T, Blocks, Acc); +share_switch_2([[{_,_}|_]=Prep|T], Blocks, Acc0) -> + %% Two or more blocks are semantically equivalent, but they have + %% different successful successor blocks. Now we must check + %% recursively whether the successor blocks are equivalent too. + Acc = do_share_switch(Prep, Blocks, Acc0), + share_switch_2(T, Blocks, Acc); +share_switch_2([], _, Acc) -> Acc. + +canonical_block({L,VarMap0}, Blocks) -> + #b_blk{is=Is,last=Last0} = map_get(L, Blocks), + case canonical_terminator(L, Last0, Blocks) of + none -> + %% The block has a terminator that we don't handle. + {{none,L},done}; + {Last,done} -> + %% The block ends with a `ret` or an unconditional `br` to + %% another block. + {Can,_VarMap} = canonical_is(Is ++ Last, VarMap0, []), + {Can,done}; + {Last,Next} -> + %% The block ends with a conditional branch. + {Can,VarMap} = canonical_is(Is ++ Last, VarMap0, []), + {Can,{Next,VarMap}} + end. + +%% Translate a sequence of instructions to a canonical representation. If the +%% canonical representation of two blocks compare equal, the blocks are +%% semantically equivalent. The following translations are done: +%% +%% * Variables defined in the instruction sequence are replaced with +%% {var,0}, {var,1}, and so on. Free variables are not changed. +%% +%% * `location` annotations that would produce a `line` instruction are +%% kept. All other annotations are cleared. +%% +%% * Instructions are repackaged into tuples instead of into the +%% usual records. The main reason is to avoid violating the types for +%% the SSA records. We can simplify things a little by linking the +%% instructions directly instead of putting them into a list. + +canonical_is(Is) -> + {Can,_} = canonical_is(Is, #{}, []), + Can. + +canonical_is([#b_set{op=Op,dst=Dst,args=Args0}=I|Is], VarMap0, Acc) -> + Args = [canonical_arg(Arg, VarMap0) || Arg <-Args0], + Var = {var,map_size(VarMap0)}, + VarMap = VarMap0#{Dst=>Var}, + LineAnno = case Op of + bs_match -> + %% The location annotation for a bs_match instruction + %% is only used in warnings, never to emit a `line` + %% instruction. Therefore, it should not be included. + []; + _ -> + %% The location annotation will be used in a `line` + %% instruction. It must be included. + beam_ssa:get_anno(location, I, none) + end, + canonical_is(Is, VarMap, {Op,LineAnno,Var,Args,Acc}); +canonical_is([#b_ret{arg=Arg}], VarMap, Acc0) -> + Acc1 = case Acc0 of + {call,_Anno,Var,[#b_local{}|_]=Args,PrevAcc} -> + %% This is a tail-recursive call to a local function. + %% There will be no line instruction generated; + %% thus, the annotation is not significant. + {call,[],Var,Args,PrevAcc}; + _ -> + Acc0 + end, + {{ret,canonical_arg(Arg, VarMap),Acc1},VarMap}; +canonical_is([#b_br{bool=#b_var{},fail=Fail}], VarMap, Acc) -> + {{br,succ,Fail,Acc},VarMap}; +canonical_is([#b_br{succ=Succ}], VarMap, Acc) -> + {{br,Succ,Acc},VarMap}; +canonical_is([], VarMap, Acc) -> + {Acc,VarMap}. + +canonical_terminator(_L, #b_ret{}=Ret, _Blocks) -> + {[Ret],done}; +canonical_terminator(L, #b_br{bool=#b_literal{val=true},succ=Succ}=Br, Blocks) -> + #b_blk{is=Is} = map_get(Succ, Blocks), + case canonical_terminator_phis(Is, L) of + [] -> + {[],Succ}; + [_|_]=Phis -> + {Phis ++ [Br],done} + end; +canonical_terminator(_L, #b_br{bool=#b_var{},succ=Succ}=Br, _Blocks) -> + {[Br],Succ}; +canonical_terminator(_, _, _) -> none. + +canonical_terminator_phis([#b_set{op=phi,args=PhiArgs}=Phi|Is], L) -> + {Value,L} = keyfind(L, 2, PhiArgs), + [Phi#b_set{op=copy,args=[Value]}|canonical_terminator_phis(Is, L)]; +canonical_terminator_phis([#b_set{op=peek_message}=I|_], L) -> + %% We could get stuck into an infinite loop if we allowed the + %% comparisons to continue into this block. Force an unequal + %% compare with all other predecessors of this block. + [I#b_set{op=copy,args=[#b_literal{val=L}]}]; +canonical_terminator_phis(_, _) -> []. + +canonical_arg(#b_var{}=Var, VarMap) -> + case VarMap of + #{Var:=CanonicalVar} -> + CanonicalVar; + #{} -> + Var + end; +canonical_arg(#b_remote{mod=Mod,name=Name}, VarMap) -> + {remote,canonical_arg(Mod, VarMap), + canonical_arg(Name, VarMap)}; +canonical_arg(Other, _VarMap) -> Other. + +%% Shortcut branches to empty blocks if safe. + +shortcut_nonempty_block(L, Blocks) -> + case map_get(L, Blocks) of + #b_blk{is=[],last=#b_br{bool=#b_literal{val=true},succ=Succ}}=Blk -> + %% This block is empty. + case is_forbidden(Succ, Blocks) of + false -> + shortcut_nonempty_block(Succ, Blocks); + true -> + {L,Blk} + end; + #b_blk{}=Blk -> + {L,Blk} + end. + +is_forbidden(L, Blocks) -> + case map_get(L, Blocks) of + #b_blk{is=[#b_set{op=phi}|_]} -> true; + #b_blk{is=[#b_set{op=peek_message}|_]} -> true; + #b_blk{} -> false + end. diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index 65c4f140c9..14c8c5b4ab 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -823,6 +823,9 @@ kernel_passes() -> {pass,beam_kernel_to_ssa}, {iff,dssa,{listing,"ssa"}}, {iff,ssalint,{pass,beam_ssa_lint}}, + {unless,no_share_opt,{pass,beam_ssa_share}}, + {iff,dssashare,{listing,"ssashare"}}, + {iff,ssalint,{pass,beam_ssa_lint}}, {unless,no_bsm_opt,{pass,beam_ssa_bsm}}, {iff,dssabsm,{listing,"ssabsm"}}, {iff,ssalint,{pass,beam_ssa_lint}}, @@ -2098,6 +2101,7 @@ pre_load() -> beam_ssa_opt, beam_ssa_pre_codegen, beam_ssa_recv, + beam_ssa_share, beam_ssa_type, beam_trim, beam_utils, diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src index 86259270bd..1472e3fde1 100644 --- a/lib/compiler/src/compiler.app.src +++ b/lib/compiler/src/compiler.app.src @@ -45,6 +45,7 @@ beam_ssa_pp, beam_ssa_pre_codegen, beam_ssa_recv, + beam_ssa_share, beam_ssa_type, beam_trim, beam_utils, diff --git a/lib/compiler/test/beam_jump_SUITE.erl b/lib/compiler/test/beam_jump_SUITE.erl index 40eb6f06c3..759d884dc4 100644 --- a/lib/compiler/test/beam_jump_SUITE.erl +++ b/lib/compiler/test/beam_jump_SUITE.erl @@ -22,7 +22,8 @@ -export([all/0,suite/0,groups/0,init_per_suite/1,end_per_suite/1, init_per_group/2,end_per_group/2, undefined_label/1,ambiguous_catch_try_state/1, - unsafe_move_elimination/1,build_tuple/1]). + unsafe_move_elimination/1,build_tuple/1, + coverage/1]). suite() -> [{ct_hooks,[ts_install_cth]}]. @@ -35,7 +36,8 @@ groups() -> [undefined_label, ambiguous_catch_try_state, unsafe_move_elimination, - build_tuple + build_tuple, + coverage ]}]. init_per_suite(Config) -> @@ -126,6 +128,44 @@ do_build_tuple(Message) -> {Message#message3.id, Res} end. +coverage(_Config) -> + ok = coverage_1(ok), + {error,badarg} = coverage_1({error,badarg}), + + gt = coverage_2(100, 42), + le = coverage_2(100, 999), + le = coverage_2([], []), + gt = coverage_2([], xxx), + + ok. + +coverage_1(Var) -> + case id(Var) of + ok -> ok; + Error -> Error + end. + +%% Cover beam_jump:invert_test(is_ne_exact). +coverage_2(Pre1, Pre2) -> + case + case Pre1 == [] of + false -> + false; + true -> + Pre2 /= [] + end + of + true -> + gt; + false -> + case Pre1 > Pre2 of + true -> + gt; + false -> + le + end + end. + id(I) -> I. diff --git a/lib/compiler/test/beam_ssa_SUITE.erl b/lib/compiler/test/beam_ssa_SUITE.erl index 5536abbdde..e32e3eebfc 100644 --- a/lib/compiler/test/beam_ssa_SUITE.erl +++ b/lib/compiler/test/beam_ssa_SUITE.erl @@ -22,7 +22,7 @@ -export([all/0,suite/0,groups/0,init_per_suite/1,end_per_suite/1, init_per_group/2,end_per_group/2, calls/1,tuple_matching/1,recv/1,maps/1, - cover_ssa_dead/1,combine_sw/1]). + cover_ssa_dead/1,combine_sw/1,share_opt/1]). suite() -> [{ct_hooks,[ts_install_cth]}]. @@ -36,7 +36,8 @@ groups() -> recv, maps, cover_ssa_dead, - combine_sw + combine_sw, + share_opt ]}]. init_per_suite(Config) -> @@ -467,5 +468,18 @@ do_comb_sw_2(X) -> end, erase(?MODULE). +share_opt(_Config) -> + ok = do_share_opt(0). + +do_share_opt(A) -> + %% The compiler would be stuck in an infinite loop in beam_ssa_share. + case A of + 0 -> a; + 1 -> b; + 2 -> c + end, + receive after 1 -> ok end. + + %% The identity function. id(I) -> I. diff --git a/lib/compiler/test/compile_SUITE.erl b/lib/compiler/test/compile_SUITE.erl index 8d8bbe9543..6eae7b1404 100644 --- a/lib/compiler/test/compile_SUITE.erl +++ b/lib/compiler/test/compile_SUITE.erl @@ -1250,7 +1250,6 @@ do_opt_guards_fun([]) -> []. is_exception(guard_SUITE, {'-complex_not/1-fun-4-',1}) -> true; is_exception(guard_SUITE, {'-complex_not/1-fun-5-',1}) -> true; is_exception(guard_SUITE, {bad_guards,1}) -> true; -is_exception(guard_SUITE, {bad_guards_3,2}) -> true; is_exception(guard_SUITE, {nested_not_2b,4}) -> true; is_exception(_, _) -> false. diff --git a/lib/compiler/test/match_SUITE.erl b/lib/compiler/test/match_SUITE.erl index 229c3093d7..eed2a31f70 100644 --- a/lib/compiler/test/match_SUITE.erl +++ b/lib/compiler/test/match_SUITE.erl @@ -483,9 +483,8 @@ sel_same_value2(V) when V =:= 42; V =:= 43 -> sel_same_value2(_) -> error. -%% Test deconstruction of select_val instructions in beam_peep into -%% regular tests with just one possible value left. Hitting proper cases -%% in beam_peep relies on unification of labels by beam_jump. +%% Test deconstruction of select_val instructions to regular tests +%% with zero or one values left. deselectify(Config) when is_list(Config) -> one_or_other = desel_tuple_arity({1}), @@ -506,7 +505,31 @@ deselectify(Config) when is_list(Config) -> one_or_other = dsel_atom_typecheck(one), two = dsel_atom_typecheck(two), - one_or_other = dsel_atom_typecheck(three). + one_or_other = dsel_atom_typecheck(three), + + %% Cover deconstruction of select_val instructions in + %% beam_peep. + + stop = dsel_peek_0(stop), + ignore = dsel_peek_0(ignore), + Config = dsel_peek_0(Config), + + stop = dsel_peek_1(stop, any), + Config = dsel_peek_1(ignore, Config), + other = dsel_peek_1(other, ignored), + + 0 = dsel_peek_2(0, any), + Config = dsel_peek_2(1, Config), + 2 = dsel_peek_2(2, ignored), + + true = dsel_peek_3(true), + false = dsel_peek_3(false), + {error,Config} = dsel_peek_3(Config), + + ok. + +%% The following will be optimized by the sharing optimizations +%% in beam_ssa_opt. desel_tuple_arity(Tuple) when is_tuple(Tuple) -> case Tuple of @@ -543,6 +566,39 @@ dsel_atom_typecheck(Val) when is_atom(Val) -> _ -> one_or_other end. +%% The following functions are carefully crafted so that the sharing +%% optimizations in beam_ssa_opt can't be applied. After applying the +%% beam_jump:eliminate_moves/1 optimization and beam_clean:clean_labels/1 +%% has unified labels, beam_peep is able to optimize these functions. + +dsel_peek_0(A0) -> + case id(A0) of + stop -> stop; + ignore -> ignore; + A -> A + end. + +dsel_peek_1(A0, B) -> + case id(A0) of + stop -> stop; + ignore -> B; + A -> A + end. + +dsel_peek_2(A0, B) -> + case id(A0) of + 0 -> 0; + 1 -> B; + A -> A + end. + +dsel_peek_3(A0) -> + case id(A0) of + true -> true; + false -> false; + Other -> {error,Other} + end. + underscore(Config) when is_list(Config) -> case Config of [] -> diff --git a/lib/compiler/test/misc_SUITE.erl b/lib/compiler/test/misc_SUITE.erl index d6fc51448f..2a6303ece8 100644 --- a/lib/compiler/test/misc_SUITE.erl +++ b/lib/compiler/test/misc_SUITE.erl @@ -183,6 +183,7 @@ silly_coverage(Config) when is_list(Config) -> %% beam_ssa_lint %% beam_ssa_recv + %% beam_ssa_share %% beam_ssa_pre_codegen %% beam_ssa_opt %% beam_ssa_codegen @@ -190,6 +191,7 @@ silly_coverage(Config) when is_list(Config) -> [{b_function,#{func_info=>{mod,foo,0}},args,bad_blocks,0}]}, expect_error(fun() -> beam_ssa_lint:module(BadSSA, []) end), expect_error(fun() -> beam_ssa_recv:module(BadSSA, []) end), + expect_error(fun() -> beam_ssa_share:module(BadSSA, []) end), expect_error(fun() -> beam_ssa_pre_codegen:module(BadSSA, []) end), expect_error(fun() -> beam_ssa_opt:module(BadSSA, []) end), expect_error(fun() -> beam_ssa_codegen:module(BadSSA, []) end), @@ -258,7 +260,7 @@ silly_coverage(Config) when is_list(Config) -> [{function,foo,0,2, [{label,1}, {func_info,{atom,?MODULE},{atom,foo},0}, - {label,2},{select,op,r,{f,2},[{f,2}]}]}], + {label,2},{select,select_val,r,{f,2},[{f,2}]}]}], 2}, expect_error(fun() -> beam_peep:module(PeepInput, []) end), |