Diffstat (limited to 'lib/compiler/src')
-rw-r--r--  lib/compiler/src/Makefile                 |    1
-rw-r--r--  lib/compiler/src/beam_block.erl           |   38
-rw-r--r--  lib/compiler/src/beam_jump.erl            |   55
-rw-r--r--  lib/compiler/src/beam_peep.erl            |   12
-rw-r--r--  lib/compiler/src/beam_ssa.erl             |    1
-rw-r--r--  lib/compiler/src/beam_ssa_codegen.erl     |   39
-rw-r--r--  lib/compiler/src/beam_ssa_dead.erl        |   59
-rw-r--r--  lib/compiler/src/beam_ssa_opt.erl         |   20
-rw-r--r--  lib/compiler/src/beam_ssa_pre_codegen.erl |   22
-rw-r--r--  lib/compiler/src/beam_ssa_share.erl       |  370
-rw-r--r--  lib/compiler/src/beam_ssa_type.erl        |  103
-rw-r--r--  lib/compiler/src/beam_validator.erl       |    6
-rw-r--r--  lib/compiler/src/compile.erl              |    4
-rw-r--r--  lib/compiler/src/compiler.app.src         |    1
-rw-r--r--  lib/compiler/src/sys_core_fold.erl        |   20
15 files changed, 662 insertions(+), 89 deletions(-)
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index d475e5a19a..961dacc6c9 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -69,6 +69,7 @@ MODULES = \
 	beam_ssa_pp \
 	beam_ssa_pre_codegen \
 	beam_ssa_recv \
+	beam_ssa_share \
 	beam_ssa_type \
 	beam_kernel_to_ssa \
 	beam_trim \
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 9d8d5b2b0c..707974b2c1 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -22,7 +22,7 @@
 -module(beam_block).
 -export([module/2]).
 
--import(lists, [reverse/1,splitwith/2]).
+-import(lists, [keysort/2,reverse/1,splitwith/2]).
 
 -spec module(beam_utils:module_code(), [compile:option()]) ->
@@ -53,7 +53,8 @@ blockify([I|Is0]=IsAll, Acc) ->
     case collect(I) of
         error -> blockify(Is0, [I|Acc]);
         Instr when is_tuple(Instr) ->
-            {Block,Is} = collect_block(IsAll),
+            {Block0,Is} = collect_block(IsAll),
+            Block = sort_moves(Block0),
             blockify(Is, [{block,Block}|Acc])
     end;
 blockify([], Acc) -> reverse(Acc).
@@ -117,3 +118,36 @@ embed_lines([{block,B1},{line,_}=Line|T], Acc) ->
 embed_lines([I|Is], Acc) ->
     embed_lines(Is, [I|Acc]);
 embed_lines([], Acc) -> Acc.
+
+%% sort_moves([Instruction]) -> [Instruction].
+%%  Sort move instructions on the Y register to give the loader
+%%  more opportunities for combining instructions.
+
+sort_moves([{set,[{x,_}],[{y,_}],move}=I|Is0]) ->
+    {Moves,Is} = sort_moves_1(Is0, x, y, [I]),
+    Moves ++ sort_moves(Is);
+sort_moves([{set,[{y,_}],[{x,_}],move}=I|Is0]) ->
+    {Moves,Is} = sort_moves_1(Is0, y, x, [I]),
+    Moves ++ sort_moves(Is);
+sort_moves([I|Is]) ->
+    [I|sort_moves(Is)];
+sort_moves([]) -> [].
+
+sort_moves_1([{set,[{x,0}],[_],move}=I|Is], _DTag, _STag, Acc) ->
+    %% The loader sometimes combines a move to x0 with the
+    %% instruction that follows, producing, for example, a move_call
+    %% instruction. Therefore, we don't want to include this move
+    %% instruction in the sorting.
+    {sort_on_yreg(Acc)++[I],Is};
+sort_moves_1([{set,[{DTag,_}],[{STag,_}],move}=I|Is], DTag, STag, Acc) ->
+    sort_moves_1(Is, DTag, STag, [I|Acc]);
+sort_moves_1(Is, _DTag, _STag, Acc) ->
+    {sort_on_yreg(Acc),Is}.
+
+sort_on_yreg([{set,[Dst],[Src],move}|_]=Moves) ->
+    case {Dst,Src} of
+        {{y,_},{x,_}} ->
+            keysort(2, Moves);
+        {{x,_},{y,_}} ->
+            keysort(3, Moves)
+    end.
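To see the kind of code the new sort_moves/1 pass targets, here is a small module (hypothetical; not part of this commit). Saving several arguments across a call produces a run of consecutive x-to-y moves in one block, which the pass can sort by Y register so the loader can fuse adjacent moves:

-module(sort_moves_example).
-export([f/3]).

%% Hypothetical input: A, B, and C must survive the call to g/0, so
%% the entry block stores them in Y registers with consecutive `move`
%% instructions. Sorting that run on the Y register gives the loader
%% a better chance to combine adjacent moves into one instruction.
f(A, B, C) ->
    Res = g(),
    {Res, A, B, C}.

g() -> ok.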
diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index d3a618d211..8b0e3e32f8 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -101,6 +101,10 @@
 %%%    always keep the label. (beam_clean will remove any unused
 %%%    labels.)
 %%%
+%%% (7) Replace a jump to a return instruction with a return instruction.
+%%%     Similarly, replace a jump to deallocate + return with those
+%%%     instructions.
+%%%
 %%% Note: This module depends on (almost) all branches and jumps only
 %%% going forward, so that we can remove instructions (including definition
 %%% of labels) after any label that has not been referenced by the code
@@ -150,7 +154,8 @@ function({function,Name,Arity,CLabel,Asm0}, Lc0) ->
         Asm3 = share(Asm2),
         Asm4 = move(Asm3),
         Asm5 = opt(Asm4, CLabel),
-        Asm = remove_unused_labels(Asm5),
+        Asm6 = unshare(Asm5),
+        Asm = remove_unused_labels(Asm6),
         {{function,Name,Arity,CLabel,Asm},Lc}
     catch
         Class:Error:Stack ->
@@ -393,14 +398,13 @@ extract_seq_1(_, _) -> no.
 
 -record(st,
 	{
 	 entry :: beam_asm:label(),          %Entry label (must not be moved).
 	 replace :: #{beam_asm:label() := beam_asm:label()}, %Labels to replace.
-	 labels :: cerl_sets:set(),          %Set of referenced labels.
-	 index :: beam_utils:code_index() | {lazy,[beam_utils:instruction()]} %Index built lazily only if needed
+	 labels :: cerl_sets:set()           %Set of referenced labels.
 	}).
 
 opt(Is0, CLabel) ->
     find_fixpoint(fun(Is) ->
                           Lbls = initial_labels(Is),
-                          St = #st{entry=CLabel,replace=#{},labels=Lbls,index={lazy,Is}},
+                          St = #st{entry=CLabel,replace=#{},labels=Lbls},
                           opt(Is, [], St)
                   end, Is0).
 
@@ -425,16 +429,6 @@ opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) ->
             %% as in the jump that follows -- thus it is not needed.
             opt(Is, Acc, St)
     end;
-opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc, St) ->
-    %% Similar to the above, except we have a fall-through rather than a jump:
-    %%    Test Label Ops
-    %%    label Label
-    case beam_utils:is_pure_test(I) of
-        false ->
-            opt(Is, [I|Acc], label_used(Lbl, St));
-        true ->
-            opt(Is, Acc, St)
-    end;
 opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) ->
     case is_label_defined(Is, L) of
         false ->
@@ -625,6 +619,39 @@ drop_upto_label([{label,_}|_]=Is) -> Is;
 drop_upto_label([_|Is]) -> drop_upto_label(Is);
 drop_upto_label([]) -> [].
 
+%% unshare([Instruction]) -> [Instruction].
+%%  Replace a jump to a return sequence (a `return` instruction
+%%  optionally preceded by a `deallocate` instruction) with the return
+%%  sequence. This always saves execution time and may also save code
+%%  space (depending on the architecture). Eliminating `jump`
+%%  instructions also gives beam_trim more opportunities to trim the
+%%  stack.
+
+unshare(Is) ->
+    Short = unshare_collect_short(Is, #{}),
+    unshare_short(Is, Short).
+
+unshare_collect_short([{label,L},return|Is], Map) ->
+    unshare_collect_short(Is, Map#{L=>[return]});
+unshare_collect_short([{label,L},{deallocate,_}=D,return|Is], Map) ->
+    %% `deallocate` and `return` are combined into one instruction by
+    %% the loader.
+    unshare_collect_short(Is, Map#{L=>[D,return]});
+unshare_collect_short([_|Is], Map) ->
+    unshare_collect_short(Is, Map);
+unshare_collect_short([], Map) -> Map.
+
+unshare_short([{jump,{f,F}}=I|Is], Map) ->
+    case Map of
+        #{F:=Seq} ->
+            Seq ++ unshare_short(Is, Map);
+        #{} ->
+            [I|unshare_short(Is, Map)]
+    end;
+unshare_short([I|Is], Map) ->
+    [I|unshare_short(Is, Map)];
+unshare_short([], _Map) -> [].
+
 %% ulbl(Instruction, UsedCerlSet) -> UsedCerlSet'
 %%  Update the cerl_set UsedCerlSet with any function-local labels
 %%  (i.e. not with labels in call instructions) referenced by
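A small illustration of what unshare/1 undoes (hypothetical module, for illustration only): the earlier share/1 pass may have made several clauses jump to one shared `return` (or `deallocate` + `return`) sequence, and unshare/1 copies that short sequence back to each jump site, trading a little code size for speed and better stack trimming:

-module(unshare_example).
-export([check/1]).

%% Hypothetical input: all three clauses end in an identical return
%% sequence. After unshare/1, each clause returns directly instead of
%% jumping to a shared return label.
check(X) when is_atom(X) -> atom;
check(X) when is_integer(X) -> integer;
check(_) -> other.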
diff --git a/lib/compiler/src/beam_peep.erl b/lib/compiler/src/beam_peep.erl
index 2323a439e9..5730e9704e 100644
--- a/lib/compiler/src/beam_peep.erl
+++ b/lib/compiler/src/beam_peep.erl
@@ -94,30 +94,26 @@ peep([{gc_bif,_,_,_,_,Dst}=I|Is], SeenTests0, Acc) ->
 peep([{jump,{f,L}},{label,L}=I|Is], _, Acc) ->
     %% Sometimes beam_jump has missed this optimization.
     peep(Is, gb_sets:empty(), [I|Acc]);
-peep([{select,Op,R,F,Vls0}|Is], SeenTests0, Acc0) ->
+peep([{select,select_val,R,F,Vls0}|Is], SeenTests0, Acc0) ->
     case prune_redundant_values(Vls0, F) of
         [] ->
             %% No values left. Must convert to plain jump.
             I = {jump,F},
             peep([I|Is], gb_sets:empty(), Acc0);
-        [{atom,_}=Value,Lbl] when Op =:= select_val ->
+        [{atom,_}=Value,Lbl] ->
             %% Single value left. Convert to regular test.
             Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
             peep(Is1, SeenTests0, Acc0);
-        [{integer,_}=Value,Lbl] when Op =:= select_val ->
+        [{integer,_}=Value,Lbl] ->
             %% Single value left. Convert to regular test.
             Is1 = [{test,is_eq_exact,F,[R,Value]},{jump,Lbl}|Is],
             peep(Is1, SeenTests0, Acc0);
-        [Arity,Lbl] when Op =:= select_tuple_arity ->
-            %% Single value left. Convert to regular test.
-            Is1 = [{test,test_arity,F,[R,Arity]},{jump,Lbl}|Is],
-            peep(Is1, SeenTests0, Acc0);
         [{atom,B1},Lbl,{atom,B2},Lbl] when B1 =:= not B2 ->
             %% Replace with is_boolean test.
             Is1 = [{test,is_boolean,F,[R]},{jump,Lbl}|Is],
             peep(Is1, SeenTests0, Acc0);
         [_|_]=Vls ->
-            I = {select,Op,R,F,Vls},
+            I = {select,select_val,R,F,Vls},
             peep(Is, gb_sets:empty(), [I|Acc0])
     end;
 peep([{get_map_elements,Fail,Src,List}=I|Is], _SeenTests, Acc0) ->
diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl
index c5e23d2ae0..b491e340b7 100644
--- a/lib/compiler/src/beam_ssa.erl
+++ b/lib/compiler/src/beam_ssa.erl
@@ -194,6 +194,7 @@ no_side_effect(#b_set{op=Op}) ->
         extract -> true;
         get_hd -> true;
         get_tl -> true;
+        get_map_element -> true;
         get_tuple_element -> true;
         has_map_field -> true;
         is_nonempty_list -> true;
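For context, a sketch of source code that exercises the is_boolean clause kept in beam_peep above (hypothetical module; names are illustrative). A select_val over the atoms `true` and `false` that branches to the same label for both values can be collapsed into one is_boolean test:

-module(peep_example).
-export([is_bool/1]).

%% Hypothetical input: `true` and `false` both select the same code,
%% matching the [{atom,B1},Lbl,{atom,B2},Lbl] pattern above, so the
%% select can be replaced by a single is_boolean test.
is_bool(X) ->
    case X of
        true -> yes;
        false -> yes;
        _ -> no
    end.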
diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl
index 3c14062d0b..d3facc5911 100644
--- a/lib/compiler/src/beam_ssa_codegen.erl
+++ b/lib/compiler/src/beam_ssa_codegen.erl
@@ -747,7 +747,16 @@ need_live_anno(Op) ->
     end.
 
 %%%
-%%% Add annotations for defined Y registers.
+%%% Add the following annotations for Y registers:
+%%%
+%%%   def_yregs   An ordset with variables that refer to live Y registers.
+%%%               That is, Y registers that have been killed are not
+%%%               included. This annotation is added to all instructions
+%%%               that require Y registers to be initialized.
+%%%
+%%%   kill_yregs  This annotation is added to call instructions. It is
+%%%               an ordset containing variables referring to Y registers
+%%%               that will no longer be used after the call instruction.
 %%%
 
 defined(Linear, #cg{regs=Regs}) ->
@@ -863,13 +872,35 @@ opt_allocate(Linear, #cg{regs=Regs}) ->
 
 opt_allocate_1([{L,#cg_blk{is=[#cg_alloc{stack=Stk}=I0|Is]}=Blk0}|Bs]=Bs0, Regs)
   when is_integer(Stk) ->
-    Yregs = opt_alloc_def(Bs0, gb_sets:singleton(L), []),
-    I = I0#cg_alloc{def_yregs=Yregs},
-    [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)];
+    %% Collect the variables that are initialized by copy
+    %% instructions in this block.
+    case ordsets:from_list(opt_allocate_defs(Is, Regs)) of
+        Yregs when length(Yregs) =:= Stk ->
+            %% Those copy instructions are sufficient to fully
+            %% initialize the stack frame.
+            I = I0#cg_alloc{def_yregs=Yregs},
+            [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)];
+        Yregs0 ->
+            %% Determine a conservative approximation of the Y
+            %% registers that are guaranteed to be initialized by all
+            %% successors of this block, and add to it the variables
+            %% initialized by copy instructions in this block.
+            Yregs1 = opt_alloc_def(Bs0, gb_sets:singleton(L), []),
+            Yregs = ordsets:union(Yregs0, Yregs1),
+            I = I0#cg_alloc{def_yregs=Yregs},
+            [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)]
+    end;
 opt_allocate_1([B|Bs], Regs) ->
     [B|opt_allocate_1(Bs, Regs)];
 opt_allocate_1([], _) -> [].
 
+opt_allocate_defs([#cg_set{op=copy,dst=Dst}|Is], Regs) ->
+    case is_yreg(Dst, Regs) of
+        true -> [Dst|opt_allocate_defs(Is, Regs)];
+        false -> []
+    end;
+opt_allocate_defs(_, _Regs) -> [].
+
 opt_alloc_def([{L,#cg_blk{is=Is,last=Last}}|Bs], Ws0, Def0) ->
     case gb_sets:is_member(L, Ws0) of
        false ->
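A rough illustration of the fast path added in opt_allocate_1/2 (hypothetical module; whether a given function actually takes this path depends on register allocation). When leading copy instructions initialize the whole stack frame, def_yregs can be computed from this block alone, with no search of successor blocks:

-module(yregs_example).
-export([f/2]).

%% Hypothetical input: both arguments are stored in the stack frame
%% right after allocation, so the copy instructions alone may fully
%% determine the def_yregs annotation for the allocate instruction.
f(A, B) ->
    ok = g(),
    {A, B}.

g() -> ok.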
diff --git a/lib/compiler/src/beam_ssa_dead.erl b/lib/compiler/src/beam_ssa_dead.erl
index c20652580d..067d9a6741 100644
--- a/lib/compiler/src/beam_ssa_dead.erl
+++ b/lib/compiler/src/beam_ssa_dead.erl
@@ -135,7 +135,8 @@ shortcut_terminator(Last, _Is, _Bs, _St) ->
     Last.
 
 shortcut_switch([{Lit,L0}|T], Bool, Bs, St0) ->
-    St = St0#st{rel_op=normalize_op({bif,'=:='}, [Bool,Lit])},
+    RelOp = {'=:=',Bool,Lit},
+    St = St0#st{rel_op=RelOp},
     #b_br{bool=#b_literal{val=true},succ=L} =
         shortcut(L0, bind_var(Bool, Lit, Bs), St#st{target=one_way}),
     [{Lit,L}|shortcut_switch(T, Bool, Bs, St0)];
@@ -388,41 +389,43 @@ eval_terminator(#b_switch{arg=Arg,fail=Fail,list=List}=Sw, Bs, St) ->
             %% Literal argument. Simplify to a `br`.
             beam_ssa:normalize(Sw#b_switch{arg=Val});
         #b_var{} ->
-            case St of
-                #st{rel_op=none} ->
-                    %% No previous relational operator is stored.
-                    %% Give up.
+            %% Try optimizing the switch.
+            case eval_switch(List, Arg, St, Fail) of
+                none ->
                     none;
-                #st{} ->
-                    %% There is a previous relational operator stored.
-                    %% Try optimizing the switch.
-                    case eval_switch(List, Arg, St, Fail) of
-                        none ->
-                            none;
-                        To when is_integer(To) ->
-                            %% Either one of the values in the switch
-                            %% matched a previous value in a '=:=' test, or
-                            %% none of the values matched a previous test.
-                            #b_br{bool=#b_literal{val=true},succ=To,fail=To}
-                    end
+                To when is_integer(To) ->
+                    %% Either one of the values in the switch
+                    %% matched a previous value in a '=:=' test, or
+                    %% none of the values matched a previous test.
+                    #b_br{bool=#b_literal{val=true},succ=To,fail=To}
             end
     end;
 eval_terminator(#b_ret{}, _Bs, _St) ->
     none.
 
-eval_switch([{Lit,Lbl}|T], Arg, St, Fail) ->
-    case eval_rel_op({bif,'=:='}, [Arg,Lit], St) of
-        none ->
-            %% This label could be reached.
-            eval_switch(T, Arg, St, none);
-        #b_literal{val=false} ->
-            %% This branch will never be taken.
-            eval_switch(T, Arg, St, Fail);
-        #b_literal{val=true} ->
+eval_switch(List, Arg, #st{rel_op={_,Arg,_}=PrevOp}, Fail) ->
+    %% There is a previous relational operator testing the same variable.
+    %% Optimization may be possible.
+    eval_switch_1(List, Arg, PrevOp, Fail);
+eval_switch(_, _, _, _) ->
+    %% There is either no previous relational operator, or it tests
+    %% a different variable. Nothing to optimize.
+    none.
+
+eval_switch_1([{Lit,Lbl}|T], Arg, PrevOp, Fail) ->
+    RelOp = {'=:=',Arg,Lit},
+    case will_succeed(PrevOp, RelOp) of
+        yes ->
             %% Success. This branch will always be taken.
-            Lbl
+            Lbl;
+        no ->
+            %% This branch will never be taken.
+            eval_switch_1(T, Arg, PrevOp, Fail);
+        maybe ->
+            %% This label could be reached.
+            eval_switch_1(T, Arg, PrevOp, none)
    end;
-eval_switch([], _Arg, _St, Fail) ->
+eval_switch_1([], _Arg, _PrevOp, Fail) ->
     %% Fail is now either the failure label or 'none'.
     Fail.
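A sketch of the pattern the reworked eval_switch handles (hypothetical module; the exact SSA shape depends on earlier passes). Once a `=:=` test on a variable has been recorded as the current relational operator, will_succeed/2 can decide each branch of a later switch on the same variable:

-module(dead_example).
-export([f/1]).

%% Hypothetical input: after the outer test records {'=:=',X,a},
%% the inner case on X can be shortcut: the `a` branch always
%% succeeds and the other branches can never be taken.
f(X) ->
    case X =:= a of
        true ->
            case X of
                a -> one;
                b -> two;
                _ -> three
            end;
        false ->
            other
    end.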
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index ac2d943fef..2dda67eac6 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -22,7 +22,8 @@
 -export([module/2]).
 
 -include("beam_ssa.hrl").
--import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,reverse/1,reverse/2,
+-import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,
+                reverse/1,reverse/2,
                 splitwith/2,takewhile/2,unzip/1]).
 
 -spec module(beam_ssa:b_module(), [compile:option()]) ->
@@ -787,15 +788,20 @@ float_flush_regs(#fs{regs=Rs}) ->
 %%% with cheaper instructions
 %%%
 
-ssa_opt_live(#st{ssa=Linear}=St) ->
-    St#st{ssa=live_opt(reverse(Linear), #{}, [])}.
+ssa_opt_live(#st{ssa=Linear0}=St) ->
+    RevLinear = reverse(Linear0),
+    Blocks0 = maps:from_list(RevLinear),
+    Blocks = live_opt(RevLinear, #{}, Blocks0),
+    Linear = beam_ssa:linearize(Blocks),
+    St#st{ssa=Linear}.
 
-live_opt([{L,Blk0}|Bs], LiveMap0, Acc) ->
-    Successors = beam_ssa:successors(Blk0),
+live_opt([{L,Blk0}|Bs], LiveMap0, Blocks) ->
+    Blk1 = beam_ssa_share:block(Blk0, Blocks),
+    Successors = beam_ssa:successors(Blk1),
     Live0 = live_opt_succ(Successors, L, LiveMap0),
-    {Blk,Live} = live_opt_blk(Blk0, Live0),
+    {Blk,Live} = live_opt_blk(Blk1, Live0),
     LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0),
-    live_opt(Bs, LiveMap, [{L,Blk}|Acc]);
+    live_opt(Bs, LiveMap, Blocks#{L:=Blk});
 live_opt([], _, Acc) -> Acc.
 
 live_opt_succ([S|Ss], L, LiveMap) ->
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index 9175931375..56fe9b4793 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -1478,6 +1478,10 @@ copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0],
                false, _Yregs, Copy, Count, Acc) ->
     I = I0#b_set{args=copy_sub_args(Args0, Copy)},
     {reverse(Acc, [I|acc_copy([], Copy)]),Count};
+copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0)
+  when Op =:= call; Op =:= make_fun ->
+    {I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0),
+    {reverse(Acc, [I]),Count};
 copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
     {reverse(Acc, acc_copy(Is, Copy)),Count};
 copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
@@ -1990,13 +1994,15 @@ reserve_zregs(Blocks, Intervals, Res) ->
     beam_ssa:fold_rpo(F, [0], Res, Blocks).
 
 reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
-              #b_set{op={bif,'=:='},args=[Dst,Val]}], _Last, ShortLived, A0) ->
-    case Val of
-        #b_literal{val=Arity} when Arity bsr 32 =:= 0 ->
+              #b_set{op={bif,'=:='},args=[Dst,Val]}], Last, ShortLived, A0) ->
+    case {Val,Last} of
+        {#b_literal{val=Arity},#b_br{}} when Arity bsr 32 =:= 0 ->
             %% These two instructions can be combined to a test_arity
             %% instruction provided that the arity variable is short-lived.
             reserve_zreg_1(Dst, ShortLived, A0);
-        _ ->
+        {_,_} ->
+            %% Either the arity is too big, or the boolean value from
+            %% '=:=' will be returned.
             A0
     end;
 reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
@@ -2211,7 +2217,13 @@ linear_scan(#st{intervals=Intervals0,res=Res}=St0) ->
     Free = init_free(maps:to_list(Res)),
     Intervals1 = [init_interval(Int, Res) || Int <- Intervals0],
     Intervals = sort(Intervals1),
-    IsReserved = fun (#i{reg=Reg}) -> Reg =/= none end,
+    IsReserved = fun(#i{reg=Reg}) ->
+                         case Reg of
+                             none -> false;
+                             {prefer,{_,_}} -> false;
+                             {_,_} -> true
+                         end
+                 end,
     {UnhandledRes,Unhandled} = partition(IsReserved, Intervals),
     L = #l{unhandled_res=UnhandledRes,
            unhandled_any=Unhandled,free=Free},
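The reserve_zreg change above distinguishes a tuple_size comparison used only for branching from one whose boolean result escapes. A small sketch of both cases (hypothetical module; names are illustrative):

-module(zreg_example).
-export([f/1,g/1]).

%% Hypothetical input: in f/1 the result of the comparison is used
%% only for branching, so tuple_size + '=:=' can be combined into a
%% test_arity instruction. In g/1 the boolean value from '=:=' is
%% returned, the case the new {_,_} clause above now rejects.
f(T) when tuple_size(T) =:= 2 -> pair;
f(_) -> other.

g(T) -> tuple_size(T) =:= 2.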
diff --git a/lib/compiler/src/beam_ssa_share.erl b/lib/compiler/src/beam_ssa_share.erl
new file mode 100644
index 0000000000..426efa2cc9
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_share.erl
@@ -0,0 +1,370 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%
+%% Share code for semantically equivalent blocks referred to by
+%% `br` and `switch` instructions.
+%%
+%% A similar optimization is done in beam_jump, but doing it here as
+%% well is beneficial as it may enable other optimizations. If there
+%% are many semantically equivalent clauses, this optimization can
+%% substantially decrease compilation times.
+%%
+%% block/2 is called from the liveness optimization pass in
+%% beam_ssa_opt, as code sharing helps the liveness pass and vice
+%% versa.
+%%
+
+-module(beam_ssa_share).
+-export([module/2,block/2]).
+
+-include("beam_ssa.hrl").
+
+-import(lists, [keyfind/3,reverse/1,sort/1]).
+
+-spec module(beam_ssa:b_module(), [compile:option()]) ->
+                    {'ok',beam_ssa:b_module()}.
+
+module(#b_module{body=Fs0}=Module, _Opts) ->
+    Fs = [function(F) || F <- Fs0],
+    {ok,Module#b_module{body=Fs}}.
+
+-spec block(Blk0, Blocks0) -> Blk when
+      Blk0 :: beam_ssa:b_blk(),
+      Blocks0 :: beam_ssa:block_map(),
+      Blk :: beam_ssa:b_blk().
+
+block(#b_blk{last=Last0}=Blk, Blocks) ->
+    case share_terminator(Last0, Blocks) of
+        none -> Blk;
+        Last -> Blk#b_blk{last=beam_ssa:normalize(Last)}
+    end.
+
+%%%
+%%% Local functions.
+%%%
+
+function(#b_function{anno=Anno,bs=Blocks0}=F) ->
+    try
+        PO = reverse(beam_ssa:rpo(Blocks0)),
+        {Blocks1,Changed} = blocks(PO, Blocks0, false),
+        Blocks = case Changed of
+                     true ->
+                         beam_ssa:trim_unreachable(Blocks1);
+                     false ->
+                         Blocks0
+                 end,
+        F#b_function{bs=Blocks}
+    catch
+        Class:Error:Stack ->
+            #{func_info:={_,Name,Arity}} = Anno,
+            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+            erlang:raise(Class, Error, Stack)
+    end.
+
+blocks([L|Ls], Blocks, Changed) ->
+    #b_blk{last=Last0} = Blk0 = map_get(L, Blocks),
+    case block(Blk0, Blocks) of
+        #b_blk{last=Last0} ->
+            blocks(Ls, Blocks, Changed);
+        #b_blk{}=Blk ->
+            blocks(Ls, Blocks#{L:=Blk}, true)
+    end;
+blocks([], Blocks, Changed) ->
+    {Blocks,Changed}.
+
+share_terminator(#b_br{bool=#b_var{},succ=Succ0,fail=Fail0}=Br, Blocks) ->
+    {Succ,SuccBlk} = shortcut_nonempty_block(Succ0, Blocks),
+    {Fail,FailBlk} = shortcut_nonempty_block(Fail0, Blocks),
+    case are_equivalent(Succ, SuccBlk, Fail, FailBlk, Blocks) of
+        true ->
+            %% The blocks are semantically equivalent.
+            Br#b_br{succ=Succ,fail=Succ};
+        false ->
+            if
+                Succ =:= Succ0, Fail =:= Fail0 ->
+                    %% Neither of the blocks was cut short.
+                    none;
+                true ->
+                    %% One or both labels were cut short
+                    %% to avoid jumping to an empty block.
+                    Br#b_br{succ=Succ,fail=Fail}
+            end
+    end;
+share_terminator(#b_switch{}=Sw, Blocks) ->
+    share_switch(Sw, Blocks);
+share_terminator(_Last, _Blocks) -> none.
+
+%% Test whether the two blocks are semantically equivalent. This
+%% function is specially optimized to return `false` as fast as
+%% possible if the blocks are not equivalent, as that is the common
+%% case.
+
+are_equivalent(_Succ, _, ?BADARG_BLOCK, _, _Blocks) ->
+    %% ?BADARG_BLOCK is special. Sharing could be incorrect.
+    false;
+are_equivalent(_Succ, #b_blk{is=Is1,last=#b_ret{arg=RetVal1}=Ret1},
+               _Fail, #b_blk{is=Is2,last=#b_ret{arg=RetVal2}=Ret2}, _Blocks) ->
+    case {RetVal1,RetVal2} of
+        {#b_literal{},#b_literal{}} ->
+            case RetVal1 =:= RetVal2 of
+                true ->
+                    %% The return values are identical literals. We
+                    %% only need to compare the canonicalized bodies.
+                    Can1 = canonical_is(Is1),
+                    Can2 = canonical_is(Is2),
+                    Can1 =:= Can2;
+                false ->
+                    %% Non-equal literals.
+                    false
+            end;
+        {#b_var{},#b_var{}} ->
+            %% The return values are variables. We must canonicalize
+            %% the blocks (including returns) and compare them.
+            Can1 = canonical_is(Is1 ++ [Ret1]),
+            Can2 = canonical_is(Is2 ++ [Ret2]),
+            Can1 =:= Can2;
+        {_,_} ->
+            %% One literal and one variable.
+            false
+    end;
+are_equivalent(Succ,
+               #b_blk{is=Is1,
+                      last=#b_br{bool=#b_literal{val=true},
+                                 succ=Target}},
+               Fail,
+               #b_blk{is=Is2,
+                      last=#b_br{bool=#b_literal{val=true},
+                                 succ=Target}},
+               Blocks) ->
+    %% Both blocks end with an unconditional branch to the
+    %% same target block. If the target block has phi nodes,
+    %% we must pick up the values from the phi nodes and
+    %% compare them.
+    #b_blk{is=Is} = map_get(Target, Blocks),
+    Phis1 = canonical_terminator_phis(Is, Succ),
+    Phis2 = canonical_terminator_phis(Is, Fail),
+    case {Phis1,Phis2} of
+        {[#b_set{args=[#b_literal{}]}|_],_} when Phis1 =/= Phis2 ->
+            %% Different values are used in the phi nodes.
+            false;
+        {_,[#b_set{args=[#b_literal{}]}|_]} when Phis1 =/= Phis2 ->
+            %% Different values are used in the phi nodes.
+            false;
+        {_,_} ->
+            %% The values in the phi nodes are variables or identical
+            %% literals. We must canonicalize the blocks and compare
+            %% them.
+            Can1 = canonical_is(Is1 ++ Phis1),
+            Can2 = canonical_is(Is2 ++ Phis2),
+            Can1 =:= Can2
+    end;
+are_equivalent(Succ0, #b_blk{is=Is1,last=#b_br{bool=#b_var{},fail=Same}},
+               Fail0, #b_blk{is=Is2,last=#b_br{bool=#b_var{},fail=Same}},
+               Blocks) ->
+    %% Two-way branches with identical failure labels. First compare the
+    %% canonicalized bodies of the blocks.
+    case canonical_is(Is1) =:= canonical_is(Is2) of
+        false ->
+            %% Different bodies.
+            false;
+        true ->
+            %% Bodies were equal. That is fairly uncommon, so to keep
+            %% the code simple we will rewrite the `br` to a `switch`
+            %% and let share_switch/2 do the work of following the
+            %% branches.
+            Sw = #b_switch{arg=#b_var{name=not_used},fail=Fail0,
+                           list=[{#b_literal{},Succ0}]},
+            #b_switch{fail=Fail,list=[{_,Succ}]} = share_switch(Sw, Blocks),
+            Fail =:= Succ
+    end;
+are_equivalent(_, _, _, _, _) -> false.
+
+share_switch(#b_switch{fail=Fail0,list=List0}=Sw, Blocks) ->
+    Prep = share_prepare_sw([{value,Fail0}|List0], Blocks, 0, []),
+    Res = do_share_switch(Prep, Blocks, []),
+    [{_,Fail}|List] = [VL || {_,VL} <- sort(Res)],
+    Sw#b_switch{fail=Fail,list=List}.
+
+share_prepare_sw([{V,L0}|T], Blocks, N, Acc) ->
+    {L,_Blk} = shortcut_nonempty_block(L0, Blocks),
+    share_prepare_sw(T, Blocks, N+1, [{{L,#{}},{N,{V,L}}}|Acc]);
+share_prepare_sw([], _, _, Acc) -> Acc.
+
+do_share_switch(Prep, Blocks, Acc) ->
+    Map = share_switch_1(Prep, Blocks, #{}),
+    share_switch_2(maps:values(Map), Blocks, Acc).
+
+share_switch_1([{Next0,Res}|T], Blocks, Map) ->
+    {Can,Next} = canonical_block(Next0, Blocks),
+    case Map of
+        #{Can:=Ls} ->
+            share_switch_1(T, Blocks, Map#{Can:=[{Next,Res}|Ls]});
+        #{} ->
+            share_switch_1(T, Blocks, Map#{Can=>[{Next,Res}]})
+    end;
+share_switch_1([], _Blocks, Map) -> Map.
+
+share_switch_2([[{_,{N,Res}}]|T], Blocks, Acc) ->
+    %% This block is not equivalent to any other block.
+    share_switch_2(T, Blocks, [{N,Res}|Acc]);
+share_switch_2([[{done,{_,{_,Common}}}|_]=Eqs|T], Blocks, Acc0) ->
+    %% Two or more blocks are semantically equivalent, and all blocks
+    %% are either terminated with a `ret` or a `br` to the same target
+    %% block. Replace the labels in the `switch` for all of those
+    %% blocks with the label for the first of the blocks.
+    Acc = [{N,{V,Common}} || {done,{N,{V,_}}} <- Eqs] ++ Acc0,
+    share_switch_2(T, Blocks, Acc);
+share_switch_2([[{_,_}|_]=Prep|T], Blocks, Acc0) ->
+    %% Two or more blocks are semantically equivalent, but they have
+    %% different successful successor blocks. Now we must check
+    %% recursively whether the successor blocks are equivalent too.
+    Acc = do_share_switch(Prep, Blocks, Acc0),
+    share_switch_2(T, Blocks, Acc);
+share_switch_2([], _, Acc) -> Acc.
+
+canonical_block({L,VarMap0}, Blocks) ->
+    #b_blk{is=Is,last=Last0} = map_get(L, Blocks),
+    case canonical_terminator(L, Last0, Blocks) of
+        none ->
+            %% The block has a terminator that we don't handle.
+            {{none,L},done};
+        {Last,done} ->
+            %% The block ends with a `ret` or an unconditional `br` to
+            %% another block.
+            {Can,_VarMap} = canonical_is(Is ++ Last, VarMap0, []),
+            {Can,done};
+        {Last,Next} ->
+            %% The block ends with a conditional branch.
+            {Can,VarMap} = canonical_is(Is ++ Last, VarMap0, []),
+            {Can,{Next,VarMap}}
+    end.
+
+%% Translate a sequence of instructions to a canonical representation. If the
+%% canonical representations of two blocks compare equal, the blocks are
+%% semantically equivalent. The following translations are done:
+%%
+%%   * Variables defined in the instruction sequence are replaced with
+%%     {var,0}, {var,1}, and so on. Free variables are not changed.
+%%
+%%   * `location` annotations that would produce a `line` instruction are
+%%     kept. All other annotations are cleared.
+%%
+%%   * Instructions are repackaged into tuples instead of into the
+%%     usual records. The main reason is to avoid violating the types for
+%%     the SSA records. We can simplify things a little by linking the
+%%     instructions directly instead of putting them into a list.
+
+canonical_is(Is) ->
+    {Can,_} = canonical_is(Is, #{}, []),
+    Can.
+
+canonical_is([#b_set{op=Op,dst=Dst,args=Args0}=I|Is], VarMap0, Acc) ->
+    Args = [canonical_arg(Arg, VarMap0) || Arg <- Args0],
+    Var = {var,map_size(VarMap0)},
+    VarMap = VarMap0#{Dst=>Var},
+    LineAnno = case Op of
+                   bs_match ->
+                       %% The location annotation for a bs_match instruction
+                       %% is only used in warnings, never to emit a `line`
+                       %% instruction. Therefore, it should not be included.
+                       [];
+                   _ ->
+                       %% The location annotation will be used in a `line`
+                       %% instruction. It must be included.
+                       beam_ssa:get_anno(location, I, none)
+               end,
+    canonical_is(Is, VarMap, {Op,LineAnno,Var,Args,Acc});
+canonical_is([#b_ret{arg=Arg}], VarMap, Acc0) ->
+    Acc1 = case Acc0 of
+               {call,_Anno,Var,[#b_local{}|_]=Args,PrevAcc} ->
+                   %% This is a tail-recursive call to a local function.
+                   %% There will be no line instruction generated;
+                   %% thus, the annotation is not significant.
+                   {call,[],Var,Args,PrevAcc};
+               _ ->
+                   Acc0
+           end,
+    {{ret,canonical_arg(Arg, VarMap),Acc1},VarMap};
+canonical_is([#b_br{bool=#b_var{},fail=Fail}], VarMap, Acc) ->
+    {{br,succ,Fail,Acc},VarMap};
+canonical_is([#b_br{succ=Succ}], VarMap, Acc) ->
+    {{br,Succ,Acc},VarMap};
+canonical_is([], VarMap, Acc) ->
+    {Acc,VarMap}.
+
+canonical_terminator(_L, #b_ret{}=Ret, _Blocks) ->
+    {[Ret],done};
+canonical_terminator(L, #b_br{bool=#b_literal{val=true},succ=Succ}=Br, Blocks) ->
+    #b_blk{is=Is} = map_get(Succ, Blocks),
+    case canonical_terminator_phis(Is, L) of
+        [] ->
+            {[],Succ};
+        [_|_]=Phis ->
+            {Phis ++ [Br],done}
+    end;
+canonical_terminator(_L, #b_br{bool=#b_var{},succ=Succ}=Br, _Blocks) ->
+    {[Br],Succ};
+canonical_terminator(_, _, _) -> none.
+
+canonical_terminator_phis([#b_set{op=phi,args=PhiArgs}=Phi|Is], L) ->
+    {Value,L} = keyfind(L, 2, PhiArgs),
+    [Phi#b_set{op=copy,args=[Value]}|canonical_terminator_phis(Is, L)];
+canonical_terminator_phis([#b_set{op=peek_message}=I|_], L) ->
+    %% We could get stuck in an infinite loop if we allowed the
+    %% comparisons to continue into this block. Force an unequal
+    %% compare with all other predecessors of this block.
+    [I#b_set{op=copy,args=[#b_literal{val=L}]}];
+canonical_terminator_phis(_, _) -> [].
+
+canonical_arg(#b_var{}=Var, VarMap) ->
+    case VarMap of
+        #{Var:=CanonicalVar} ->
+            CanonicalVar;
+        #{} ->
+            Var
+    end;
+canonical_arg(#b_remote{mod=Mod,name=Name}, VarMap) ->
+    {remote,canonical_arg(Mod, VarMap),
+     canonical_arg(Name, VarMap)};
+canonical_arg(Other, _VarMap) -> Other.
+
+%% Shortcut branches to empty blocks if safe.
+
+shortcut_nonempty_block(L, Blocks) ->
+    case map_get(L, Blocks) of
+        #b_blk{is=[],last=#b_br{bool=#b_literal{val=true},succ=Succ}}=Blk ->
+            %% This block is empty.
+            case is_forbidden(Succ, Blocks) of
+                false ->
+                    shortcut_nonempty_block(Succ, Blocks);
+                true ->
+                    {L,Blk}
+            end;
+        #b_blk{}=Blk ->
+            {L,Blk}
+    end.
+
+is_forbidden(L, Blocks) ->
+    case map_get(L, Blocks) of
+        #b_blk{is=[#b_set{op=phi}|_]} -> true;
+        #b_blk{is=[#b_set{op=peek_message}|_]} -> true;
+        #b_blk{} -> false
+    end.
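The kind of source the new beam_ssa_share pass benefits from is easy to sketch (hypothetical module; the effect is most pronounced with many equivalent clauses, per the header comment above). All clause bodies below are semantically equivalent, so their switch labels can be collapsed to one shared block:

-module(share_example).
-export([f/1]).

%% Hypothetical input: the three case branches have identical bodies
%% and identical literal return values, so after canonicalization the
%% switch can point every value at a single shared block.
f(X) ->
    case X of
        1 -> io:format("same~n");
        2 -> io:format("same~n");
        _ -> io:format("same~n")
    end.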
diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl
index 18e6e73a46..95fc3bb0e9 100644
--- a/lib/compiler/src/beam_ssa_type.erl
+++ b/lib/compiler/src/beam_ssa_type.erl
@@ -27,9 +27,10 @@
 
 -define(UNICODE_INT, #t_integer{elements={0,16#10FFFF}}).
 
--record(d, {ds :: #{beam_ssa:var_name():=beam_ssa:b_set()},
+-record(d, {ds :: #{beam_ssa:b_var():=beam_ssa:b_set()},
             ls :: #{beam_ssa:label():=type_db()},
-            sub :: #{beam_ssa:var_name():=beam_ssa:value()}
+            once :: cerl_sets:set(beam_ssa:b_var()),
+            sub :: #{beam_ssa:b_var():=beam_ssa:value()}
            }).
 
 -define(ATOM_SET_SIZE, 5).
@@ -56,13 +57,15 @@
       Block :: beam_ssa:b_blk().
 
 opt(Linear, Args) ->
+    UsedOnce = used_once(Linear, Args),
     Ts = maps:from_list([{V,any} || #b_var{}=V <- Args]),
     FakeCall = #b_set{op=call,args=[#b_remote{mod=#b_literal{val=unknown},
                                               name=#b_literal{val=unknown},
                                               arity=0}]},
     Defs = maps:from_list([{Var,FakeCall#b_set{dst=Var}} ||
                               #b_var{}=Var <- Args]),
-    D = #d{ds=Defs,ls=#{0=>Ts},sub=#{}},
+    D = #d{ds=Defs,ls=#{0=>Ts,?BADARG_BLOCK=>#{}},
+           once=UsedOnce,sub=#{}},
     opt_1(Linear, D).
 
 opt_1([{L,Blk}|Bs], #d{ls=Ls}=D) ->
@@ -425,16 +428,43 @@ opt_terminator(#b_ret{}=Ret, _Ts, _Ds) -> Ret.
 update_successors(#b_br{bool=#b_literal{val=true},succ=S}, Ts, D) ->
     update_successor(S, Ts, D);
-update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts, D0) ->
-    D = update_successor_bool(Bool, false, Fail, Ts, D0),
-    SuccTs = infer_types(Bool, Ts, D0),
-    update_successor_bool(Bool, true, Succ, SuccTs, D);
-update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts, D0) ->
-    D = update_successor(Fail, Ts, D0),
-    foldl(fun({Val,S}, A) ->
-                  T = get_type(Val, Ts),
-                  update_successor(S, Ts#{V=>T}, A)
-          end, D, List);
+update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts0, D0) ->
+    case cerl_sets:is_element(Bool, D0#d.once) of
+        true ->
+            %% This variable is defined in this block and is only
+            %% referenced by this br terminator. Therefore, there is
+            %% no need to include it in the type database passed on
+            %% to the successors of this block.
+            Ts = maps:remove(Bool, Ts0),
+            D = update_successor(Fail, Ts, D0),
+            SuccTs = infer_types(Bool, Ts, D0),
+            update_successor(Succ, SuccTs, D);
+        false ->
+            D = update_successor_bool(Bool, false, Fail, Ts0, D0),
+            SuccTs = infer_types(Bool, Ts0, D0),
+            update_successor_bool(Bool, true, Succ, SuccTs, D)
+    end;
+update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts0, D0) ->
+    case cerl_sets:is_element(V, D0#d.once) of
+        true ->
+            %% This variable is defined in this block and is only
+            %% referenced by this switch terminator. Therefore, there
+            %% is no need to include it in the type database passed on
+            %% to the successors of this block.
+            Ts = maps:remove(V, Ts0),
+            D = update_successor(Fail, Ts, D0),
+            F = fun({_Val,S}, A) ->
+                        update_successor(S, Ts, A)
+                end,
+            foldl(F, D, List);
+        false ->
+            D = update_successor(Fail, Ts0, D0),
+            F = fun({Val,S}, A) ->
+                        T = get_type(Val, Ts0),
+                        update_successor(S, Ts0#{V=>T}, A)
+                end,
+            foldl(F, D, List)
+    end;
 update_successors(#b_ret{}, _Ts, D) -> D.
 
 update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) ->
@@ -447,6 +477,11 @@ update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) ->
             update_successor(S, Ts, D)
     end.
 
+update_successor(?BADARG_BLOCK, _Ts, #d{}=D) ->
+    %% We KNOW that no variables are used in the ?BADARG_BLOCK,
+    %% so there is no need to update the type information. That
+    %% can be a huge timesaver for huge functions.
+    D;
 update_successor(S, Ts0, #d{ls=Ls}=D) ->
     case Ls of
         #{S:=Ts1} ->
@@ -766,6 +801,48 @@ simplify_not(#b_br{bool=#b_var{}=V,succ=Succ,fail=Fail}=Br0, Ts, Ds) ->
             Br0
     end.
 
+%%%
+%%% Calculate the set of variables that are only used once in the
+%%% block where they are defined. That allows us to discard type
+%%% information for variables that will never be referenced by the
+%%% successor blocks, potentially improving compilation times.
+%%%
+
+used_once(Linear, Args) ->
+    Map0 = used_once_1(reverse(Linear), #{}),
+    Map = maps:without(Args, Map0),
+    cerl_sets:from_list(maps:keys(Map)).
+
+used_once_1([{L,#b_blk{is=Is,last=Last}}|Bs], Uses0) ->
+    Uses = used_once_2([Last|reverse(Is)], L, Uses0),
+    used_once_1(Bs, Uses);
+used_once_1([], Uses) -> Uses.
+
+used_once_2([I|Is], L, Uses0) ->
+    Uses = used_once_uses(beam_ssa:used(I), L, Uses0),
+    case I of
+        #b_set{dst=Dst} ->
+            case Uses of
+                #{Dst:=[L]} ->
+                    used_once_2(Is, L, Uses);
+                #{} ->
+                    used_once_2(Is, L, maps:remove(Dst, Uses))
+            end;
+        _ ->
+            used_once_2(Is, L, Uses)
+    end;
+used_once_2([], _, Uses) -> Uses.
+
+used_once_uses([V|Vs], L, Uses) ->
+    case Uses of
+        #{V:=Us} ->
+            used_once_uses(Vs, L, Uses#{V:=[L|Us]});
+        #{} ->
+            used_once_uses(Vs, L, Uses#{V=>[L]})
+    end;
+used_once_uses([], _, Uses) -> Uses.
+
+
 get_types(Values, Ts) ->
     [get_type(Val, Ts) || Val <- Values].
 
 -spec get_type(beam_ssa:value(), type_db()) -> type().
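A sketch of the situation the new used_once/2 analysis exploits (hypothetical module; for illustration only). A boolean defined and consumed inside a single block never needs a type entry in the databases handed to the successor blocks:

-module(once_example).
-export([f/1]).

%% Hypothetical input: the boolean produced by `X > 0` is referenced
%% only by the br terminator of the block that defines it, so it ends
%% up in the `once` set and its entry is removed from the type
%% database before the successors are updated.
f(X) ->
    case X > 0 of
        true -> positive;
        false -> not_positive
    end.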
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index 7d908df3bf..1945faba7f 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -1366,8 +1366,10 @@ kill_aliases(Reg, #st{aliases=Aliases0}=St) ->
             St
     end.
 
-set_type(Type, {x,_}=Reg, Vst) -> set_type_reg(Type, Reg, Vst);
-set_type(Type, {y,_}=Reg, Vst) -> set_type_y(Type, Reg, Vst);
+set_type(Type, {x,_}=Reg, Vst) ->
+    set_type_reg(Type, Reg, Reg, Vst);
+set_type(Type, {y,_}=Reg, Vst) ->
+    set_type_reg(Type, Reg, Reg, Vst);
 set_type(_, _, #vst{}=Vst) -> Vst.
 
 set_type_reg(Type, Src, Dst, Vst) ->
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 65c4f140c9..14c8c5b4ab 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -823,6 +823,9 @@ kernel_passes() ->
      {pass,beam_kernel_to_ssa},
      {iff,dssa,{listing,"ssa"}},
      {iff,ssalint,{pass,beam_ssa_lint}},
+     {unless,no_share_opt,{pass,beam_ssa_share}},
+     {iff,dssashare,{listing,"ssashare"}},
+     {iff,ssalint,{pass,beam_ssa_lint}},
      {unless,no_bsm_opt,{pass,beam_ssa_bsm}},
      {iff,dssabsm,{listing,"ssabsm"}},
      {iff,ssalint,{pass,beam_ssa_lint}},
@@ -2098,6 +2101,7 @@ pre_load() ->
              beam_ssa_opt,
              beam_ssa_pre_codegen,
              beam_ssa_recv,
+             beam_ssa_share,
              beam_ssa_type,
              beam_trim,
              beam_utils,
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index 86259270bd..1472e3fde1 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -45,6 +45,7 @@
             beam_ssa_pp,
             beam_ssa_pre_codegen,
             beam_ssa_recv,
+            beam_ssa_share,
             beam_ssa_type,
             beam_trim,
             beam_utils,
diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl
index d848cd8f19..43c99be982 100644
--- a/lib/compiler/src/sys_core_fold.erl
+++ b/lib/compiler/src/sys_core_fold.erl
@@ -2667,12 +2667,20 @@ opt_build_stacktrace(#c_let{vars=[#c_var{name=Cooked}],
         #c_call{module=#c_literal{val=erlang},
                 name=#c_literal{val=raise},
                 args=[Class,Exp,#c_var{name=Cooked}]} ->
-            %% The stacktrace is only used in a call to erlang:raise/3.
-            %% There is no need to build the stacktrace. Replace the
-            %% call to erlang:raise/3 with the raw_raise/3 instruction,
-            %% which will use a raw stacktrace.
-            #c_primop{name=#c_literal{val=raw_raise},
-                      args=[Class,Exp,RawStk]};
+            case core_lib:is_var_used(Cooked, #c_cons{hd=Class,tl=Exp}) of
+                true ->
+                    %% Not safe. The stacktrace is used in the class or
+                    %% reason.
+                    Let;
+                false ->
+                    %% The stacktrace is only used in the last
+                    %% argument for erlang:raise/3. There is no need
+                    %% to build the stacktrace. Replace the call to
+                    %% erlang:raise/3 with the raw_raise/3
+                    %% instruction, which will use a raw stacktrace.
+                    #c_primop{name=#c_literal{val=raw_raise},
+                              args=[Class,Exp,RawStk]}
+            end;
         #c_let{vars=[#c_var{name=V}],arg=Arg,body=B0} when V =/= Cooked ->
             case core_lib:is_var_used(Cooked, Arg) of
                 false ->
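Both sides of the new safety check in opt_build_stacktrace can be sketched with a small module (hypothetical; for illustration only). In reraise/1 the stacktrace only flows into the last argument of erlang:raise/3, so the raw_raise rewrite applies; in bad/1 the stacktrace also appears in the reason, the unsafe case the added core_lib:is_var_used/2 test now rejects:

-module(raise_example).
-export([reraise/1,bad/1]).

%% Hypothetical input: safe case. The stacktrace Stk is used only as
%% the third argument of erlang:raise/3, so it never needs to be
%% built as a cooked term.
reraise(F) ->
    try F()
    catch C:R:Stk -> erlang:raise(C, R, Stk)
    end.

%% Hypothetical input: unsafe case. Stk is also part of the reason
%% term, so the rewrite to raw_raise must not be performed.
bad(F) ->
    try F()
    catch C:R:Stk -> erlang:raise(C, {R,Stk}, Stk)
    end.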