diff options
author | Björn Gustavsson <[email protected]> | 2019-02-05 02:46:45 +0100 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2019-02-06 10:34:23 +0100 |
commit | 077cfc59f1dca89aa1231ce291100aa4b33c50e0 (patch) | |
tree | 3f0eb95adda9f0432b18a7021a3d97599a34fb00 /lib/compiler | |
parent | e82637d6f078409db27449383df5342294df0b63 (diff) | |
download | otp-077cfc59f1dca89aa1231ce291100aa4b33c50e0.tar.gz otp-077cfc59f1dca89aa1231ce291100aa4b33c50e0.tar.bz2 otp-077cfc59f1dca89aa1231ce291100aa4b33c50e0.zip |
Optimize ssa_opt_sink for huge functions
The ssa_opt_sink optimization of beam_ssa_opt could get very slow
for certain huge functions. 9a190cae9bd7 partly addressed this issue
by terminating the optimization early if there happened to be no
get_tuple_element instructions at all in the function.
This commit addresses the issue more directly by making the dominator
calculation in beam_ssa:dominators/1 more efficient. The same
algorithm as before is used, but it is implemented in a more efficient
way based on the ideas in "A Simple, Fast Dominance Algorithm"
(http://www.hipersoft.rice.edu/grads/publications/dom14.pdf).
As well as being more efficient, the new implementation also gives
an explicit representation of the dominator tree, which makes it
possible to simplify and optimize the ssa_opt_sink optimization.
Diffstat (limited to 'lib/compiler')
-rw-r--r-- | lib/compiler/src/beam_ssa.erl | 73 | ||||
-rw-r--r-- | lib/compiler/src/beam_ssa_bsm.erl | 10 | ||||
-rw-r--r-- | lib/compiler/src/beam_ssa_opt.erl | 82 | ||||
-rw-r--r-- | lib/compiler/src/beam_ssa_pre_codegen.erl | 4 |
4 files changed, 100 insertions, 69 deletions
diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl index 9c29c98064..0f662d851d 100644 --- a/lib/compiler/src/beam_ssa.erl +++ b/lib/compiler/src/beam_ssa.erl @@ -23,7 +23,7 @@ -export([add_anno/3,get_anno/2,get_anno/3, clobbers_xregs/1,def/2,def_used/2, definitions/1, - dominators/1, + dominators/1,common_dominators/3, flatmapfold_instrs_rpo/4, fold_po/3,fold_po/4,fold_rpo/3,fold_rpo/4, fold_instrs_rpo/4, @@ -85,7 +85,8 @@ -type anno() :: #{atom() := any()}. -type block_map() :: #{label():=b_blk()}. --type dominator_map() :: #{label():=ordsets:ordset(label())}. +-type dominator_map() :: #{label():=[label()]}. +-type numbering_map() :: #{label():=non_neg_integer()}. -type usage_map() :: #{b_var():=[{label(),b_set() | terminator()}]}. -type definition_map() :: #{b_var():=b_set()}. -type rename_map() :: #{b_var():=value()}. @@ -327,18 +328,41 @@ def_used(Ls, Blocks) -> Preds = cerl_sets:from_list(Top), def_used_1(Blks, Preds, [], []). +%% dominators(BlockMap) -> {Dominators,Numbering}. +%% Calculate the dominator tree, returning a map where each entry +%% in the map is a list that gives the path from that block to +%% the top of the dominator tree. (Note that the suffixes of the +%% paths are shared with each other, which make the representation +%% of the dominator tree highly memory-efficient.) +%% +%% The implementation is based on: +%% +%% http://www.hipersoft.rice.edu/grads/publications/dom14.pdf +%% Cooper, Keith D.; Harvey, Timothy J; Kennedy, Ken (2001). +%% A Simple, Fast Dominance Algorithm. + -spec dominators(Blocks) -> Result when Blocks :: block_map(), - Result :: dominator_map(). - + Result :: {dominator_map(), numbering_map()}. dominators(Blocks) -> Preds = predecessors(Blocks), Top0 = rpo(Blocks), - Top = [{L,map_get(L, Preds)} || L <- Top0], + Df = maps:from_list(number(Top0, 0)), + [{0,[]}|Top] = [{L,map_get(L, Preds)} || L <- Top0], %% The flow graph for an Erlang function is reducible, and %% therefore one traversal in reverse postorder is sufficient. - iter_dominators(Top, #{}). + Acc = #{0=>[0]}, + {dominators_1(Top, Df, Acc),Df}. + +%% common_dominators([Label], Dominators, Numbering) -> [Label]. +%% Calculate the common dominators for the given list of blocks +%% and Dominators and Numbering as returned from dominators/1. + +-spec common_dominators([label()], dominator_map(), numbering_map()) -> [label()]. +common_dominators(Ls, Dom, Numbering) -> + Doms = [map_get(L, Dom) || L <- Ls], + dom_intersection(Doms, Numbering). -spec fold_instrs_rpo(Fun, From, Acc0, Blocks) -> any() when Fun :: fun((b_blk()|terminator(), any()) -> any()), @@ -657,14 +681,37 @@ def_is([#b_set{dst=Dst}|Is], Def) -> def_is(Is, [Dst|Def]); def_is([], Def) -> Def. -iter_dominators([{0,[]}|Ls], _Doms) -> - Dom = [0], - iter_dominators(Ls, #{0=>Dom}); -iter_dominators([{L,Preds}|Ls], Doms) -> +dominators_1([{L,Preds}|Ls], Df, Doms) -> DomPreds = [map_get(P, Doms) || P <- Preds, is_map_key(P, Doms)], - Dom = ordsets:add_element(L, ordsets:intersection(DomPreds)), - iter_dominators(Ls, Doms#{L=>Dom}); -iter_dominators([], Doms) -> Doms. + Dom = [L|dom_intersection(DomPreds, Df)], + dominators_1(Ls, Df, Doms#{L=>Dom}); +dominators_1([], _Df, Doms) -> Doms. + +dom_intersection([S], _Df) -> + S; +dom_intersection([S|Ss], Df) -> + dom_intersection(S, Ss, Df). + +dom_intersection(S1, [S2|Ss], Df) -> + dom_intersection(dom_intersection_1(S1, S2, Df), Ss, Df); +dom_intersection(S, [], _Df) -> S. + +dom_intersection_1([E1|Es1]=Set1, [E2|Es2]=Set2, Df) -> + %% Blocks are numbered in the order they are found in + %% reverse postorder. + #{E1:=Df1,E2:=Df2} = Df, + if Df1 > Df2 -> + dom_intersection_1(Es1, Set2, Df); + Df2 > Df1 -> + dom_intersection_1(Es2, Set1, Df); %switch arguments! + true -> %Set1 == Set2 + %% The common suffix of the sets is the intersection. + Set1 + end. + +number([L|Ls], N) -> + [{L,N}|number(Ls, N+1)]; +number([], _) -> []. fold_rpo_1([L|Ls], Fun, Blocks, Acc0) -> Block = map_get(L, Blocks), diff --git a/lib/compiler/src/beam_ssa_bsm.erl b/lib/compiler/src/beam_ssa_bsm.erl index 466337db0e..382e6f635e 100644 --- a/lib/compiler/src/beam_ssa_bsm.erl +++ b/lib/compiler/src/beam_ssa_bsm.erl @@ -300,7 +300,8 @@ get_fa(#b_function{ anno = Anno }) -> promotions = #{} :: promotion_map() }). alias_matched_binaries(Blocks0, Counter, AliasMap) when AliasMap =/= #{} -> - State0 = #amb{ dominators = beam_ssa:dominators(Blocks0), + {Dominators, _} = beam_ssa:dominators(Blocks0), + State0 = #amb{ dominators = Dominators, match_aliases = AliasMap, cnt = Counter }, {Blocks, State} = beam_ssa:mapfold_blocks_rpo(fun amb_1/3, [0], State0, @@ -347,7 +348,7 @@ amb_get_alias(#b_var{}=Arg, Lbl, State) -> %% Our context may not have been created yet, so we skip assigning %% an alias unless the given block is among our dominators. Dominators = maps:get(Lbl, State#amb.dominators), - case ordsets:is_element(AliasAfter, Dominators) of + case member(AliasAfter, Dominators) of true -> amb_create_alias(Arg, Context, Lbl, State); false -> {Arg, State} end; @@ -444,6 +445,7 @@ combine_matches({Fs0, ModInfo}) -> combine_matches(#b_function{bs=Blocks0,cnt=Counter0}=F, ModInfo) -> case funcinfo_get(F, has_bsm_ops, ModInfo) of true -> + {Dominators, _} = beam_ssa:dominators(Blocks0), {Blocks1, State} = beam_ssa:mapfold_blocks_rpo( fun(Lbl, #b_blk{is=Is0}=Block0, State0) -> @@ -451,7 +453,7 @@ combine_matches(#b_function{bs=Blocks0,cnt=Counter0}=F, ModInfo) -> {Block0#b_blk{is=Is}, State} end, [0], #cm{ definitions = beam_ssa:definitions(Blocks0), - dominators = beam_ssa:dominators(Blocks0), + dominators = Dominators, blocks = Blocks0 }, Blocks0), @@ -491,7 +493,7 @@ cm_handle_priors(Src, DstCtx, Bool, Acc, MatchSeq, Lbl, State0) -> %% dominate us. Dominators = maps:get(Lbl, State0#cm.dominators, []), [Ctx || {ValidAfter, Ctx} <- Priors, - ordsets:is_element(ValidAfter, Dominators)]; + member(ValidAfter, Dominators)]; error -> [] end, diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl index ca5eefe4fc..533cc04aac 100644 --- a/lib/compiler/src/beam_ssa_opt.erl +++ b/lib/compiler/src/beam_ssa_opt.erl @@ -2021,7 +2021,7 @@ do_ssa_opt_sink(Linear, Defs, #st{ssa=Blocks0}=St) -> Used = used_blocks(Linear, Defs, []), %% Calculate dominators. - Dom0 = beam_ssa:dominators(Blocks0), + {Dom,Numbering} = beam_ssa:dominators(Blocks0), %% It is not safe to move get_tuple_element instructions to blocks %% that begin with certain instructions. It is also unsafe to move @@ -2029,20 +2029,10 @@ do_ssa_opt_sink(Linear, Defs, #st{ssa=Blocks0}=St) -> %% unsafe moves, pretend that the unsuitable blocks are not %% dominators. Unsuitable = unsuitable(Linear, Blocks0), - Dom = case gb_sets:is_empty(Unsuitable) of - true -> - Dom0; - false -> - F = fun(_, DomBy) -> - [L || L <- DomBy, - not gb_sets:is_element(L, Unsuitable)] - end, - maps:map(F, Dom0) - end, %% Calculate new positions for get_tuple_element instructions. The new %% position is a block that dominates all uses of the variable. - DefLoc = new_def_locations(Used, Defs, Dom), + DefLoc = new_def_locations(Used, Defs, Dom, Numbering, Unsuitable), %% Now move all suitable get_tuple_element instructions to their %% new blocks. @@ -2136,50 +2126,42 @@ unsuitable_loop_1([P|Ps], Blocks, Predecessors, Acc0) -> end; unsuitable_loop_1([], _, _, Acc) -> Acc. -%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs, Dominators) -> -%% [{Variable,NewDefinitionBlock}] -%% Calculate new locations for get_tuple_element instructions. For each -%% variable, the new location is a block that dominates all uses of -%% variable and as near to the uses of as possible. If no such block -%% distinct from the block where the instruction currently is, the -%% variable will not be included in the result list. +%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs, +%% Dominators, Numbering, Unsuitable) -> +%% [{Variable,NewDefinitionBlock}] +%% +%% Calculate new locations for get_tuple_element instructions. For +%% each variable, the new location is a block that dominates all uses +%% of the variable and as near to the uses of as possible. -new_def_locations([{V,UsedIn}|Vs], Defs, Dom) -> +new_def_locations([{V,UsedIn}|Vs], Defs, Dom, Numbering, Unsuitable) -> DefIn = map_get(V, Defs), - case common_dom(UsedIn, DefIn, Dom) of - [] -> - new_def_locations(Vs, Defs, Dom); - [_|_]=BetterDef -> - L = most_dominated(BetterDef, Dom), - [{V,L}|new_def_locations(Vs, Defs, Dom)] - end; -new_def_locations([], _, _) -> []. - -common_dom([L|Ls], DefIn, Dom) -> - DomBy0 = map_get(L, Dom), - DomBy = ordsets:subtract(DomBy0, map_get(DefIn, Dom)), - common_dom_1(Ls, Dom, DomBy). - -common_dom_1(_, _, []) -> - []; -common_dom_1([L|Ls], Dom, [_|_]=DomBy0) -> - DomBy1 = map_get(L, Dom), - DomBy = ordsets:intersection(DomBy0, DomBy1), - common_dom_1(Ls, Dom, DomBy); -common_dom_1([], _, DomBy) -> DomBy. - -most_dominated([L|Ls], Dom) -> - most_dominated(Ls, L, map_get(L, Dom), Dom). - -most_dominated([L|Ls], L0, DomBy, Dom) -> - case member(L, DomBy) of + Common = common_dominator(UsedIn, Dom, Numbering, Unsuitable), + case member(Common, map_get(DefIn, Dom)) of true -> - most_dominated(Ls, L0, DomBy, Dom); + %% The common dominator is either DefIn or an + %% ancestor of DefIn. + new_def_locations(Vs, Defs, Dom, Numbering, Unsuitable); false -> - most_dominated(Ls, L, map_get(L, Dom), Dom) + %% We have found a suitable descendant of DefIn, + %% to which the get_tuple_element instruction can + %% be sunk. + [{V,Common}|new_def_locations(Vs, Defs, Dom, Numbering, Unsuitable)] end; -most_dominated([], L, _, _) -> L. +new_def_locations([], _, _, _, _) -> []. +common_dominator(Ls0, Dom, Numbering, Unsuitable) -> + [Common|_] = beam_ssa:common_dominators(Ls0, Dom, Numbering), + case gb_sets:is_member(Common, Unsuitable) of + true -> + %% It is not allowed to place the instruction here. Try + %% to find another suitable dominating block by going up + %% one step in the dominator tree. + [Common,OneUp|_] = map_get(Common, Dom), + common_dominator([OneUp], Dom, Numbering, Unsuitable); + false -> + Common + end. %% Move get_tuple_element instructions to their new locations. diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl index 274f78052d..b4a7517841 100644 --- a/lib/compiler/src/beam_ssa_pre_codegen.erl +++ b/lib/compiler/src/beam_ssa_pre_codegen.erl @@ -874,7 +874,7 @@ fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) -> %% a stack frame or set up a stack frame with a different size. place_frames(#st{ssa=Blocks}=St) -> - Doms = beam_ssa:dominators(Blocks), + {Doms,_} = beam_ssa:dominators(Blocks), Ls = beam_ssa:rpo(Blocks), Tried = gb_sets:empty(), Frames0 = [], @@ -1001,7 +1001,7 @@ phi_predecessors(L, Blocks) -> is_dominated_by(L, DomBy, Doms) -> DominatedBy = map_get(L, Doms), - ordsets:is_element(DomBy, DominatedBy). + member(DomBy, DominatedBy). %% need_frame(#b_blk{}) -> true|false. %% Test whether any of the instructions in the block requires a stack frame. |