From 077cfc59f1dca89aa1231ce291100aa4b33c50e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Tue, 5 Feb 2019 02:46:45 +0100
Subject: Optimize ssa_opt_sink for huge functions

The ssa_opt_sink optimization of beam_ssa_opt could get very slow
for certain huge functions. 9a190cae9bd7 partly addressed this issue
by terminating the optimization early if there happened to be no
get_tuple_element instructions at all in the function.

This commit addresses the issue more directly by making the dominator
calculation in beam_ssa:dominators/1 more efficient. The same
algorithm as before is used, but it is implemented in a more efficient
way based on the ideas in "A Simple, Fast Dominance Algorithm"
(http://www.hipersoft.rice.edu/grads/publications/dom14.pdf).

As well as being more efficient, the new implementation also gives
an explicit representation of the dominator tree, which makes it
possible to simplify and optimize the ssa_opt_sink optimization.
---
 lib/compiler/src/beam_ssa.erl | 73 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 60 insertions(+), 13 deletions(-)

(limited to 'lib/compiler/src/beam_ssa.erl')

diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl
index 9c29c98064..0f662d851d 100644
--- a/lib/compiler/src/beam_ssa.erl
+++ b/lib/compiler/src/beam_ssa.erl
@@ -23,7 +23,7 @@
 -export([add_anno/3,get_anno/2,get_anno/3,
          clobbers_xregs/1,def/2,def_used/2,
          definitions/1,
-         dominators/1,
+         dominators/1,common_dominators/3,
          flatmapfold_instrs_rpo/4,
          fold_po/3,fold_po/4,fold_rpo/3,fold_rpo/4,
          fold_instrs_rpo/4,
@@ -85,7 +85,8 @@
 -type anno() :: #{atom() := any()}.
 
 -type block_map() :: #{label():=b_blk()}.
--type dominator_map() :: #{label():=ordsets:ordset(label())}.
+-type dominator_map() :: #{label():=[label()]}.
+-type numbering_map() :: #{label():=non_neg_integer()}.
 -type usage_map() :: #{b_var():=[{label(),b_set() | terminator()}]}.
 -type definition_map() :: #{b_var():=b_set()}.
 -type rename_map() :: #{b_var():=value()}.
@@ -327,18 +328,41 @@ def_used(Ls, Blocks) ->
     Preds = cerl_sets:from_list(Top),
     def_used_1(Blks, Preds, [], []).
 
+%% dominators(BlockMap) -> {Dominators,Numbering}.
+%%  Calculate the dominator tree, returning a map where each entry
+%%  in the map is a list that gives the path from that block to
+%%  the top of the dominator tree. (Note that the suffixes of the
+%%  paths are shared with each other, which make the representation
+%%  of the dominator tree highly memory-efficient.)
+%%
+%%  The implementation is based on:
+%%
+%%     http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
+%%     Cooper, Keith D.; Harvey, Timothy J; Kennedy, Ken (2001).
+%%        A Simple, Fast Dominance Algorithm.
+
 -spec dominators(Blocks) -> Result when
       Blocks :: block_map(),
-      Result :: dominator_map().
-
+      Result :: {dominator_map(), numbering_map()}.
 dominators(Blocks) ->
     Preds = predecessors(Blocks),
     Top0 = rpo(Blocks),
-    Top = [{L,map_get(L, Preds)} || L <- Top0],
+    Df = maps:from_list(number(Top0, 0)),
+    [{0,[]}|Top] = [{L,map_get(L, Preds)} || L <- Top0],
 
     %% The flow graph for an Erlang function is reducible, and
     %% therefore one traversal in reverse postorder is sufficient.
-    iter_dominators(Top, #{}).
+    Acc = #{0=>[0]},
+    {dominators_1(Top, Df, Acc),Df}.
+
+%% common_dominators([Label], Dominators, Numbering) -> [Label].
+%%  Calculate the common dominators for the given list of blocks
+%%  and Dominators and Numbering as returned from dominators/1.
+
+-spec common_dominators([label()], dominator_map(), numbering_map()) -> [label()].
+common_dominators(Ls, Dom, Numbering) ->
+    Doms = [map_get(L, Dom) || L <- Ls],
+    dom_intersection(Doms, Numbering).
 
 -spec fold_instrs_rpo(Fun, From, Acc0, Blocks) -> any() when
       Fun :: fun((b_blk()|terminator(), any()) -> any()),
@@ -657,14 +681,37 @@ def_is([#b_set{dst=Dst}|Is], Def) ->
     def_is(Is, [Dst|Def]);
 def_is([], Def) -> Def.
 
-iter_dominators([{0,[]}|Ls], _Doms) ->
-    Dom = [0],
-    iter_dominators(Ls, #{0=>Dom});
-iter_dominators([{L,Preds}|Ls], Doms) ->
+dominators_1([{L,Preds}|Ls], Df, Doms) ->
     DomPreds = [map_get(P, Doms) || P <- Preds, is_map_key(P, Doms)],
-    Dom = ordsets:add_element(L, ordsets:intersection(DomPreds)),
-    iter_dominators(Ls, Doms#{L=>Dom});
-iter_dominators([], Doms) -> Doms.
+    Dom = [L|dom_intersection(DomPreds, Df)],
+    dominators_1(Ls, Df, Doms#{L=>Dom});
+dominators_1([], _Df, Doms) -> Doms.
+
+dom_intersection([S], _Df) ->
+    S;
+dom_intersection([S|Ss], Df) ->
+    dom_intersection(S, Ss, Df).
+
+dom_intersection(S1, [S2|Ss], Df) ->
+    dom_intersection(dom_intersection_1(S1, S2, Df), Ss, Df);
+dom_intersection(S, [], _Df) -> S.
+
+dom_intersection_1([E1|Es1]=Set1, [E2|Es2]=Set2, Df) ->
+    %% Blocks are numbered in the order they are found in
+    %% reverse postorder.
+    #{E1:=Df1,E2:=Df2} = Df,
+    if Df1 > Df2 ->
+            dom_intersection_1(Es1, Set2, Df);
+       Df2 > Df1 ->
+            dom_intersection_1(Es2, Set1, Df);  %switch arguments!
+       true ->                                  %Set1 == Set2
+            %% The common suffix of the sets is the intersection.
+            Set1
+    end.
+
+number([L|Ls], N) ->
+    [{L,N}|number(Ls, N+1)];
+number([], _) -> [].
 
 fold_rpo_1([L|Ls], Fun, Blocks, Acc0) ->
     Block = map_get(L, Blocks),
-- 
cgit v1.2.3