From cf047293ecf6ea108a1e5a412743bfb5fe66e26f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Fri, 16 Sep 2016 13:34:24 +0200
Subject: hipe: Add module computing basic blocks weights

hipe_bb_weights computes basic block weights by using the branch
probability predictions as the coefficients in a linear equation system.
This linear equation system is then solved using Gauss-Jordan
Elimination.

The equation system representation is picked to be efficient with highly
sparse data. During triangelisation, the remaining equations are
dynamically reordered in order to prevent the equations from growing in
the common case, preserving the benefit of the sparse equation
representation.

In the case that the input is very big, hipe_bb_weights automatically
falls back to a rough approximation in order to keep compile times under
control.
---
 lib/hipe/opt/Makefile            |   3 +-
 lib/hipe/opt/hipe_bb_weights.erl | 449 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 451 insertions(+), 1 deletion(-)
 create mode 100644 lib/hipe/opt/hipe_bb_weights.erl

(limited to 'lib/hipe/opt')

diff --git a/lib/hipe/opt/Makefile b/lib/hipe/opt/Makefile
index 684d6f45b4..5a729d04ae 100644
--- a/lib/hipe/opt/Makefile
+++ b/lib/hipe/opt/Makefile
@@ -43,7 +43,8 @@ RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN)
 # ----------------------------------------------------
 # Target Specs
 # ----------------------------------------------------
-MODULES = hipe_spillmin hipe_spillmin_color hipe_spillmin_scan
+MODULES = hipe_spillmin hipe_spillmin_color hipe_spillmin_scan \
+	hipe_bb_weights
 
 HRL_FILES=
 ERL_FILES= $(MODULES:%=%.erl)
diff --git a/lib/hipe/opt/hipe_bb_weights.erl b/lib/hipe/opt/hipe_bb_weights.erl
new file mode 100644
index 0000000000..8ef113b94c
--- /dev/null
+++ b/lib/hipe/opt/hipe_bb_weights.erl
@@ -0,0 +1,449 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%@doc
+%%	                BASIC BLOCK WEIGHTING
+%%
+%% Computes basic block weights by using branch probabilities as weights in a
+%% linear equation system, that is then solved using Gauss-Jordan Elimination.
+%%
+%% The equation system representation is intentionally sparse, since most blocks
+%% have at most two successors.
+-module(hipe_bb_weights).
+-export([compute/3, compute_fast/3, weight/2, call_exn_pred/0]).
+-export_type([bb_weights/0]).
+
+-compile(inline).
+
+%%-define(DO_ASSERT,1).
+%%-define(DEBUG,1).
+-include("../main/hipe.hrl").
+
+%% If the equation system is large, it might take too long to solve it exactly.
+%% Thus, if there are more than ?HEUR_MAX_SOLVE labels, we use the iterative
+%% approximation.
+-define(HEUR_MAX_SOLVE, 10000).
+
+-opaque bb_weights() :: #{label() => float()}.
+
+-type cfg() :: any().
+-type target_module() :: module().
+-type target_context() :: any().
+-type target() :: {target_module(), target_context()}.
+
+-type label()            :: integer().
+-type var()              :: label().
+-type assignment()       :: {var(), float()}.
+-type eq_assoc()         :: [{var(), key()}].
+-type solution()         :: [assignment()].
+
+%% Constant. Predicted probability of a call resulting in an exception.
+-spec call_exn_pred() -> float().
+call_exn_pred() -> 0.01.
+
+-spec compute(cfg(), target_module(), target_context()) -> bb_weights().
+compute(CFG, TgtMod, TgtCtx) ->
+  Target = {TgtMod, TgtCtx},
+  Labels = labels(CFG, Target),
+  if length(Labels) > ?HEUR_MAX_SOLVE ->
+      ?debug_msg("~w: Too many labels (~w), approximating.~n",
+		 [?MODULE, length(Labels)]),
+      compute_fast(CFG, TgtMod, TgtCtx);
+     true ->
+      {EqSys, EqAssoc} = build_eq_system(CFG, Labels, Target),
+      case solve(EqSys, EqAssoc) of
+	{ok, Solution} ->
+	  maps:from_list(Solution)
+      end
+  end.
+
+-spec build_eq_system(cfg(), [label()], target()) -> {eq_system(), eq_assoc()}.
+build_eq_system(CFG, Labels, Target) ->
+  StartLb = hipe_gen_cfg:start_label(CFG),
+  EQS0 = eqs_new(),
+  {EQS1, Assoc} = build_eq_system(Labels, CFG, Target, [], EQS0),
+  {StartLb, StartKey} = lists:keyfind(StartLb, 1, Assoc),
+  StartRow0 = eqs_get(StartKey, EQS1),
+  StartRow = row_set_const(-1.0, StartRow0), % -1.0 since StartLb coef is -1.0
+  EQS = eqs_put(StartKey, StartRow, EQS1),
+  {EQS, Assoc}.
+
+build_eq_system([], _CFG, _Target, Map, EQS) -> {EQS, lists:reverse(Map)};
+build_eq_system([L|Ls], CFG, Target, Map, EQS0) ->
+  PredProb = pred_prob(L, CFG, Target),
+  {Key, EQS} = eqs_insert(row_new([{L, -1.0}|PredProb], 0.0), EQS0),
+  build_eq_system(Ls, CFG, Target, [{L, Key}|Map], EQS).
+
+pred_prob(L, CFG, Target) ->
+  [begin
+     BB = bb(CFG, Pred, Target),
+     Ps = branch_preds(hipe_bb:last(BB), Target),
+     ?ASSERT(length(lists:ukeysort(1, Ps))
+	     =:= length(hipe_gen_cfg:succ(CFG, Pred))),
+     case lists:keyfind(L, 1, Ps) of
+       {L, Prob} when is_float(Prob) -> {Pred, Prob}
+     end
+   end || Pred <- hipe_gen_cfg:pred(CFG, L)].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-spec triangelise(eq_system(), eq_assoc()) -> {eq_system(), eq_assoc()}.
+triangelise(EQS, VKs) ->
+  triangelise_1(mk_triix(EQS, VKs), []).
+
+triangelise_1(TIX0, Acc) ->
+  case triix_is_empty(TIX0) of
+    true -> {triix_eqs(TIX0), lists:reverse(Acc)};
+    false ->
+      {V,Key,TIX1} = triix_pop_smallest(TIX0),
+      Row0 = triix_get(Key, TIX1),
+      case row_get(V, Row0) of
+	Coef when Coef > -0.0001, Coef < 0.0001 ->
+	  throw(error);
+	_ ->
+	  Row = row_normalise(V, Row0),
+	  TIX2 = triix_put(Key, Row, TIX1),
+	  TIX = eliminate_triix(V, Key, Row, TIX2),
+	  triangelise_1(TIX, [{V,Key}|Acc])
+      end
+  end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Triangelisation maintains its own index, outside of eqs. This index is
+%% essentially a BST (used as a heap) of all equations by size, with {Key,Var}
+%% as the values and only containing a subset of all the keys in the whole
+%% equation system. The key operation is triix_pop_smallest/1, which pops a
+%% {Key,Var} from the heap corresponding to one of the smallest equations. This
+%% is critical in order to prevent the equations from growing during
+%% triangelisation, which would make the algorithm O(n^2) in the common case.
+-type tri_eq_system() :: {eq_system(),
+			  gb_trees:tree(non_neg_integer(),
+					gb_trees:tree(key(), var()))}.
+
+triix_eqs({EQS, _}) -> EQS.
+triix_get(Key, {EQS, _}) -> eqs_get(Key, EQS).
+triix_is_empty({_, Tree}) -> gb_trees:is_empty(Tree).
+triix_lookup(V, {EQS, _}) -> eqs_lookup(V, EQS).
+
+mk_triix(EQS, VKs) ->
+  {EQS,
+   lists:foldl(fun({V,Key}, Tree) ->
+		   Size = row_size(eqs_get(Key, EQS)),
+		   sitree_insert(Size, Key, V, Tree)
+	       end, gb_trees:empty(), VKs)}.
+
+sitree_insert(Size, Key, V, SiTree) ->
+  SubTree1 =
+    case gb_trees:lookup(Size, SiTree) of
+      none -> gb_trees:empty();
+      {value, SubTree0} -> SubTree0
+    end,
+  SubTree = gb_trees:insert(Key, V, SubTree1),
+  gb_trees:enter(Size, SubTree, SiTree).
+
+sitree_update_subtree(Size, SubTree, SiTree) ->
+  case gb_trees:is_empty(SubTree) of
+    true -> gb_trees:delete(Size, SiTree);
+    false -> gb_trees:update(Size, SubTree, SiTree)
+  end.
+
+triix_put(Key, Row, {EQS, Tree0}) ->
+  OldSize = row_size(eqs_get(Key, EQS)),
+  case row_size(Row) of
+    OldSize -> {eqs_put(Key, Row, EQS), Tree0};
+    Size ->
+      Tree =
+	case gb_trees:lookup(OldSize, Tree0) of
+	  none -> Tree0;
+	  {value, SubTree0} ->
+	    case gb_trees:lookup(Key, SubTree0) of
+	      none -> Tree0;
+	      {value, V} ->
+		SubTree = gb_trees:delete(Key, SubTree0),
+		Tree1 = sitree_update_subtree(OldSize, SubTree, Tree0),
+		sitree_insert(Size, Key, V, Tree1)
+	    end
+	end,
+      {eqs_put(Key, Row, EQS), Tree}
+  end.
+
+triix_pop_smallest({EQS, Tree}) ->
+  {Size, SubTree0} = gb_trees:smallest(Tree),
+  {Key, V, SubTree} = gb_trees:take_smallest(SubTree0),
+  {V, Key, {EQS, sitree_update_subtree(Size, SubTree, Tree)}}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+row_normalise(Var, Row) ->
+  %% Normalise v's coef to 1.0
+  %% row_set_coef ensures the coef is exactly 1.0 (no rounding errors)
+  row_set_coef(Var, 1.0, row_scale(Row, 1.0/row_get(Var, Row))).
+
+%% Precondition: Row must be normalised; i.e. Vars coef must be 1.0 (mod
+%% rounding errors)
+-spec eliminate(var(), key(), row(), eq_system()) -> eq_system().
+eliminate(Var, Key, Row, TIX0) ->
+  eliminate_abstr(Var, Key, Row, TIX0,
+		  fun eqs_get/2, fun eqs_lookup/2, fun eqs_put/3).
+
+-spec eliminate_triix(var(), key(), row(), tri_eq_system()) -> tri_eq_system().
+eliminate_triix(Var, Key, Row, TIX0) ->
+  eliminate_abstr(Var, Key, Row, TIX0,
+		  fun triix_get/2, fun triix_lookup/2, fun triix_put/3).
+
+%% The same function implemented for two data types, eqs and triix.
+-compile({inline, eliminate_abstr/7}).
+-spec eliminate_abstr(var(), key(), row(), ADT, fun((key(), ADT) -> row()),
+		      fun((var(), ADT) -> [key()]),
+		      fun((key(), row(), ADT) -> ADT)) -> ADT.
+eliminate_abstr(Var, Key, Row, ADT0, GetFun, LookupFun, PutFun) ->
+  ?ASSERT(1.0 =:= row_get(Var, Row)),
+  ADT =
+    lists:foldl(fun(RK, ADT1) when RK =:= Key -> ADT1;
+		   (RK, ADT1) ->
+		    R = GetFun(RK, ADT1),
+		    PutFun(RK, row_addmul(R, Row, -row_get(Var, R)), ADT1)
+		end, ADT0, LookupFun(Var, ADT0)),
+  [Key] = LookupFun(Var, ADT),
+  ADT.
+
+-spec solve(eq_system(), eq_assoc()) -> error | {ok, solution()}.
+solve(EQS0, EqAssoc0) ->
+  try triangelise(EQS0, EqAssoc0)
+  of {EQS1, EqAssoc} ->
+      {ok, solve_1(EqAssoc, maps:from_list(EqAssoc), EQS1, [])}
+  catch error -> error
+  end.
+
+solve_1([], _VarEqs, _EQS, Acc) -> Acc;
+solve_1([{V,K}|Ps], VarEqs, EQS0, Acc0) ->
+  Row0 = eqs_get(K, EQS0),
+  VarsToKill = [Var || {Var, _} <- row_coefs(Row0), Var =/= V],
+  Row1 = kill_vars(VarsToKill, VarEqs, EQS0, Row0),
+  [{V,_}] = row_coefs(Row1), % assertion
+  Row = row_normalise(V, Row1),
+  [{V,1.0}] = row_coefs(Row), % assertion
+  EQS = eliminate(V, K, Row, EQS0),
+  [K] = eqs_lookup(V, EQS),
+  solve_1(Ps, VarEqs, eqs_remove(K, EQS), [{V, row_const(Row)}|Acc0]).
+
+kill_vars([], _VarEqs, _EQS, Row) -> Row;
+kill_vars([V|Vs], VarEqs, EQS, Row0) ->
+  VRow0 = eqs_get(maps:get(V, VarEqs), EQS),
+  VRow = row_normalise(V, VRow0),
+  ?ASSERT(1.0 =:= row_get(V, VRow)),
+  Row = row_addmul(Row0, VRow, -row_get(V, Row0)),
+  ?ASSERT(0.0 =:= row_get(V, Row)), % V has been killed
+  kill_vars(Vs, VarEqs, EQS, Row).
+
+-spec weight(label(), bb_weights()) -> float().
+weight(Lbl, Weights) ->
+  maps:get(Lbl, Weights).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Row datatype
+%% Invariant: No 0.0 coefficiets!
+-spec row_empty() -> row().
+row_empty() -> {orddict:new(), 0.0}.
+
+-spec row_new([{var(), float()}], float()) -> row().
+row_new(Coefs, Const) when is_float(Const) ->
+  row_ensure_invar({row_squash_multiples(lists:keysort(1, Coefs)), Const}).
+
+row_squash_multiples([{K, C1},{K, C2}|Ps]) ->
+  row_squash_multiples([{K,C1+C2}|Ps]);
+row_squash_multiples([P|Ps]) -> [P|row_squash_multiples(Ps)];
+row_squash_multiples([]) -> [].
+
+row_ensure_invar({Coef, Const}) ->
+  {orddict:filter(fun(_, 0.0) -> false; (_, F) when is_float(F) -> true end,
+		  Coef), Const}.
+
+row_const({_, Const}) -> Const.
+row_coefs({Coefs, _}) -> orddict:to_list(Coefs).
+row_size({Coefs, _}) -> orddict:size(Coefs).
+
+row_get(Var, {Coefs, _}) ->
+  case lists:keyfind(Var, 1, Coefs) of
+    false -> 0.0;
+    {_, Coef} -> Coef
+  end.
+
+row_set_coef(Var, 0.0, {Coefs, Const}) ->
+  {orddict:erase(Var, Coefs), Const};
+row_set_coef(Var, Coef, {Coefs, Const}) ->
+  {orddict:store(Var, Coef, Coefs), Const}.
+
+row_set_const(Const, {Coefs, _}) -> {Coefs, Const}.
+
+%% Lhs + Rhs*Factor
+-spec row_addmul(row(), row(), float()) -> row().
+row_addmul({LhsCoefs, LhsConst}, {RhsCoefs, RhsConst}, Factor)
+  when is_float(Factor) ->
+  Coefs = row_addmul_coefs(LhsCoefs, RhsCoefs, Factor),
+  Const = LhsConst + RhsConst * Factor,
+  {Coefs, Const}.
+
+row_addmul_coefs(Ls, [], Factor) when is_float(Factor) -> Ls;
+row_addmul_coefs([], Rs, Factor) when is_float(Factor) ->
+  row_scale_coefs(Rs, Factor);
+row_addmul_coefs([L={LV, _}|Ls], Rs=[{RV,_}|_], Factor)
+  when LV < RV, is_float(Factor) ->
+  [L|row_addmul_coefs(Ls, Rs, Factor)];
+row_addmul_coefs(Ls=[{LV, _}|_], [{RV, RC}|Rs], Factor)
+  when LV > RV, is_float(RC), is_float(Factor) ->
+  [{RV, RC*Factor}|row_addmul_coefs(Ls, Rs, Factor)];
+row_addmul_coefs([{V, LC}|Ls], [{V, RC}|Rs], Factor)
+  when is_float(LC), is_float(RC), is_float(Factor) ->
+  case LC + RC * Factor of
+    0.0 ->      row_addmul_coefs(Ls, Rs, Factor);
+    C -> [{V,C}|row_addmul_coefs(Ls, Rs, Factor)]
+  end.
+
+row_scale(_, 0.0) -> row_empty();
+row_scale({RowCoefs, RowConst}, Factor) when is_float(Factor) ->
+  {row_scale_coefs(RowCoefs, Factor), RowConst * Factor}.
+
+row_scale_coefs([{V,C}|Cs], Factor) when is_float(Factor), is_float(C) ->
+  [{V,C*Factor}|row_scale_coefs(Cs, Factor)];
+row_scale_coefs([], Factor) when is_float(Factor) ->
+  [].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Equation system ADT
+%%
+%% Stores a linear equation system, allowing for efficient updates and efficient
+%% queries for all equations mentioning a variable.
+%%
+%% It is sort of like a "database" table of {Primary, Terms, Const} indexed both
+%% on Primary as well as the vars (map keys) in Terms.
+-type row()       :: {Terms :: orddict:orddict(var(), float()),
+		      Const :: float()}.
+-type key()       :: non_neg_integer().
+-type rev_index() :: #{var() => ordsets:ordset(key())}.
+-record(eq_system, {
+	  rows = #{}              :: #{key() => row()},
+	  revidx = revidx_empty() :: rev_index(),
+	  next_key = 0            :: key()
+	 }).
+-type eq_system() :: #eq_system{}.
+
+eqs_new() -> #eq_system{}.
+
+-spec eqs_insert(row(), eq_system()) -> {key(), eq_system()}.
+eqs_insert(Row, EQS=#eq_system{next_key=NextKey0}) ->
+  Key = NextKey0,
+  NextKey = NextKey0 + 1,
+  {Key, eqs_insert(Key, Row, EQS#eq_system{next_key=NextKey})}.
+
+eqs_insert(Key, Row, EQS=#eq_system{rows=Rows, revidx=RevIdx0}) ->
+  RevIdx = revidx_add(Key, Row, RevIdx0),
+  EQS#eq_system{rows=Rows#{Key => Row}, revidx=RevIdx}.
+
+eqs_put(Key, Row, EQS0) ->
+  eqs_insert(Key, Row, eqs_remove(Key, EQS0)).
+
+eqs_remove(Key, EQS=#eq_system{rows=Rows, revidx=RevIdx0}) ->
+  OldRow = maps:get(Key, Rows),
+  RevIdx = revidx_remove(Key, OldRow, RevIdx0),
+  EQS#eq_system{rows = maps:remove(Key, Rows), revidx=RevIdx}.
+
+-spec eqs_get(key(), eq_system()) -> row().
+eqs_get(Key, #eq_system{rows=Rows}) -> maps:get(Key, Rows).
+
+%% Keys of all equations containing a nonzero coefficient for Var
+-spec eqs_lookup(var(), eq_system()) -> ordsets:ordset(key()).
+eqs_lookup(Var, #eq_system{revidx=RevIdx}) -> maps:get(Var, RevIdx).
+
+%% eqs_rows(#eq_system{rows=Rows}) -> maps:to_list(Rows).
+
+%% eqs_print(EQS) ->
+%%   lists:foreach(fun({_, Row}) ->
+%% 		    row_print(Row)
+%% 		end, lists:sort(eqs_rows(EQS))).
+
+%% row_print(Row) ->
+%%   CoefStrs = [io_lib:format("~wl~w", [Coef, Var])
+%% 	      || {Var, Coef} <- row_coefs(Row)],
+%%   CoefStr = lists:join(" + ", CoefStrs),
+%%   io:format("~w = ~s~n", [row_const(Row), CoefStr]).
+
+revidx_empty() -> #{}.
+
+-spec revidx_add(key(), row(), rev_index()) -> rev_index().
+revidx_add(Key, Row, RevIdx0) ->
+  orddict:fold(fun(Var, _Coef, RevIdx1) ->
+		?ASSERT(_Coef /= 0.0),
+		RevIdx1#{Var => ordsets:add_element(
+				  Key, maps:get(Var, RevIdx1, ordsets:new()))}
+	    end, RevIdx0, row_coefs(Row)).
+
+-spec revidx_remove(key(), row(), rev_index()) -> rev_index().
+revidx_remove(Key, {Coefs, _}, RevIdx0) ->
+  orddict:fold(fun(Var, _Coef, RevIdx1) ->
+		case RevIdx1 of
+		  #{Var := Keys0} ->
+		    case ordsets:del_element(Key, Keys0) of
+		      [] -> maps:remove(Var, RevIdx1);
+		      Keys -> RevIdx1#{Var := Keys}
+		    end
+		end
+	    end, RevIdx0, Coefs).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-define(FAST_ITERATIONS, 5).
+
+%% @doc Computes a rough approximation of BB weights. The approximation is
+%% particularly poor (converges slowly) for recursive functions and loops.
+-spec compute_fast(cfg(), target_module(), target_context()) -> bb_weights().
+compute_fast(CFG, TgtMod, TgtCtx) ->
+  Target = {TgtMod, TgtCtx},
+  StartLb = hipe_gen_cfg:start_label(CFG),
+  RPO = reverse_postorder(CFG, Target),
+  PredProbs = [{L, pred_prob(L, CFG, Target)} || L <- RPO, L =/= StartLb],
+  Probs0 = (maps:from_list([{L, 0.0} || L <- RPO]))#{StartLb := 1.0},
+  fast_iterate(?FAST_ITERATIONS, PredProbs, Probs0).
+
+fast_iterate(0, _Pred, Probs) -> Probs;
+fast_iterate(Iters, Pred, Probs0) ->
+  fast_iterate(Iters-1, Pred,
+	       fast_one(Pred, Probs0)).
+
+fast_one([{L, Pred}|Ls], Probs0) ->
+  Weight = fast_sum(Pred, Probs0, 0.0),
+  Probs = Probs0#{L => Weight},
+  fast_one(Ls, Probs);
+fast_one([], Probs) ->
+  Probs.
+
+fast_sum([{P,EWt}|Pred], Probs, Acc) when is_float(EWt), is_float(Acc) ->
+  case Probs of
+    #{P := PWt} when is_float(PWt) ->
+      fast_sum(Pred, Probs, Acc + PWt * EWt)
+  end;
+fast_sum([], _Probs, Acc) when is_float(Acc) ->
+  Acc.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Target module interface functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-define(TGT_IFACE_0(N), N(         {M,C}) -> M:N(         C)).
+-define(TGT_IFACE_1(N), N(A1,      {M,C}) -> M:N(A1,      C)).
+-define(TGT_IFACE_2(N), N(A1,A2,   {M,C}) -> M:N(A1,A2,   C)).
+-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)).
+
+?TGT_IFACE_2(bb).
+?TGT_IFACE_1(branch_preds).
+?TGT_IFACE_1(labels).
+?TGT_IFACE_1(reverse_postorder).
-- 
cgit v1.2.3


From e7b26d9d6a36b315518cc2743eb179ccec3b7a36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Mon, 6 Mar 2017 18:31:27 +0100
Subject: hipe_spillmin_color: Move coalescing

---
 lib/hipe/opt/hipe_spillmin_color.erl | 62 +++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 11 deletions(-)

(limited to 'lib/hipe/opt')

diff --git a/lib/hipe/opt/hipe_spillmin_color.erl b/lib/hipe/opt/hipe_spillmin_color.erl
index 41f1972df7..28a4f4bca5 100644
--- a/lib/hipe/opt/hipe_spillmin_color.erl
+++ b/lib/hipe/opt/hipe_spillmin_color.erl
@@ -212,16 +212,26 @@ build_ig_bb([X|Xs], LiveOut, IG, Target, TempMap, TempMapping) ->
     build_ig_bb(Xs, LiveOut, IG, Target, TempMap, TempMapping),
   build_ig_instr(X, Live, NewIG, Target, TempMap, TempMapping).
 
-build_ig_instr(X, Live, IG, Target, TempMap, TempMapping) ->
+build_ig_instr(X, Live0, IG0, Target, TempMap, TempMapping) ->
   {Def, Use} = def_use(X, Target, TempMap),
-  ?report3("Live ~w\n~w : Def: ~w Use ~w\n",[Live, X, Def,Use]),
+  ?report3("Live ~w\n~w : Def: ~w Use ~w\n",[Live0, X, Def,Use]),
   DefListMapped = list_map(Def, TempMapping, []),
   UseListMapped = list_map(Use, TempMapping, []),
   DefSetMapped = ordsets:from_list(DefListMapped),
   UseSetMapped = ordsets:from_list(UseListMapped),
-  NewIG = interference_arcs(DefListMapped, ordsets:to_list(Live), IG),
-  NewLive = ordsets:union(UseSetMapped, ordsets:subtract(Live, DefSetMapped)),
-  {NewLive, NewIG}.
+  {Live1, IG1} =
+    analyze_move(X, Live0, IG0, Target, DefSetMapped, UseSetMapped),
+  IG = interference_arcs(DefListMapped, ordsets:to_list(Live1), IG1),
+  Live = ordsets:union(UseSetMapped, ordsets:subtract(Live1, DefSetMapped)),
+  {Live, IG}.
+
+analyze_move(X, Live0, IG0, Target, DefSetMapped, UseSetMapped) ->
+  case {is_spill_move(X, Target), DefSetMapped, UseSetMapped} of
+    {true, [Dst], [Src]} ->
+      {ordsets:del_element(Src, Live0), add_move(Src, Dst, IG0)};
+    {_, _, _} ->
+      {Live0, IG0}
+  end.
 
 %% Given a list of Keys and an ets-table returns a list of the elements 
 %% in Mapping corresponding to the Keys and appends Acc to this list.
@@ -382,7 +392,8 @@ select_colors([{X,colorable}|Xs], IG, Cols, PhysRegs) ->
 
 select_color(X, IG, Cols, PhysRegs) ->
   UsedColors = get_colors(neighbors(X, IG), Cols),
-  Reg = select_unused_color(UsedColors, PhysRegs),
+  Preferences = get_colors(move_connected(X, IG), Cols),
+  Reg = select_unused_color(UsedColors, Preferences, PhysRegs),
   {Reg, set_color(X, Reg, Cols)}.
 
 %%%%%%%%%%%%%%%%%%%%
@@ -396,10 +407,14 @@ get_colors([X|Xs], Cols) ->
       [R|get_colors(Xs, Cols)]
   end.
 
-select_unused_color(UsedColors, PhysRegs) ->
+select_unused_color(UsedColors, Preferences, PhysRegs) ->
   Summary = ordsets:from_list(UsedColors),
-  AvailRegs = ordsets:to_list(ordsets:subtract(PhysRegs, Summary)),
-  hd(AvailRegs).
+  case ordsets:subtract(ordsets:from_list(Preferences), Summary) of
+    [PreferredColor|_] -> PreferredColor;
+    _ ->
+      AvailRegs = ordsets:to_list(ordsets:subtract(PhysRegs, Summary)),
+      hd(AvailRegs)
+  end.
 
 push_colored(X, Stk) ->
   [{X, colorable} | Stk].
@@ -456,7 +471,11 @@ init_stackslots(NumSlots, Acc) ->
 %%
 %% Note: later on, we may wish to add 'move-related' support.
 
--record(ig_info, {neighbors = [] :: [_], degree = 0 :: non_neg_integer()}).
+-record(ig_info, {
+	  neighbors = []      :: [_],
+	  degree = 0          :: non_neg_integer(),
+	  move_connected = [] :: [_]
+	 }).
 
 empty_ig(NumNodes) ->
   hipe_vectors:new(NumNodes, #ig_info{}).
@@ -467,16 +486,29 @@ degree(Info) ->
 neighbors(Info) ->
   Info#ig_info.neighbors.
 
+move_connected(Info) ->
+  Info#ig_info.move_connected.
+
 add_edge(X, X, IG) -> IG;
 add_edge(X, Y, IG) ->
   add_arc(X, Y, add_arc(Y, X, IG)).
 
+add_move(X, X, IG) -> IG;
+add_move(X, Y, IG) ->
+  add_move_arc(X, Y, add_move_arc(Y, X, IG)).
+
 add_arc(X, Y, IG) ->
   Info = hipe_vectors:get(IG, X),
   Old = neighbors(Info),
   New = Info#ig_info{neighbors = [Y|Old]},
   hipe_vectors:set(IG,X,New).
 
+add_move_arc(X, Y, IG) ->
+  Info = hipe_vectors:get(IG, X),
+  Old = move_connected(Info),
+  New = Info#ig_info{move_connected = [Y|Old]},
+  hipe_vectors:set(IG,X,New).
+
 normalize_ig(IG) ->
   Size = hipe_vectors:size(IG),
   normalize_ig(Size-1, IG).
@@ -486,7 +518,8 @@ normalize_ig(-1, IG) ->
 normalize_ig(I, IG) ->
   Info = hipe_vectors:get(IG, I),
   N = ordsets:from_list(neighbors(Info)),
-  NewInfo = Info#ig_info{neighbors = N, degree = length(N)},
+  M = ordsets:subtract(ordsets:from_list(move_connected(Info)), N),
+  NewInfo = Info#ig_info{neighbors = N, degree = length(N), move_connected = M},
   NewIG = hipe_vectors:set(IG, I, NewInfo),
   normalize_ig(I-1, NewIG).
 
@@ -494,6 +527,10 @@ neighbors(X, IG) ->
   Info = hipe_vectors:get(IG, X),
   Info#ig_info.neighbors.
 
+move_connected(X, IG) ->
+  Info = hipe_vectors:get(IG, X),
+  Info#ig_info.move_connected.
+
 decrement_degree(X, IG) ->
   Info = hipe_vectors:get(IG, X),
   Degree = degree(Info),
@@ -555,3 +592,6 @@ def_use(X, Target={TgtMod,TgtCtx}, TempMap) ->
 
 reg_names(Regs, {TgtMod,TgtCtx}) ->
   [TgtMod:reg_nr(X,TgtCtx) || X <- Regs].
+
+is_spill_move(Instr, {TgtMod,TgtCtx}) ->
+  TgtMod:is_spill_move(Instr, TgtCtx).
-- 
cgit v1.2.3


From 9e618caac607379e1154e24bc9bd09709cce5d41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Thu, 16 Mar 2017 16:08:17 +0100
Subject: hipe_spillmin_color: cleanup exit calls

---
 lib/hipe/opt/hipe_spillmin_color.erl | 28 +++++++---------------------
 1 file changed, 7 insertions(+), 21 deletions(-)

(limited to 'lib/hipe/opt')

diff --git a/lib/hipe/opt/hipe_spillmin_color.erl b/lib/hipe/opt/hipe_spillmin_color.erl
index 28a4f4bca5..f87d9a5b61 100644
--- a/lib/hipe/opt/hipe_spillmin_color.erl
+++ b/lib/hipe/opt/hipe_spillmin_color.erl
@@ -166,9 +166,13 @@ remap_temp_map0(Cols, [_Y|Ys], SpillIndex) ->
 %%
 
 build_ig(CFG, Live, Target, TempMap) ->
-  try build_ig0(CFG, Live, Target, TempMap)
-  catch error:Rsn -> exit({regalloc, build_ig, Rsn})
-  end.
+  TempMapping = map_spilled_temporaries(TempMap),
+  TempMappingTable = setup_ets(TempMapping),
+  NumSpilled = length(TempMapping),
+  IG = build_ig_bbs(labels(CFG, Target), CFG, Live, empty_ig(NumSpilled),
+		    Target, TempMap, TempMappingTable),
+  ets:delete(TempMappingTable),
+  {normalize_ig(IG), NumSpilled}.
 
 %% Creates an ETS table consisting of the keys given in List, with the values
 %% being an integer which is the position of the key in List.
@@ -183,15 +187,6 @@ setup_ets0([X|Xs], Table, N) ->
   ets:insert(Table, {X, N}),
   setup_ets0(Xs, Table, N+1).
 
-build_ig0(CFG, Live, Target, TempMap) ->
-  TempMapping = map_spilled_temporaries(TempMap),
-  TempMappingTable = setup_ets(TempMapping),
-  NumSpilled = length(TempMapping),
-  IG = build_ig_bbs(labels(CFG, Target), CFG, Live, empty_ig(NumSpilled),
-		    Target, TempMap, TempMappingTable),
-  ets:delete(TempMappingTable),
-  {normalize_ig(IG), NumSpilled}.
-
 build_ig_bbs([], _CFG, _Live, IG, _Target, _TempMap, _TempMapping) ->
   IG;
 build_ig_bbs([L|Ls], CFG, Live, IG, Target, TempMap, TempMapping) ->
@@ -281,15 +276,6 @@ i_arcs(X, [Y|Ys], IG) ->
 %%     throw an exception (the caller should retry with more stack slots)
 
 color(IG, StackSlots, NumNodes, Target) ->
-  try
-    color_0(IG, StackSlots, NumNodes, Target)
-  catch
-    error:Rsn ->
-      ?error_msg("Coloring failed with ~p~n", [Rsn]),
-      ?EXIT(Rsn)
-  end.
-
-color_0(IG, StackSlots, NumNodes, Target) -> 
   ?report("simplification of IG~n", []),
   K = ordsets:size(StackSlots),
   Nodes = list_ig(IG),
-- 
cgit v1.2.3