From f9263b9173905d4e7a53350d4f374c5020c52738 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20L=C3=A5ng?= Date: Thu, 16 Mar 2017 15:50:09 +0100 Subject: hipe: Extract disjoint sets to its own module --- lib/hipe/regalloc/hipe_regalloc_prepass.erl | 71 +++++------------------------ 1 file changed, 12 insertions(+), 59 deletions(-) (limited to 'lib/hipe/regalloc') diff --git a/lib/hipe/regalloc/hipe_regalloc_prepass.erl b/lib/hipe/regalloc/hipe_regalloc_prepass.erl index e212420ad2..5024840237 100644 --- a/lib/hipe/regalloc/hipe_regalloc_prepass.erl +++ b/lib/hipe/regalloc/hipe_regalloc_prepass.erl @@ -483,8 +483,8 @@ merge_pointless_splits_1([], _ScanBBs, DSets, Acc) -> {Acc, DSets}; merge_pointless_splits_1([P={_,{single,_}}|Ps], ScanBBs, DSets, Acc) -> merge_pointless_splits_1(Ps, ScanBBs, DSets, [P|Acc]); merge_pointless_splits_1([P0={L,{split,_,_}}|Ps], ScanBBs, DSets0, Acc) -> - {EntryRoot, DSets1} = dsets_find({entry,L}, DSets0), - {ExitRoot, DSets} = dsets_find({exit,L}, DSets1), + {EntryRoot, DSets1} = hipe_dsets:find({entry,L}, DSets0), + {ExitRoot, DSets} = hipe_dsets:find({exit,L}, DSets1), case EntryRoot =:= ExitRoot of false -> merge_pointless_splits_1(Ps, ScanBBs, DSets, [P0|Acc]); true -> @@ -501,7 +501,7 @@ merge_pointless_splits_1([P0={L,{split,_,_}}|Ps], ScanBBs, DSets0, Acc) -> -spec merge_small_parts(bb_dsets()) -> {bb_dsets_rllist(), bb_dsets()}. merge_small_parts(DSets0) -> - {RLList, DSets1} = dsets_to_rllist(DSets0), + {RLList, DSets1} = hipe_dsets:to_rllist(DSets0), RLLList = [{R, length(Elems), Elems} || {R, Elems} <- RLList], merge_small_parts_1(RLLList, DSets1, []). @@ -518,8 +518,8 @@ merge_small_parts_1([Fst,{R, L, Es}|Ps], DSets, Acc) merge_small_parts_1([Fst|Ps], DSets, [{R,Es}|Acc]); merge_small_parts_1([{R1,L1,Es1},{R2,L2,Es2}|Ps], DSets0, Acc) -> ?ASSERT(L1 < ?TUNE_TOO_FEW_BBS andalso L2 < ?TUNE_TOO_FEW_BBS), - DSets1 = dsets_union(R1, R2, DSets0), - {R, DSets} = dsets_find(R1, DSets1), + DSets1 = hipe_dsets:union(R1, R2, DSets0), + {R, DSets} = hipe_dsets:find(R1, DSets1), merge_small_parts_1([{R,L2+L1,Es2++Es1}|Ps], DSets, Acc). %% @doc Partition an ordering over BBs into subsequences for the dsets that @@ -531,8 +531,8 @@ part_order(Lbs, DSets) -> part_order(Lbs, DSets, #{}). part_order([], DSets, Acc) -> {Acc, DSets}; part_order([L|Ls], DSets0, Acc0) -> - {EntryRoot, DSets1} = dsets_find({entry,L}, DSets0), - {ExitRoot, DSets2} = dsets_find({exit,L}, DSets1), + {EntryRoot, DSets1} = hipe_dsets:find({entry,L}, DSets0), + {ExitRoot, DSets2} = hipe_dsets:find({exit,L}, DSets1), Acc1 = map_append(EntryRoot, L, Acc0), %% Only include the label once if both entry and exit is in same partition Acc2 = case EntryRoot =:= ExitRoot of @@ -558,72 +558,25 @@ map_append(Key, Elem, Map) -> %% split point, and one from the end to the last split point. -type bb_dset_key() :: {entry | exit, label()}. --type bb_dsets() :: dsets(bb_dset_key()). +-type bb_dsets() :: hipe_dsets:dsets(bb_dset_key()). -type bb_dsets_rllist() :: [{bb_dset_key(), [bb_dset_key()]}]. -spec initial_dsets(target_cfg(), module(), target_context()) -> bb_dsets(). 
initial_dsets(CFG, TgtMod, TgtCtx) -> Labels = TgtMod:labels(CFG, TgtCtx), - DSets0 = dsets_new(lists:append([[{entry,L},{exit,L}] || L <- Labels])), + DSets0 = hipe_dsets:new(lists:append([[{entry,L},{exit,L}] || L <- Labels])), Edges = lists:append([[{L, S} || S <- hipe_gen_cfg:succ(CFG, L)] || L <- Labels]), - lists:foldl(fun({X, Y}, DS) -> dsets_union({exit,X}, {entry,Y}, DS) end, + lists:foldl(fun({X, Y}, DS) -> hipe_dsets:union({exit,X}, {entry,Y}, DS) end, DSets0, Edges). -spec join_whole_blocks(part_bb_list(), bb_dsets()) -> bb_dsets(). join_whole_blocks(PartBBList, DSets0) -> - lists:foldl(fun({L, {single, _}}, DS) -> dsets_union({entry,L}, {exit,L}, DS); + lists:foldl(fun({L, {single, _}}, DS) -> + hipe_dsets:union({entry,L}, {exit,L}, DS); ({_, {split, _, _}}, DS) -> DS end, DSets0, PartBBList). -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% The disjoint set forests data structure, for elements of arbitrary types. -%% Note that the find operation mutates the set. -%% -%% We could do this more efficiently if we restricted the elements to integers, -%% and used the (mutable) hipe arrays. For arbitrary terms ETS could be used, -%% for a persistent interface (which isn't that nice when even accessors return -%% modified copies), the array module could be used. --type dsets(X) :: #{X => {node, X} | {root, non_neg_integer()}}. - --spec dsets_new([E]) -> dsets(E). -dsets_new(Elems) -> maps:from_list([{E,{root,0}} || E <- Elems]). - --spec dsets_find(E, dsets(E)) -> {E, dsets(E)}. -dsets_find(E, DS0) -> - case DS0 of - #{E := {root,_}} -> {E, DS0}; - #{E := {node,N}} -> - case dsets_find(N, DS0) of - {N, _}=T -> T; - {R, DS1} -> {R, DS1#{E := {node,R}}} - end - ;_ -> error(badarg, [E, DS0]) - end. - --spec dsets_union(E, E, dsets(E)) -> dsets(E). -dsets_union(X, Y, DS0) -> - {XRoot, DS1} = dsets_find(X, DS0), - case dsets_find(Y, DS1) of - {XRoot, DS2} -> DS2; - {YRoot, DS2} -> - #{XRoot := {root,XRR}, YRoot := {root,YRR}} = DS2, - if XRR < YRR -> DS2#{XRoot := {node,YRoot}}; - XRR > YRR -> DS2#{YRoot := {node,XRoot}}; - true -> DS2#{YRoot := {node,XRoot}, XRoot := {root,XRR+1}} - end - end. - --spec dsets_to_rllist(dsets(E)) -> {[{Root::E, Elems::[E]}], dsets(E)}. -dsets_to_rllist(DS0) -> - {Lists, DS} = dsets_to_rllist(maps:keys(DS0), #{}, DS0), - {maps:to_list(Lists), DS}. - -dsets_to_rllist([], Acc, DS) -> {Acc, DS}; -dsets_to_rllist([E|Es], Acc, DS0) -> - {ERoot, DS} = dsets_find(E, DS0), - dsets_to_rllist(Es, map_append(ERoot, E, Acc), DS). - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Third pass %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -- cgit v1.2.3 From dbe626aa7beb0f04403f6782443f3a78d0f1fdb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20L=C3=A5ng?= Date: Thu, 16 Mar 2017 14:55:23 +0100 Subject: hipe: Add basic range splitting ra callbacks In addition to the temporary name rewriting that hipe_regalloc_prepass does, range splitters also need to be able to insert move instructions, as well as inserting new basic blocks in the control flow graph. The following four callbacks are added for that purpose: * Target:mk_move(Src, Dst, Context) Returns a move instruction from the temporary (not just register number) Src to Dst. * Target:mk_goto(Label, Context) Returns a unconditional control flow instruction that branches to the label with name Label. 
* Target:redirect_jmp(Instr, ToOld, ToNew, Context) Modifies the control flow instruction Instr so that any control flow that would go to a label with name ToOld instead goes to the label with name ToNew. * Target:new_label(Context) Returns a fresh label name that does not belong to any existing block in the current function, and is to be used to create a new basic block in the control flow graph by calling Target:update_bb/4 with this new name. --- lib/hipe/regalloc/hipe_amd64_specific_sse2.erl | 29 ++++++++++++++++++++++++-- lib/hipe/regalloc/hipe_arm_specific.erl | 29 ++++++++++++++++++++++++-- lib/hipe/regalloc/hipe_ppc_specific.erl | 20 ++++++++++++++++-- lib/hipe/regalloc/hipe_ppc_specific_fp.erl | 20 ++++++++++++++++-- lib/hipe/regalloc/hipe_sparc_specific.erl | 20 ++++++++++++++++-- lib/hipe/regalloc/hipe_sparc_specific_fp.erl | 20 ++++++++++++++++-- lib/hipe/regalloc/hipe_x86_specific.erl | 29 ++++++++++++++++++++++++-- 7 files changed, 153 insertions(+), 14 deletions(-) (limited to 'lib/hipe/regalloc') diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl index 9682d37520..341233e851 100644 --- a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl +++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl @@ -50,8 +50,12 @@ -export([check_and_rewrite/3, check_and_rewrite/4]). -%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). @@ -188,6 +192,27 @@ is_move(Instruction, _) -> reg_nr(Reg, _) -> hipe_x86:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_x86:mk_fmove(Src, Dst). + +mk_goto(Label, _) -> + hipe_x86:mk_jmp_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + Ref = make_ref(), + put(Ref, false), + I = hipe_x86_subst:insn_lbls( + fun(Tgt) -> + if Tgt =:= ToOld -> put(Ref, true), ToNew; + is_integer(Tgt) -> Tgt + end + end, Jmp), + true = erase(Ref), % Assert that something was rewritten + I. + +new_label(_) -> + hipe_gensym:get_next_label(x86). + new_reg_nr(_) -> hipe_gensym:get_next_var(x86). diff --git a/lib/hipe/regalloc/hipe_arm_specific.erl b/lib/hipe/regalloc/hipe_arm_specific.erl index cef22e5af9..224837a34a 100644 --- a/lib/hipe/regalloc/hipe_arm_specific.erl +++ b/lib/hipe/regalloc/hipe_arm_specific.erl @@ -46,8 +46,12 @@ %% callbacks for hipe_regalloc_loop -export([check_and_rewrite/3]). -%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). @@ -147,6 +151,27 @@ is_move(Instruction, _) -> reg_nr(Reg, _) -> hipe_arm:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_arm:mk_pseudo_move(Dst, Src). + +mk_goto(Label, _) -> + hipe_arm:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + Ref = make_ref(), + put(Ref, false), + I = hipe_arm_subst:insn_lbls( + fun(Tgt) -> + if Tgt =:= ToOld -> put(Ref, true), ToNew; + is_integer(Tgt) -> Tgt + end + end, Jmp), + true = erase(Ref), % Assert that something was rewritten + I. + +new_label(_) -> + hipe_gensym:get_next_label(arm). + new_reg_nr(_) -> hipe_gensym:get_next_var(arm). 
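As an illustration, a minimal sketch of how a range splitter might compose the four callbacks listed above to place a compensation move on the CFG edge From -> To; the function name split_edge and its arguments are hypothetical, while hipe_bb is the existing basic-block module:

  %% Hypothetical sketch; {TargetMod, Ctx} is a backend module plus its
  %% context, Src/Dst are temps, From/To are existing label names.
  split_edge(CFG0, From, To, Src, Dst, {TargetMod, Ctx}) ->
    NewLb = TargetMod:new_label(Ctx),              % fresh block name
    Code  = [TargetMod:mk_move(Src, Dst, Ctx),     % compensation move
             TargetMod:mk_goto(To, Ctx)],          % fall through to To
    CFG1  = TargetMod:update_bb(CFG0, NewLb, hipe_bb:mk_bb(Code), Ctx),
    BB0   = TargetMod:bb(CFG1, From, Ctx),
    Last  = TargetMod:redirect_jmp(hipe_bb:last(BB0), To, NewLb, Ctx),
    TargetMod:update_bb(CFG1, From,
                        hipe_bb:code_update(BB0, hipe_bb:butlast(BB0) ++ [Last]),
                        Ctx).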
diff --git a/lib/hipe/regalloc/hipe_ppc_specific.erl b/lib/hipe/regalloc/hipe_ppc_specific.erl index a6450b4d96..bc4bd1972e 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific.erl @@ -46,8 +46,12 @@ %% callbacks for hipe_regalloc_loop -export([check_and_rewrite/3]). -%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). @@ -147,6 +151,18 @@ is_move(Instruction, _) -> reg_nr(Reg, _) -> hipe_ppc:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_ppc:mk_pseudo_move(Dst, Src). + +mk_goto(Label, _) -> + hipe_ppc:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + hipe_ppc_cfg:redirect_jmp(Jmp, ToOld, ToNew). + +new_label(_) -> + hipe_gensym:get_next_label(ppc). + new_reg_nr(_) -> hipe_gensym:get_next_var(ppc). diff --git a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl index 23cb6c0318..3aab14f061 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl @@ -46,8 +46,12 @@ %% callbacks for hipe_regalloc_loop -export([check_and_rewrite/3]). -%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). @@ -128,6 +132,18 @@ is_move(I, _) -> reg_nr(Reg, _) -> hipe_ppc:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_ppc:mk_pseudo_fmove(Dst, Src). + +mk_goto(Label, _) -> + hipe_ppc:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + hipe_ppc_cfg:redirect_jmp(Jmp, ToOld, ToNew). + +new_label(_) -> + hipe_gensym:get_next_label(ppc). + new_reg_nr(_) -> hipe_gensym:get_next_var(ppc). diff --git a/lib/hipe/regalloc/hipe_sparc_specific.erl b/lib/hipe/regalloc/hipe_sparc_specific.erl index 31fca81316..12642709b9 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific.erl @@ -46,8 +46,12 @@ %% callbacks for hipe_regalloc_loop -export([check_and_rewrite/3]). -%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). @@ -147,6 +151,18 @@ is_move(Instruction, _) -> reg_nr(Reg, _) -> hipe_sparc:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_sparc:mk_pseudo_move(Src, Dst). + +mk_goto(Label, _) -> + hipe_sparc:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + hipe_sparc_cfg:redirect_jmp(Jmp, ToOld, ToNew). + +new_label(_) -> + hipe_gensym:get_next_label(sparc). + new_reg_nr(_) -> hipe_gensym:get_next_var(sparc). diff --git a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl index 050d65e1a9..ba4849d2d5 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl @@ -46,8 +46,12 @@ %% callbacks for hipe_regalloc_loop -export([check_and_rewrite/3]). 
-%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). @@ -128,6 +132,18 @@ is_move(I, _) -> reg_nr(Reg, _) -> hipe_sparc:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_sparc:mk_pseudo_fmove(Src, Dst). + +mk_goto(Label, _) -> + hipe_sparc:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + hipe_sparc_cfg:redirect_jmp(Jmp, ToOld, ToNew). + +new_label(_) -> + hipe_gensym:get_next_label(sparc). + new_reg_nr(_) -> hipe_gensym:get_next_var(sparc). diff --git a/lib/hipe/regalloc/hipe_x86_specific.erl b/lib/hipe/regalloc/hipe_x86_specific.erl index c1c8dbbcd6..0d9d7b751e 100644 --- a/lib/hipe/regalloc/hipe_x86_specific.erl +++ b/lib/hipe/regalloc/hipe_x86_specific.erl @@ -63,8 +63,12 @@ %% callbacks for hipe_regalloc_loop -export([check_and_rewrite/3]). -%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). @@ -203,6 +207,27 @@ is_move(Instruction,_) -> reg_nr(Reg,_) -> hipe_x86:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_x86:mk_move(Src, Dst). + +mk_goto(Label, _) -> + hipe_x86:mk_jmp_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + Ref = make_ref(), + put(Ref, false), + I = hipe_x86_subst:insn_lbls( + fun(Tgt) -> + if Tgt =:= ToOld -> put(Ref, true), ToNew; + is_integer(Tgt) -> Tgt + end + end, Jmp), + true = erase(Ref), % Assert that something was rewritten + I. + +new_label(_) -> + hipe_gensym:get_next_label(x86). + new_reg_nr(_) -> hipe_gensym:get_next_var(x86). -- cgit v1.2.3 From e99f1d41bc8a7e035e35fd5aef6f3ea023d7f12e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20L=C3=A5ng?= Date: Thu, 16 Mar 2017 16:38:22 +0100 Subject: hipe: Add range splitter restore_reuse hipe_restore_reuse is a simplistic range splitter that splits temps that are forced onto the stack by being live over call instructions. In particular, it attempts to avoid cases where there are several accesses to such stack allocated temps in straight-line code, uninterrupted by any calls. In order to achieve this it splits temps between just before the first access(es) and just after the last access(es) in such straight-line code groups. The hipe_restore_reuse pass is controlled by a new option ra_restore_reuse. ra_restore_reuse is added to o1. 
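As a rough illustration of the intended effect, a sketch on straight-line pseudo target code (the temp names and stack slots are hypothetical), followed by the entry point the new module exposes:

  %% Before (v1 spilled because it is live over the call):   After:
  %%   call f()                                                call f()
  %%   use [v1@stack]                                          v2 := [v1@stack]  % one restore
  %%   use [v1@stack]                                          use v2
  %%   use [v1@stack]                                          use v2
  %%
  %% Invocation sketch, assuming a backend pair {TargetMod, TargetCtx} and a
  %% liveness result for CFG; the wrapper name is hypothetical.
  run_restore_reuse(CFG, Liveness, TargetMod, TargetCtx) ->
    hipe_restore_reuse:split(CFG, Liveness, TargetMod, TargetCtx).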
--- lib/hipe/regalloc/Makefile | 1 + lib/hipe/regalloc/hipe_regalloc_loop.erl | 15 +- lib/hipe/regalloc/hipe_restore_reuse.erl | 503 +++++++++++++++++++++++++++++++ 3 files changed, 517 insertions(+), 2 deletions(-) create mode 100644 lib/hipe/regalloc/hipe_restore_reuse.erl (limited to 'lib/hipe/regalloc') diff --git a/lib/hipe/regalloc/Makefile b/lib/hipe/regalloc/Makefile index 209f230a9b..73ac232f51 100644 --- a/lib/hipe/regalloc/Makefile +++ b/lib/hipe/regalloc/Makefile @@ -52,6 +52,7 @@ MODULES = hipe_ig hipe_ig_moves hipe_moves \ hipe_graph_coloring_regalloc \ hipe_regalloc_loop \ hipe_regalloc_prepass \ + hipe_restore_reuse \ hipe_ls_regalloc \ hipe_ppc_specific hipe_ppc_specific_fp \ hipe_sparc_specific hipe_sparc_specific_fp \ diff --git a/lib/hipe/regalloc/hipe_regalloc_loop.erl b/lib/hipe/regalloc/hipe_regalloc_loop.erl index 5bbb0ba7c1..5d32073437 100644 --- a/lib/hipe/regalloc/hipe_regalloc_loop.erl +++ b/lib/hipe/regalloc/hipe_regalloc_loop.erl @@ -32,9 +32,11 @@ ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod, TargetCtx) -> ra_common(CFG0, Liveness0, SpillIndex, Options, RegAllocMod, TargetMod, TargetCtx) -> ?inc_counter(ra_calls_counter, 1), - SpillLimit0 = TargetMod:number_of_temporaries(CFG0, TargetCtx), + {CFG1, Liveness1} = + do_range_split(CFG0, Liveness0, TargetMod, TargetCtx, Options), + SpillLimit0 = TargetMod:number_of_temporaries(CFG1, TargetCtx), {Coloring, _, CFG, Liveness} = - call_allocator_initial(CFG0, Liveness0, SpillLimit0, SpillIndex, Options, + call_allocator_initial(CFG1, Liveness1, SpillLimit0, SpillIndex, Options, RegAllocMod, TargetMod, TargetCtx), %% The first iteration, the hipe_regalloc_prepass may create new temps, these %% should not end up above SpillLimit. @@ -96,3 +98,12 @@ call_allocator(CFG, Liveness, SpillLimit, SpillIndex, Options, RegAllocMod, RegAllocMod:regalloc(CFG, Liveness, SpillIndex, SpillLimit, TargetMod, TargetCtx, Options) end. + +do_range_split(CFG0, Liveness, TgtMod, TgtCtx, Options) -> + case proplists:get_bool(ra_restore_reuse, Options) of + true -> + CFG1 = hipe_restore_reuse:split(CFG0, Liveness, TgtMod, TgtCtx), + {CFG1, TgtMod:analyze(CFG1, TgtCtx)}; + false -> + {CFG0, Liveness} + end. diff --git a/lib/hipe/regalloc/hipe_restore_reuse.erl b/lib/hipe/regalloc/hipe_restore_reuse.erl new file mode 100644 index 0000000000..2d8dea2a2c --- /dev/null +++ b/lib/hipe/regalloc/hipe_restore_reuse.erl @@ -0,0 +1,503 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% RESTORE REUSE LIVE RANGE SPLITTING PASS +%% +%% This is a simple live range splitter that tries to avoid sequences where a +%% temporary is accessed on stack multiple times by keeping a copy of that temp +%% around in a register. +%% +%% At any point where a temporary that is expected to be spilled (see uses of +%% spills_add_list/2) is defined or used, this pass considers that temporary +%% "available". 
+%% +%% Limitations: +%% * If a live range part starts with several different restores, this module +%% will introduce a new temp number for each of them, and later be forced to +%% generate phi blocks. It would be more efficient to introduce just a +%% single temp number. That would also remove the need for the phi blocks. +%% * If a live range part ends in a definition, that definition should just +%% define the base temp rather than the substitution, since some CISC +%% targets might be able to inline the memory access in the instruction. +-module(hipe_restore_reuse). + +-export([split/4]). + +-compile(inline). + +%% -define(DO_ASSERT, 1). +-include("../main/hipe.hrl"). + +-type target_cfg() :: any(). +-type liveness() :: any(). +-type target_module() :: module(). +-type target_context() :: any(). +-type target() :: {target_module(), target_context()}. +-type label() :: non_neg_integer(). +-type reg() :: non_neg_integer(). +-type instr() :: any(). +-type temp() :: any(). + +-spec split(target_cfg(), liveness(), target_module(), target_context()) + -> target_cfg(). +split(CFG, Liveness, TargetMod, TargetContext) -> + Target = {TargetMod, TargetContext}, + Avail = analyse(CFG, Liveness, Target), + rewrite(CFG, Target, Avail). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-type avail() :: #{label() => avail_bb()}. + +-record(avail_bb, { + %% Blocks where HasCall is true are considered to have too high + %% register pressure to support a register copy of a temp + has_call :: boolean(), + %% AvailOut: Temps that can be split (are available) + out :: availset(), + %% Gen: AvailOut generated locally + gen :: availset(), + %% WantIn: Temps that are split + want :: regset(), + %% Self: Temps with avail-want pairs locally + self :: regset(), + %% DefIn: Temps shadowed by later def in same live range part + defin :: regset(), + pred :: [label()], + succ :: [label()] + }). +-type avail_bb() :: #avail_bb{}. + +avail_get(L, Avail) -> maps:get(L, Avail). +avail_set(L, Val, Avail) -> maps:put(L, Val, Avail). +avail_has_call(L, Avail) -> (avail_get(L, Avail))#avail_bb.has_call. +avail_out(L, Avail) -> (avail_get(L, Avail))#avail_bb.out. +avail_self(L, Avail) -> (avail_get(L, Avail))#avail_bb.self. +avail_pred(L, Avail) -> (avail_get(L, Avail))#avail_bb.pred. +avail_succ(L, Avail) -> (avail_get(L, Avail))#avail_bb.succ. + +avail_in(L, Avail) -> + case avail_pred(L, Avail) of + [] -> availset_empty(); % entry + Pred -> + lists:foldl(fun(P, ASet) -> + availset_intersect(avail_out(P, Avail), ASet) + end, availset_top(), Pred) + end. + +want_in(L, Avail) -> (avail_get(L, Avail))#avail_bb.want. +want_out(L, Avail) -> + lists:foldl(fun(S, Set) -> + ordsets:union(want_in(S, Avail), Set) + end, ordsets:new(), avail_succ(L, Avail)). + +def_in(L, Avail) -> (avail_get(L, Avail))#avail_bb.defin. +def_out(L, Avail) -> + case avail_succ(L, Avail) of + [] -> ordsets:new(); % entry + Succ -> + ordsets:intersection([def_in(S, Avail) || S <- Succ]) + end. + +-type regset() :: ordsets:ordset(reg()). +-type availset() :: top | regset(). +availset_empty() -> []. +availset_top() -> top. +availset_intersect(top, B) -> B; +availset_intersect(A, top) -> A; +availset_intersect(A, B) -> ordsets:intersection(A, B). +availset_union(top, _) -> top; +availset_union(_, top) -> top; +availset_union(A, B) -> ordsets:union(A, B). +ordset_intersect_availset(OS, top) -> OS; +ordset_intersect_availset(OS, AS) -> ordsets:intersection(OS, AS). 
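  %% A minimal illustration (not part of the module) of the lattice
  %% conventions above: 'top' is the identity of intersection and absorbing
  %% for union, so predecessors that are still unconstrained do not restrict
  %% AvailIn. Each equality follows directly from the clauses of
  %% availset_intersect/2, availset_union/2 and ordset_intersect_availset/2.
  availset_lattice_example() ->
    [1,3] = availset_intersect(top, [1,3]),          % top is neutral
    [3]   = availset_intersect([1,3], [3,5]),        % plain ordsets:intersection/2
    top   = availset_union([1], top),                % top absorbs unions
    [2,3] = ordset_intersect_availset([2,3], top),   % no restriction from top
    ok.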
+ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Analysis pass +%% +%% The analysis pass collects the set of temps we're interested in splitting +%% (Spills), and computes three dataflow analyses for this subset of temps. +%% +%% Avail, which is the set of temps which are available in register from a +%% previous (potential) spill or restore without going through a HasCall +%% block. +%% Want, which is a liveness analysis for the subset of temps used by an +%% instruction that are also in Avail at that point. In other words, Want is +%% the set of temps that are split (has a register copy) at a particular +%% point. +%% Def, which are the temps that are already going to be spilled later, and so +%% need not be spilled when they're defined. +%% +%% Lastly, it computes the set Self for each block, which is the temps that have +%% avail-want pairs in the same block, and so should be split in that block even +%% if they're not in WantIn for the block. + +-spec analyse(target_cfg(), liveness(), target()) -> avail(). +analyse(CFG, Liveness, Target) -> + Avail0 = analyse_init(CFG, Liveness, Target), + RPO = reverse_postorder(CFG, Target), + AvailLs = [L || L <- RPO, not avail_has_call(L, Avail0)], + Avail1 = avail_dataf(AvailLs, Avail0), + Avail2 = analyse_filter_want(maps:keys(Avail1), Avail1), + PO = lists:reverse(RPO), + want_dataf(PO, Avail2). + +-spec analyse_init(target_cfg(), liveness(), target()) -> avail(). +analyse_init(CFG, Liveness, Target) -> + analyse_init(labels(CFG, Target), CFG, Liveness, Target, #{}, []). + +-spec analyse_init([label()], target_cfg(), liveness(), target(), spillset(), + [{label(), avail_bb()}]) + -> avail(). +analyse_init([], _CFG, _Liveness, Target, Spills0, Acc) -> + %% Precoloured temps can't be spilled + Spills = spills_filter(fun(R) -> not is_precoloured(R, Target) end, Spills0), + analyse_init_1(Acc, Spills, []); +analyse_init([L|Ls], CFG, Liveness, Target, Spills0, Acc) -> + {DefIn, Gen, Self, Want, HasCall0} = + analyse_scan(hipe_bb:code(bb(CFG, L, Target)), Target, + ordsets:new(), ordsets:new(), ordsets:new(), + ordsets:new()), + {Spills, Out, HasCall} = + case HasCall0 of + false -> {Spills0, availset_top(), false}; + {true, CallDefs} -> + Spill = ordsets:subtract(liveout(Liveness, L, Target), CallDefs), + {spills_add_list(Spill, Spills0), Gen, true} + end, + Pred = hipe_gen_cfg:pred(CFG, L), + Succ = hipe_gen_cfg:succ(CFG, L), + Val = #avail_bb{gen=Gen, want=Want, self=Self, out=Out, has_call=HasCall, + pred=Pred, succ=Succ, defin=DefIn}, + analyse_init(Ls, CFG, Liveness, Target, Spills, [{L, Val} | Acc]). + +-spec analyse_init_1([{label(), avail_bb()}], spillset(), + [{label(), avail_bb()}]) + -> avail(). +analyse_init_1([], _Spills, Acc) -> maps:from_list(Acc); +analyse_init_1([{L, Val0}|Vs], Spills, Acc) -> + #avail_bb{out=Out,gen=Gen,want=Want,self=Self} = Val0, + Val = Val0#avail_bb{ + out = spills_filter_availset(Out, Spills), + gen = spills_filter_availset(Gen, Spills), + want = spills_filter_availset(Want, Spills), + self = spills_filter_availset(Self, Spills)}, + analyse_init_1(Vs, Spills, [{L, Val} | Acc]). + +-type spillset() :: #{reg() => []}. +-spec spills_add_list([reg()], spillset()) -> spillset(). +spills_add_list([], Spills) -> Spills; +spills_add_list([R|Rs], Spills) -> spills_add_list(Rs, Spills#{R => []}). + +-spec spills_filter_availset(availset(), spillset()) -> availset(). 
+spills_filter_availset([E|Es], Spills) -> + case Spills of + #{E := _} -> [E|spills_filter_availset(Es, Spills)]; + #{} -> spills_filter_availset(Es, Spills) + end; +spills_filter_availset([], _) -> []; +spills_filter_availset(top, _) -> top. + +spills_filter(Fun, Spills) -> maps:filter(fun(K, _) -> Fun(K) end, Spills). + +-spec analyse_scan([instr()], target(), Defset, Gen, Self, Want) + -> {Defset, Gen, Self, Want, HasCall} when + HasCall :: false | {true, regset()}, + Defset :: regset(), + Gen :: availset(), + Self :: regset(), + Want :: regset(). +analyse_scan([], _Target, Defs, Gen, Self, Want) -> + {Defs, Gen, Self, Want, false}; +analyse_scan([I|Is], Target, Defs0, Gen0, Self0, Want0) -> + {DefL, UseL} = reg_def_use(I, Target), + Use = ordsets:from_list(UseL), + Def = ordsets:from_list(DefL), + Self = ordsets:union(ordsets:intersection(Use, Gen0), Self0), + Want = ordsets:union(ordsets:subtract(Use, Defs0), Want0), + Defs = ordsets:union(Def, Defs0), + case defines_all_alloc(I, Target) of + true -> + [] = Is, %assertion + {Defs, ordsets:new(), Self, Want, {true, Def}}; + false -> + Gen = ordsets:union(ordsets:union(Def, Use), Gen0), + analyse_scan(Is, Target, Defs, Gen, Self, Want) + end. + +-spec avail_dataf([label()], avail()) -> avail(). +avail_dataf(RPO, Avail0) -> + case avail_dataf_once(RPO, Avail0, 0) of + {Avail, 0} -> Avail; + {Avail, _Changed} -> + avail_dataf(RPO, Avail) + end. + +-spec avail_dataf_once([label()], avail(), non_neg_integer()) + -> {avail(), non_neg_integer()}. +avail_dataf_once([], Avail, Changed) -> {Avail, Changed}; +avail_dataf_once([L|Ls], Avail0, Changed0) -> + ABB = #avail_bb{out=OldOut, gen=Gen} = avail_get(L, Avail0), + In = avail_in(L, Avail0), + {Changed, Avail} = + case availset_union(In, Gen) of + OldOut -> {Changed0, Avail0}; + Out -> {Changed0+1, avail_set(L, ABB#avail_bb{out=Out}, Avail0)} + end, + avail_dataf_once(Ls, Avail, Changed). + +-spec analyse_filter_want([label()], avail()) -> avail(). +analyse_filter_want([], Avail) -> Avail; +analyse_filter_want([L|Ls], Avail0) -> + ABB = #avail_bb{want=Want0, defin=DefIn0} = avail_get(L, Avail0), + In = avail_in(L, Avail0), + Want = ordset_intersect_availset(Want0, In), + DefIn = ordset_intersect_availset(DefIn0, In), + Avail = avail_set(L, ABB#avail_bb{want=Want, defin=DefIn}, Avail0), + analyse_filter_want(Ls, Avail). + +-spec want_dataf([label()], avail()) -> avail(). +want_dataf(PO, Avail0) -> + case want_dataf_once(PO, Avail0, 0) of + {Avail, 0} -> Avail; + {Avail, _Changed} -> + want_dataf(PO, Avail) + end. + +-spec want_dataf_once([label()], avail(), non_neg_integer()) + -> {avail(), non_neg_integer()}. +want_dataf_once([], Avail, Changed) -> {Avail, Changed}; +want_dataf_once([L|Ls], Avail0, Changed0) -> + ABB0 = #avail_bb{want=OldIn,defin=OldDef} = avail_get(L, Avail0), + AvailIn = avail_in(L, Avail0), + Out = want_out(L, Avail0), + DefOut = def_out(L, Avail0), + {Changed, Avail} = + case {ordsets:union(ordset_intersect_availset(Out, AvailIn), OldIn), + ordsets:union(ordset_intersect_availset(DefOut, AvailIn), OldDef)} + of + {OldIn, OldDef} -> {Changed0, Avail0}; + {In, DefIn} -> + ABB = ABB0#avail_bb{want=In,defin=DefIn}, + {Changed0+1, avail_set(L, ABB, Avail0)} + end, + want_dataf_once(Ls, Avail, Changed). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Rewrite pass +-type subst_dict() :: orddict:orddict(reg(), reg()). +-type input() :: #{label() => subst_dict()}. + +-spec rewrite(target_cfg(), target(), avail()) -> target_cfg(). 
+rewrite(CFG, Target, Avail) -> + RPO = reverse_postorder(CFG, Target), + rewrite(RPO, Target, Avail, #{}, CFG). + +-spec rewrite([label()], target(), avail(), input(), target_cfg()) + -> target_cfg(). +rewrite([], _Target, _Avail, _Input, CFG) -> CFG; +rewrite([L|Ls], Target, Avail, Input0, CFG0) -> + SplitHere = split_in_block(L, Avail), + {Input1, LInput} = + case Input0 of + #{L := LInput0} -> {Input0, LInput0}; + #{} -> {Input0#{L => []}, []} % entry block + end, + ?ASSERT([] =:= [X || X <- SplitHere, orddict:is_key(X, LInput)]), + ?ASSERT(want_in(L, Avail) =:= orddict:fetch_keys(LInput)), + {CFG1, LOutput} = + case {SplitHere, LInput} of + {[], []} -> % optimisation (rewrite will do nothing, so skip it) + {CFG0, LInput}; + _ -> + Code0 = hipe_bb:code(BB=bb(CFG0, L, Target)), + DefOut = def_out(L, Avail), + {Code, LOutput0, _DefIn} = + rewrite_instrs(Code0, Target, LInput, DefOut, SplitHere), + {update_bb(CFG0, L, hipe_bb:code_update(BB, Code), Target), LOutput0} + end, + {Input, CFG} = rewrite_succs(avail_succ(L, Avail), Target, L, LOutput, Avail, + Input1, CFG1), + rewrite(Ls, Target, Avail, Input, CFG). + +-spec split_in_block(label(), avail()) -> ordsets:ordset(reg()). +split_in_block(L, Avail) -> + ordsets:subtract(ordsets:union(avail_self(L, Avail), want_out(L, Avail)), + want_in(L, Avail)). + +-spec rewrite_instrs([instr()], target(), subst_dict(), regset(), [reg()]) + -> {[instr()], subst_dict(), regset()}. +rewrite_instrs([], _Target, Output, DefOut, []) -> + {[], Output, DefOut}; +rewrite_instrs([I|Is], Target, Input0, BBDefOut, SplitHere0) -> + {TDef, TUse} = def_use(I, Target), + {Def, Use} = {reg_names(TDef, Target), reg_names(TUse, Target)}, + %% Restores are generated in forward order by picking temps from SplitHere as + %% they're used or defined. After the last instruction, all temps have been + %% picked. + {ISplits, SplitHere} = + lists:partition(fun(R) -> + lists:member(R, Def) orelse lists:member(R, Use) + end, SplitHere0), + {Input, Restores} = + case ISplits of + [] -> {Input0, []}; + _ -> + make_splits(ISplits, Target, TDef, TUse, Input0, []) + end, + %% Here's the recursive call + {Acc0, Output, DefOut} = + rewrite_instrs(Is, Target, Input, BBDefOut, SplitHere), + %% From here we're processing instructions in reverse order, because to avoid + %% redundant spills we need to walk the 'def' dataflow, which is in reverse. + SubstFun = fun(Temp) -> + case orddict:find(reg_nr(Temp, Target), Input) of + {ok, NewTemp} -> NewTemp; + error -> Temp + end + end, + Acc1 = insert_spills(TDef, Target, Input, DefOut, Acc0), + Acc = Restores ++ [subst_temps(SubstFun, I, Target) | Acc1], + DefIn = ordsets:union(DefOut, ordsets:from_list(Def)), + {Acc, Output, DefIn}. + +-spec make_splits([reg()], target(), [temp()], [temp()], subst_dict(), + [instr()]) + -> {subst_dict(), [instr()]}. +make_splits([], _Target, _TDef, _TUse, Input, Acc) -> + {Input, Acc}; +make_splits([S|Ss], Target, TDef, TUse, Input0, Acc0) -> + SubstReg = new_reg_nr(Target), + {Acc, Subst} = + case find_reg_temp(S, TUse, Target) of + error -> + {ok, Temp} = find_reg_temp(S, TDef, Target), + {Acc0, update_reg_nr(SubstReg, Temp, Target)}; + {ok, Temp} -> + Subst0 = update_reg_nr(SubstReg, Temp, Target), + Acc1 = [mk_move(Temp, Subst0, Target) | Acc0], + {Acc1, Subst0} + end, + Input = orddict:store(S, Subst, Input0), + make_splits(Ss, Target, TDef, TUse, Input, Acc). + +-spec find_reg_temp(reg(), [temp()], target()) -> error | {ok, temp()}. 
+find_reg_temp(_Reg, [], _Target) -> error; +find_reg_temp(Reg, [T|Ts], Target) -> + case reg_nr(T, Target) of + Reg -> {ok, T}; + _ -> find_reg_temp(Reg, Ts, Target) + end. + +-spec insert_spills([temp()], target(), subst_dict(), regset(), [instr()]) + -> [instr()]. +insert_spills([], _Target, _Input, _DefOut, Acc) -> Acc; +insert_spills([T|Ts], Target, Input, DefOut, Acc0) -> + R = reg_nr(T, Target), + Acc = + case orddict:find(R, Input) of + error -> Acc0; + {ok, Subst} -> + case lists:member(R, DefOut) of + true -> Acc0; + false -> [mk_move(Subst, T, Target) | Acc0] + end + end, + insert_spills(Ts, Target, Input, DefOut, Acc). + +-spec rewrite_succs([label()], target(), label(), subst_dict(), avail(), + input(), target_cfg()) -> {input(), target_cfg()}. +rewrite_succs([], _Target, _P, _POutput, _Avail, Input, CFG) -> {Input, CFG}; +rewrite_succs([L|Ls], Target, P, POutput, Avail, Input0, CFG0) -> + NewLInput = orddict_with_ordset(want_in(L, Avail), POutput), + {Input, CFG} = + case Input0 of + #{L := LInput} -> + CFG2 = + case required_phi_moves(LInput, NewLInput) of + [] -> CFG0; + ReqMovs -> + PhiLb = new_label(Target), + Code = [mk_move(S,D,Target) || {S,D} <- ReqMovs] + ++ [mk_goto(L, Target)], + PhiBB = hipe_bb:mk_bb(Code), + CFG1 = update_bb(CFG0, PhiLb, PhiBB, Target), + bb_redirect_jmp(L, PhiLb, P, CFG1, Target) + end, + {Input0, CFG2}; + #{} -> + {Input0#{L => NewLInput}, CFG0} + end, + rewrite_succs(Ls, Target, P, POutput, Avail, Input, CFG). + +-spec bb_redirect_jmp(label(), label(), label(), target_cfg(), target()) + -> target_cfg(). +bb_redirect_jmp(From, To, Lb, CFG, Target) -> + BB0 = bb(CFG, Lb, Target), + Last = redirect_jmp(hipe_bb:last(BB0), From, To, Target), + BB = hipe_bb:code_update(BB0, hipe_bb:butlast(BB0) ++ [Last]), + update_bb(CFG, Lb, BB, Target). + +-spec required_phi_moves(subst_dict(), subst_dict()) -> [{reg(), reg()}]. +required_phi_moves([], []) -> []; +required_phi_moves([P|Is], [P|Os]) -> required_phi_moves(Is, Os); +required_phi_moves([{K, In}|Is], [{K, Out}|Os]) -> + [{Out, In}|required_phi_moves(Is, Os)]. + +%% @doc Returns a new orddict with the keys in Set and their associated values. +-spec orddict_with_ordset(ordsets:ordset(K), orddict:orddict(K, V)) + -> orddict:orddict(K, V). +orddict_with_ordset([S|Ss], [{K, _}|_]=Dict) when S < K -> + orddict_with_ordset(Ss, Dict); +orddict_with_ordset([S|_]=Set, [{K, _}|Ds]) when S > K -> + orddict_with_ordset(Set, Ds); +orddict_with_ordset([_S|Ss], [{_K, _}=P|Ds]) -> % _S == _K + [P|orddict_with_ordset(Ss, Ds)]; +orddict_with_ordset([], _) -> []; +orddict_with_ordset(_, []) -> []. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Target module interface functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-define(TGT_IFACE_0(N), N( {M,C}) -> M:N( C)). +-define(TGT_IFACE_1(N), N(A1, {M,C}) -> M:N(A1, C)). +-define(TGT_IFACE_2(N), N(A1,A2, {M,C}) -> M:N(A1,A2, C)). +-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)). + +?TGT_IFACE_2(bb). +?TGT_IFACE_1(def_use). +?TGT_IFACE_1(defines_all_alloc). +?TGT_IFACE_1(is_precoloured). +?TGT_IFACE_1(labels). +?TGT_IFACE_1(mk_goto). +?TGT_IFACE_2(mk_move). +?TGT_IFACE_0(new_label). +?TGT_IFACE_0(new_reg_nr). +?TGT_IFACE_3(redirect_jmp). +?TGT_IFACE_1(reg_nr). +?TGT_IFACE_1(reverse_postorder). +?TGT_IFACE_2(subst_temps). +?TGT_IFACE_3(update_bb). +?TGT_IFACE_2(update_reg_nr). + +liveout(Liveness, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:liveout(Liveness, L, TgtCtx), Target)). 
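  %% For clarity: each ?TGT_IFACE_N macro above merely unpacks the
  %% {TargetModule, Context} pair and forwards the call. For example,
  %% ?TGT_IFACE_2(bb). expands to roughly (up to argument names):
  %%
  %%   bb(A1, A2, {M, C}) -> M:bb(A1, A2, C).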
+ +reg_names(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. + +reg_def_use(I, Target) -> + {TDef, TUse} = def_use(I, Target), + {reg_names(TDef, Target), reg_names(TUse, Target)}. -- cgit v1.2.3 From cc115ebc67a465233c7740efb42e0bc9584ad352 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20L=C3=A5ng?= Date: Sat, 24 Sep 2016 09:37:46 +0200 Subject: hipe: Add branch prediction accessor ra callbacks Adds a new register allocator callback Target:branch_preds(Instr, Context) which, for a control flow instruction Instr, returns a list of tuples {Target, Probability} for each label name Target that Instr may branch to. Probability is a float between 0.0 and 1.0 and corresponds to the predicted probability that control flow branches to the corresponding target. The probabilities may sum to at most 1.0 (rounding errors aside). Note that a sum less than 1.0 is valid. --- lib/hipe/regalloc/hipe_amd64_specific_sse2.erl | 6 ++++++ lib/hipe/regalloc/hipe_arm_specific.erl | 6 ++++++ lib/hipe/regalloc/hipe_ppc_specific.erl | 6 ++++++ lib/hipe/regalloc/hipe_ppc_specific_fp.erl | 6 ++++++ lib/hipe/regalloc/hipe_sparc_specific.erl | 6 ++++++ lib/hipe/regalloc/hipe_sparc_specific_fp.erl | 6 ++++++ lib/hipe/regalloc/hipe_x86_specific.erl | 6 ++++++ 7 files changed, 42 insertions(+) (limited to 'lib/hipe/regalloc') diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl index 341233e851..8914e7cac3 100644 --- a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl +++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl @@ -60,6 +60,9 @@ update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights +-export([branch_preds/2]). + %%---------------------------------------------------------------------------- -include("../flow/cfg.hrl"). @@ -156,6 +159,9 @@ bb(CFG, L, _) -> update_bb(CFG,L,BB,_) -> hipe_x86_cfg:bb_add(CFG,L,BB). +branch_preds(Instr,_) -> + hipe_x86_cfg:branch_preds(Instr). + %% AMD64 stuff def_use(Instruction, _) -> diff --git a/lib/hipe/regalloc/hipe_arm_specific.erl b/lib/hipe/regalloc/hipe_arm_specific.erl index 224837a34a..517ad83982 100644 --- a/lib/hipe/regalloc/hipe_arm_specific.erl +++ b/lib/hipe/regalloc/hipe_arm_specific.erl @@ -56,6 +56,9 @@ update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights, hipe_range_split +-export([branch_preds/2]). + check_and_rewrite(CFG, Coloring, no_context) -> hipe_arm_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). @@ -119,6 +122,9 @@ bb(CFG,L,_) -> update_bb(CFG,L,BB,_) -> hipe_arm_cfg:bb_add(CFG,L,BB). +branch_preds(Branch,_) -> + hipe_arm_cfg:branch_preds(Branch). + %% ARM stuff def_use(Instruction, Ctx) -> diff --git a/lib/hipe/regalloc/hipe_ppc_specific.erl b/lib/hipe/regalloc/hipe_ppc_specific.erl index bc4bd1972e..50ab0f04cf 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific.erl @@ -56,6 +56,9 @@ update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights +-export([branch_preds/2]). + check_and_rewrite(CFG, Coloring, _) -> hipe_ppc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). @@ -119,6 +122,9 @@ bb(CFG,L,_) -> update_bb(CFG,L,BB,_) -> hipe_ppc_cfg:bb_add(CFG,L,BB). +branch_preds(Instr,_) -> + hipe_ppc_cfg:branch_preds(Instr). 
+ %% PowerPC stuff def_use(Instruction, Ctx) -> diff --git a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl index 3aab14f061..5a4f63fc78 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl @@ -56,6 +56,9 @@ update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights +-export([branch_preds/2]). + check_and_rewrite(CFG, Coloring, _) -> hipe_ppc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring). @@ -112,6 +115,9 @@ bb(CFG, L, _) -> update_bb(CFG,L,BB,_) -> hipe_ppc_cfg:bb_add(CFG,L,BB). +branch_preds(Instr,_) -> + hipe_ppc_cfg:branch_preds(Instr). + %% PowerPC stuff def_use(I, Ctx) -> diff --git a/lib/hipe/regalloc/hipe_sparc_specific.erl b/lib/hipe/regalloc/hipe_sparc_specific.erl index 12642709b9..9eb57dafd2 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific.erl @@ -56,6 +56,9 @@ update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights, hipe_range_split +-export([branch_preds/2]). + check_and_rewrite(CFG, Coloring, no_context) -> hipe_sparc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). @@ -119,6 +122,9 @@ bb(CFG,L,_) -> update_bb(CFG,L,BB,_) -> hipe_sparc_cfg:bb_add(CFG,L,BB). +branch_preds(Branch,_) -> + hipe_sparc_cfg:branch_preds(Branch). + %% SPARC stuff def_use(Instruction, Ctx) -> diff --git a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl index ba4849d2d5..9993af81c2 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl @@ -56,6 +56,9 @@ update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights, hipe_range_split +-export([branch_preds/2]). + check_and_rewrite(CFG, Coloring, no_context) -> hipe_sparc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring). @@ -112,6 +115,9 @@ bb(CFG, L, _) -> update_bb(CFG,L,BB,_) -> hipe_sparc_cfg:bb_add(CFG,L,BB). +branch_preds(Branch,_) -> + hipe_sparc_cfg:branch_preds(Branch). + %% SPARC stuff def_use(I, Ctx) -> diff --git a/lib/hipe/regalloc/hipe_x86_specific.erl b/lib/hipe/regalloc/hipe_x86_specific.erl index 0d9d7b751e..f21384d377 100644 --- a/lib/hipe/regalloc/hipe_x86_specific.erl +++ b/lib/hipe/regalloc/hipe_x86_specific.erl @@ -73,6 +73,9 @@ update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights +-export([branch_preds/2]). + check_and_rewrite(CFG, Coloring, _) -> ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(CFG, Coloring, 'normal'). @@ -160,6 +163,9 @@ bb(CFG,L,_) -> update_bb(CFG,L,BB,_) -> hipe_x86_cfg:bb_add(CFG,L,BB). +branch_preds(Instr,_) -> + hipe_x86_cfg:branch_preds(Instr). + %% X86 stuff def_use(Instruction,_) -> -- cgit v1.2.3 From d1d26f4bf9da3cc5eab4e918df771d67fe9e6bb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20L=C3=A5ng?= Date: Thu, 16 Mar 2017 16:39:26 +0100 Subject: hipe: Add range splitter range_split MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hipe_range_split is a complex live range splitter, more sophisticated thatn hipe_restore_reuse, but still targeted specifically at temporaries forced onto stack by being live over call instructions. hipe_range_split partitions the control flow graph at call instructions, like hipe_regalloc_prepass. Splitting decisions are made on a per partition and per temporary basis. 
There are three different ways in which hipe_range_split may choose to split a temporary in a program partition: * Mode1: Spill the temp before calls, and restore it after them * Mode2: Spill the temp after definitions, restore it after calls * Mode3: Spill the temp after definitions, restore it before uses To pick which of these should be used for each temp×partition pair, hipe_range_split uses a cost function. The cost is simply the sum of the cost of all expected stack accesses, and the cost for an individual stack access is based on the probability weight of the basic block that it resides in. This biases the range splitter so that it attempts moving stack accesses from a function's hot path to the cold path. hipe_bb_weights is used to compute the probability weights. Mode3 is effectively the same as what hipe_restore_reuse does. Because of this, hipe_range_split reuses the analysis pass of hipe_restore_reuse in order to compute the minimal needed set of spills and restores. The reason mode3 was introduced to hipe_range_split rather than simply composing it with hipe_restore_reuse (by running both) is that such a composition resulted in poor register allocation due to insufficiently strong move coalescing in the register allocator. The cost function heuristic has a couple of tuning knobs: * {range_split_min_gain, Gain} (default: 1.1, range: [0.0, inf)) The minimum proportional improvement that the cost of all stack accesses to a temp must display in order for that temp to be split. * {range_split_mode1_fudge, Factor} (default: 1.1, range: [0.0, inf)) Costs for mode1 are multiplied by this factor in order to discourage it when it provides marginal benefits. The justification is that mode1 causes temps to be live the longest, thus leading to higher register pressure. * {range_split_weight_power, Factor} (default: 2, range: (0.0, inf)) Adjusts how much effect the basic block weights have on the cost of a stack access. A stack access in a block with weight 1.0 has cost 1.0, and a stack access in a block with weight 0.01 has cost 1/Factor. Additionally, the option range_split_weights chooses whether the basic block weights are used at all. When the input is very big, hipe_range_split automatically falls back to using only hipe_restore_reuse in order to keep compile times under control. Note that this is not only because hipe_range_split itself is slow, but also because the resulting program is slower to register allocate and is not as partitionable by hipe_regalloc_prepass. hipe_restore_reuse, on the other hand, does not affect the program's partitionability. The hipe_range_split pass is controlled by a new option ra_range_split. ra_range_split is added to o2, and ra_restore_reuse is disabled in o2.
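As a worked example of how range_split_weight_power shapes costs, the weight function used in hipe_range_split.erl below is pow(Weight, log(Power)/log(100)), so a block of weight 1.0 costs 1.0 and a block of weight 0.01 costs exactly 1/Power; the function name is hypothetical:

  %% Mirrors ?WEIGHT_CONST_FUN/?WEIGHT_FUN in hipe_range_split.erl, with the
  %% default range_split_weight_power = 2.
  weight_cost_example() ->
    Power = 2,
    Const = math:log(Power) / math:log(100),   % ?WEIGHT_CONST_FUN(Power)
    1.0   = math:pow(1.0, Const),              % hot block: full cost
    Cold  = math:pow(0.01, Const),             % cold block
    true  = abs(Cold - 1 / Power) < 1.0e-9,    % i.e. cost 1/Factor, as stated
    ok.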
--- lib/hipe/regalloc/Makefile | 1 + lib/hipe/regalloc/hipe_range_split.erl | 1187 ++++++++++++++++++++++++++++++ lib/hipe/regalloc/hipe_regalloc_loop.erl | 18 +- lib/hipe/regalloc/hipe_restore_reuse.erl | 15 +- 4 files changed, 1215 insertions(+), 6 deletions(-) create mode 100644 lib/hipe/regalloc/hipe_range_split.erl (limited to 'lib/hipe/regalloc') diff --git a/lib/hipe/regalloc/Makefile b/lib/hipe/regalloc/Makefile index 73ac232f51..81a92e5d35 100644 --- a/lib/hipe/regalloc/Makefile +++ b/lib/hipe/regalloc/Makefile @@ -50,6 +50,7 @@ MODULES = hipe_ig hipe_ig_moves hipe_moves \ hipe_optimistic_regalloc \ hipe_coalescing_regalloc \ hipe_graph_coloring_regalloc \ + hipe_range_split \ hipe_regalloc_loop \ hipe_regalloc_prepass \ hipe_restore_reuse \ diff --git a/lib/hipe/regalloc/hipe_range_split.erl b/lib/hipe/regalloc/hipe_range_split.erl new file mode 100644 index 0000000000..39b086d9f7 --- /dev/null +++ b/lib/hipe/regalloc/hipe_range_split.erl @@ -0,0 +1,1187 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% TEMPORARY LIVE RANGE SPLITTING PASS +%% +%% Live range splitting is useful to allow a register allocator to allocate a +%% temporary to register for a part of its lifetime, even if it cannot be for +%% the entirety. This improves register allocation quality, at the cost of +%% making the allocation problem more time and memory intensive to solve. +%% +%% Optimal allocation can be achieved if all temporaries are split at every +%% program point (between all instructions), but this makes register allocation +%% infeasibly slow in practice. Instead, this module uses heuristics to choose +%% which temporaries should have their live ranges split, and at which points. +%% +%% The range splitter only considers temps which are live during a call +%% instruction, since they're known to be spilled. The control-flow graph is +%% partitioned at call instructions and splitting decisions are made separately +%% for each partition. The register copy of a temp (if any) gets a separate name +%% in each partition. +%% +%% There are three different ways the range splitter may choose to split a +%% temporary in a program partition: +%% +%% * Mode1: Spill the temp before calls, and restore it after them +%% * Mode2: Spill the temp after definitions, restore it after calls +%% * Mode3: Spill the temp after definitions, restore it before uses +%% +%% To pick which of these should be used for each temp×partition pair, the range +%% splitter uses a cost function. The cost is simply the sum of the cost of all +%% expected stack accesses, and the cost for an individual stack access is based +%% on the probability weight of the basic block that it resides in. This biases +%% the range splitter so that it attempts moving stack accesses from a function's +%% hot path to the cold path.
+%% +%% The heuristic has a couple of tuning knobs, adjusting its preference for +%% different spilling modes, aggressiveness, and how much influence the basic +%% block probability weights have. +%% +%% Edge case not handled: Call instructions directly defining a pseudo. In that +%% case, if that pseudo has been selected for mode2 spills, no spill is inserted +%% after the call. +-module(hipe_range_split). + +-export([split/5]). + +-compile(inline). + +%% -define(DO_ASSERT, 1). +%% -define(DEBUG, 1). +-include("../main/hipe.hrl"). + +%% Heuristic tuning constants +-define(DEFAULT_MIN_GAIN, 1.1). % option: range_split_min_gain +-define(DEFAULT_MODE1_FUDGE, 1.1). % option: range_split_mode1_fudge +-define(DEFAULT_WEIGHT_POWER, 2). % option: range_split_weight_power +-define(WEIGHT_CONST_FUN(Power), math:log(Power)/math:log(100)). +-define(WEIGHT_FUN(Wt, Const), math:pow(Wt, Const)). +-define(HEUR_MAX_TEMPS, 20000). + +-type target_cfg() :: any(). +-type target_instr() :: any(). +-type target_temp() :: any(). +-type liveness() :: any(). +-type target_module() :: module(). +-type target_context() :: any(). +-type target() :: {target_module(), target_context()}. +-type liveset() :: ordsets:ordset(temp()). +-type temp() :: non_neg_integer(). +-type label() :: non_neg_integer(). + +-spec split(target_cfg(), liveness(), target_module(), target_context(), + comp_options()) + -> target_cfg(). +split(TCFG0, Liveness, TargetMod, TargetContext, Options) -> + Target = {TargetMod, TargetContext}, + NoTemps = number_of_temporaries(TCFG0, Target), + if NoTemps > ?HEUR_MAX_TEMPS -> + ?debug_msg("~w: Too many temps (~w), falling back on restore_reuse.~n", + [?MODULE, NoTemps]), + hipe_restore_reuse:split(TCFG0, Liveness, TargetMod, TargetContext); + true -> + Wts = compute_weights(TCFG0, TargetMod, TargetContext, Options), + {CFG0, Temps} = convert(TCFG0, Target), + Avail = avail_analyse(TCFG0, Liveness, Target), + Defs = def_analyse(CFG0, TCFG0), + RDefs = rdef_analyse(CFG0), + PLive = plive_analyse(CFG0), + {CFG, DUCounts, Costs, DSets0} = + scan(CFG0, Liveness, PLive, Wts, Defs, RDefs, Avail, Target), + {DSets, _} = hipe_dsets:to_map(DSets0), + Renames = decide(DUCounts, Costs, Target, Options), + rewrite(CFG, TCFG0, Target, Liveness, PLive, Defs, Avail, DSets, Renames, + Temps) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Internal program representation +%% +%% Second pass: Convert cfg to internal representation + +-record(cfg, { + rpo_labels :: [label()], + bbs :: #{label() => bb()} + }). +-type cfg() :: #cfg{}. + +cfg_bb(L, #cfg{bbs=BBS}) -> maps:get(L, BBS). + +cfg_postorder(#cfg{rpo_labels=RPO}) -> lists:reverse(RPO). + +-record(bb, { + code :: [code_elem()], + %% If the last instruction of code defines all allocatable registers + has_call :: boolean(), + succ :: [label()] + }). +-type bb() :: #bb{}. +-type code_elem() :: instr() | mode2_spills() | mode3_restores(). + +bb_code(#bb{code=Code}) -> Code. +bb_has_call(#bb{has_call=HasCall}) -> HasCall. +bb_succ(#bb{succ=Succ}) -> Succ. + +bb_butlast(#bb{code=Code}) -> + bb_butlast_1(Code). + +bb_butlast_1([_Last]) -> []; +bb_butlast_1([I|Is]) -> [I|bb_butlast_1(Is)]. + +bb_last(#bb{code=Code}) -> lists:last(Code). + +-record(instr, { + i :: target_instr(), + def :: ordsets:ordset(temp()), + use :: ordsets:ordset(temp()) + }). +-type instr() :: #instr{}. + +-record(mode2_spills, { + temps :: ordsets:ordset(temp()) + }). +-type mode2_spills() :: #mode2_spills{}. 
+ +-record(mode3_restores, { + temps :: ordsets:ordset(temp()) + }). +-type mode3_restores() :: #mode3_restores{}. + +-spec convert(target_cfg(), target()) -> {cfg(), temps()}. +convert(CFG, Target) -> + RPO = reverse_postorder(CFG, Target), + {BBsList, Temps} = convert_bbs(RPO, CFG, Target, #{}, []), + {#cfg{rpo_labels = RPO, + bbs = maps:from_list(BBsList)}, + Temps}. + +convert_bbs([], _CFG, _Target, Temps, Acc) -> {Acc, Temps}; +convert_bbs([L|Ls], CFG, Target, Temps0, Acc) -> + Succs = hipe_gen_cfg:succ(CFG, L), + TBB = bb(CFG, L, Target), + TCode = hipe_bb:code(TBB), + {Code, Last, Temps} = convert_code(TCode, Target, Temps0, []), + HasCall = defines_all_alloc(Last#instr.i, Target), + BB = #bb{code = Code, + has_call = HasCall, + succ = Succs}, + convert_bbs(Ls, CFG, Target, Temps, [{L,BB}|Acc]). + +convert_code([], _Target, Temps, [Last|_]=Acc) -> + {lists:reverse(Acc), Last, Temps}; +convert_code([TI|TIs], Target, Temps0, Acc) -> + {TDef, TUse} = def_use(TI, Target), + I = #instr{i = TI, + def = ordsets:from_list(reg_names(TDef, Target)), + use = ordsets:from_list(reg_names(TUse, Target))}, + Temps = add_temps(TUse, Target, add_temps(TDef, Target, Temps0)), + convert_code(TIs, Target, Temps, [I|Acc]). + +-type temps() :: #{temp() => target_temp()}. +add_temps([], _Target, Temps) -> Temps; +add_temps([T|Ts], Target, Temps) -> + add_temps(Ts, Target, Temps#{reg_nr(T, Target) => T}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fourth pass: P({DEF}) lattice fwd dataflow (for eliding stores at SPILL +%% splits) +-type defsi() :: #{label() => defseti() | {call, defseti(), defseti()}}. +-type defs() :: #{label() => defsetf()}. + +-spec def_analyse(cfg(), target_cfg()) -> defs(). +def_analyse(CFG = #cfg{rpo_labels = RPO}, TCFG) -> + Defs0 = def_init(CFG), + def_dataf(RPO, TCFG, Defs0). + +-spec def_init(cfg()) -> defsi(). +def_init(#cfg{bbs = BBs}) -> + maps:from_list( + [begin + {L, case HasCall of + false -> def_init_scan(bb_code(BB), defseti_new()); + true -> + {call, def_init_scan(bb_butlast(BB), defseti_new()), + defseti_from_ordset((bb_last(BB))#instr.def)} + end} + end || {L, BB = #bb{has_call=HasCall}} <- maps:to_list(BBs)]). + +def_init_scan([], Defset) -> Defset; +def_init_scan([#instr{def=Def}|Is], Defset0) -> + Defset = defseti_add_ordset(Def, Defset0), + def_init_scan(Is, Defset). + +-spec def_dataf([label()], target_cfg(), defsi()) -> defs(). +def_dataf(Labels, TCFG, Defs0) -> + case def_dataf_once(Labels, TCFG, Defs0, 0) of + {Defs, 0} -> + def_finalise(Defs); + {Defs, _Changed} -> + def_dataf(Labels, TCFG, Defs) + end. + +-spec def_finalise(defsi()) -> defs(). +def_finalise(Defs) -> + maps:from_list([{K, defseti_finalise(BL)} + || {K, {call, BL, _}} <- maps:to_list(Defs)]). + +-spec def_dataf_once([label()], target_cfg(), defsi(), non_neg_integer()) + -> {defsi(), non_neg_integer()}. +def_dataf_once([], _TCFG, Defs, Changed) -> {Defs, Changed}; +def_dataf_once([L|Ls], TCFG, Defs0, Changed0) -> + AddPreds = + fun(Defset1) -> + lists:foldl(fun(P, Defset2) -> + defseti_union(defout(P, Defs0), Defset2) + end, Defset1, hipe_gen_cfg:pred(TCFG, L)) + end, + Defset = + case Defset0 = maps:get(L, Defs0) of + {call, Butlast, Defout} -> {call, AddPreds(Butlast), Defout}; + _ -> AddPreds(Defset0) + end, + Changed = case Defset =:= Defset0 of + true -> Changed0; + false -> Changed0+1 + end, + def_dataf_once(Ls, TCFG, Defs0#{L := Defset}, Changed). + +-spec defout(label(), defsi()) -> defseti(). 
+defout(L, Defs) -> + case maps:get(L, Defs) of + {call, _DefButLast, Defout} -> Defout; + Defout -> Defout + end. + +-spec defbutlast(label(), defs()) -> defsetf(). +defbutlast(L, Defs) -> maps:get(L, Defs). + +-spec defseti_new() -> defseti(). +-spec defseti_union(defseti(), defseti()) -> defseti(). +-spec defseti_add_ordset(ordset:ordset(temp()), defseti()) -> defseti(). +-spec defseti_from_ordset(ordset:ordset(temp())) -> defseti(). +-spec defseti_finalise(defseti()) -> defsetf(). +-spec defsetf_member(temp(), defsetf()) -> boolean(). +-spec defsetf_intersect_ordset(ordsets:ordset(temp()), defsetf()) + -> ordsets:ordset(temp()). + +-type defseti() :: bitord(). +defseti_new() -> bitord_new(). +defseti_union(A, B) -> bitord_union(A, B). +defseti_add_ordset(OS, D) -> defseti_union(defseti_from_ordset(OS), D). +defseti_from_ordset(OS) -> bitord_from_ordset(OS). +defseti_finalise(D) -> bitarr_from_bitord(D). + +-type defsetf() :: bitarr(). +defsetf_member(E, D) -> bitarr_get(E, D). + +defsetf_intersect_ordset([], _D) -> []; +defsetf_intersect_ordset([E|Es], D) -> + case bitarr_get(E, D) of + true -> [E|defsetf_intersect_ordset(Es,D)]; + false -> defsetf_intersect_ordset(Es,D) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fifth pass: P({DEF}) lattice reverse dataflow (for eliding stores at defines +%% in mode2) +-type rdefsi() :: #{label() => + {call, rdefseti(), [label()]} + | {nocall, rdefseti(), rdefseti(), [label()]}}. +-type rdefs() :: #{label() => {final, rdefsetf(), [label()]}}. + +-spec rdef_analyse(cfg()) -> rdefs(). +rdef_analyse(CFG = #cfg{rpo_labels=RPO}) -> + Defs0 = rdef_init(CFG), + PO = rdef_postorder(RPO, CFG, []), + rdef_dataf(PO, Defs0). + +%% Filter out 'call' labels, since they don't change +-spec rdef_postorder([label()], cfg(), [label()]) -> [label()]. +rdef_postorder([], _CFG, Acc) -> Acc; +rdef_postorder([L|Ls], CFG, Acc) -> + case bb_has_call(cfg_bb(L, CFG)) of + true -> rdef_postorder(Ls, CFG, Acc); + false -> rdef_postorder(Ls, CFG, [L|Acc]) + end. + +-spec rdef_init(cfg()) -> rdefsi(). +rdef_init(#cfg{bbs = BBs}) -> + maps:from_list( + [{L, case HasCall of + true -> + Defin = rdef_init_scan(bb_butlast(BB), rdefseti_empty()), + {call, Defin, Succs}; + false -> + Gen = rdef_init_scan(bb_code(BB), rdefseti_empty()), + {nocall, Gen, rdefseti_top(), Succs} + end} + || {L, BB = #bb{has_call=HasCall, succ=Succs}} <- maps:to_list(BBs)]). + +-spec rdef_init_scan([instr()], rdefseti()) -> rdefseti(). +rdef_init_scan([], Defset) -> Defset; +rdef_init_scan([#instr{def=Def}|Is], Defset0) -> + Defset = rdefseti_add_ordset(Def, Defset0), + rdef_init_scan(Is, Defset). + +-spec rdef_dataf([label()], rdefsi()) -> rdefs(). +rdef_dataf(Labels, Defs0) -> + case rdef_dataf_once(Labels, Defs0, 0) of + {Defs, 0} -> + rdef_finalise(Defs); + {Defs, _Changed} -> + rdef_dataf(Labels, Defs) + end. + +-spec rdef_finalise(rdefsi()) -> rdefs(). +rdef_finalise(Defs) -> + maps:map(fun(L, V) -> + Succs = rsuccs_val(V), + Defout0 = rdefout_intersect(L, Defs, rdefseti_top()), + {final, rdefset_finalise(Defout0), Succs} + end, Defs). + +-spec rdef_dataf_once([label()], rdefsi(), non_neg_integer()) + -> {rdefsi(), non_neg_integer()}. 
+rdef_dataf_once([], Defs, Changed) -> {Defs, Changed};
+rdef_dataf_once([L|Ls], Defs0, Changed0) ->
+  #{L := {nocall, Gen, Defin0, Succs}} = Defs0,
+  Defin = rdefseti_union(Gen, rdefout_intersect(L, Defs0, Defin0)),
+  Defset = {nocall, Gen, Defin, Succs},
+  Changed = case Defin =:= Defin0 of
+              true -> Changed0;
+              false -> Changed0+1
+            end,
+  rdef_dataf_once(Ls, Defs0#{L := Defset}, Changed).
+
+-spec rdefin(label(), rdefsi()) -> rdefseti().
+rdefin(L, Defs) -> rdefin_val(maps:get(L, Defs)).
+rdefin_val({nocall, _Gen, Defin, _Succs}) -> Defin;
+rdefin_val({call, Defin, _Succs}) -> Defin.
+
+-spec rsuccs(label(), rdefsi()) -> [label()].
+rsuccs(L, Defs) -> rsuccs_val(maps:get(L, Defs)).
+rsuccs_val({nocall, _Gen, _Defin, Succs}) -> Succs;
+rsuccs_val({call, _Defin, Succs}) -> Succs.
+
+-spec rdefout(label(), rdefs()) -> rdefsetf().
+rdefout(L, Defs) ->
+  #{L := {final, Defout, _Succs}} = Defs,
+  Defout.
+
+-spec rdefout_intersect(label(), rdefsi(), rdefseti()) -> rdefseti().
+rdefout_intersect(L, Defs, Init) ->
+  lists:foldl(fun(S, Acc) ->
+                  rdefseti_intersect(rdefin(S, Defs), Acc)
+              end, Init, rsuccs(L, Defs)).
+
+-type rdefseti() :: bitord() | top.
+rdefseti_top() -> top.
+rdefseti_empty() -> bitord_new().
+-spec rdefseti_from_ordset(ordsets:ordset(temp())) -> rdefseti().
+rdefseti_from_ordset(OS) -> bitord_from_ordset(OS).
+
+-spec rdefseti_add_ordset(ordsets:ordset(temp()), rdefseti()) -> rdefseti().
+rdefseti_add_ordset(_, top) -> top; % Should never happen in rdef_dataf
+rdefseti_add_ordset(OS, D) -> rdefseti_union(rdefseti_from_ordset(OS), D).
+
+-spec rdefseti_union(rdefseti(), rdefseti()) -> rdefseti().
+rdefseti_union(top, _) -> top;
+rdefseti_union(_, top) -> top;
+rdefseti_union(A, B) -> bitord_union(A, B).
+
+-spec rdefseti_intersect(rdefseti(), rdefseti()) -> rdefseti().
+rdefseti_intersect(top, D) -> D;
+rdefseti_intersect(D, top) -> D;
+rdefseti_intersect(A, B) -> bitord_intersect(A, B).
+
+-type rdefsetf() :: {arr, bitarr()} | top.
+-spec rdefset_finalise(rdefseti()) -> rdefsetf().
+rdefset_finalise(top) -> top;
+rdefset_finalise(Ord) -> {arr, bitarr_from_bitord(Ord)}.
+
+%% rdefsetf_top() -> top.
+rdefsetf_empty() -> {arr, bitarr_new()}.
+
+-spec rdefsetf_add_ordset(ordsets:ordset(temp()), rdefsetf()) -> rdefsetf().
+rdefsetf_add_ordset(_, top) -> top;
+rdefsetf_add_ordset(OS, {arr, Arr}) ->
+  {arr, lists:foldl(fun bitarr_set/2, Arr, OS)}.
+
+-spec rdef_step(instr(), rdefsetf()) -> rdefsetf().
+rdef_step(#instr{def=Def}, Defset) ->
+  %% ?ASSERT(not defines_all_alloc(I, Target)),
+  rdefsetf_add_ordset(Def, Defset).
+
+-spec ordset_subtract_rdefsetf(ordsets:ordset(temp()), rdefsetf())
+                              -> ordsets:ordset(temp()).
+ordset_subtract_rdefsetf(_, top) -> [];
+ordset_subtract_rdefsetf(OS, {arr, Arr}) ->
+  %% Lazy implementation; could do better if OS can grow
+  lists:filter(fun(E) -> not bitarr_get(E, Arr) end, OS).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Integer sets represented as bit sets
+%%
+%% Two representations; bitord() and bitarr()
+-define(LIMB_IX_BITS, 11).
+-define(LIMB_BITS, (1 bsl ?LIMB_IX_BITS)).
+-define(LIMB_IX(Index), (Index bsr ?LIMB_IX_BITS)).
+-define(BIT_IX(Index), (Index band (?LIMB_BITS - 1))).
+-define(BIT_MASK(Index), (1 bsl ?BIT_IX(Index))).
+
+%% bitord(): fast at union/2 and can be compared for equality with '=:='
+-type bitord() :: orddict:orddict(non_neg_integer(), 0..((1 bsl ?LIMB_BITS)-1)).
+
+-spec bitord_new() -> bitord().
+bitord_new() -> [].
+
+-spec bitord_union(bitord(), bitord()) -> bitord().
+bitord_union(Lhs, Rhs) -> + orddict:merge(fun(_, L, R) -> L bor R end, Lhs, Rhs). + +-spec bitord_intersect(bitord(), bitord()) -> bitord(). +bitord_intersect([], _) -> []; +bitord_intersect(_, []) -> []; +bitord_intersect([{K, L}|Ls], [{K, R}|Rs]) -> + [{K, L band R} | bitord_intersect(Ls, Rs)]; +bitord_intersect([{LK, _}|Ls], [{RK, _}|_]=Rs) when LK < RK -> + bitord_intersect(Ls, Rs); +bitord_intersect([{LK, _}|_]=Ls, [{RK, _}|Rs]) when LK > RK -> + bitord_intersect(Ls, Rs). + +-spec bitord_from_ordset(ordsets:ordset(non_neg_integer())) -> bitord(). +bitord_from_ordset([]) -> []; +bitord_from_ordset([B|Bs]) -> + bitord_from_ordset_1(Bs, ?LIMB_IX(B), ?BIT_MASK(B)). + +bitord_from_ordset_1([B|Bs], Key, Val) when Key =:= ?LIMB_IX(B) -> + bitord_from_ordset_1(Bs, Key, Val bor ?BIT_MASK(B)); +bitord_from_ordset_1([B|Bs], Key, Val) -> + [{Key,Val} | bitord_from_ordset_1(Bs, ?LIMB_IX(B), ?BIT_MASK(B))]; +bitord_from_ordset_1([], Key, Val) -> [{Key, Val}]. + +%% bitarr(): fast (enough) at get/2 +-type bitarr() :: array:array(0..((1 bsl ?LIMB_BITS)-1)). + +-spec bitarr_new() -> bitarr(). +bitarr_new() -> array:new({default, 0}). + +-spec bitarr_get(non_neg_integer(), bitarr()) -> boolean(). +bitarr_get(Index, Array) -> + Limb = array:get(?LIMB_IX(Index), Array), + 0 =/= (Limb band ?BIT_MASK(Index)). + +-spec bitarr_set(non_neg_integer(), bitarr()) -> bitarr(). +bitarr_set(Index, Array) -> + Limb0 = array:get(?LIMB_IX(Index), Array), + Limb = Limb0 bor ?BIT_MASK(Index), + array:set(?LIMB_IX(Index), Limb, Array). + +-spec bitarr_from_bitord(bitord()) -> bitarr(). +bitarr_from_bitord(Ord) -> + array:from_orddict(Ord, 0). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Sixth pass: Partition-local liveness analysis +%% +%% As temps are not spilled when exiting a partition in mode2, only +%% partition-local uses need to be considered when deciding which temps need +%% restoring at partition entry. + +-type plive() :: #{label() => + {call, liveset(), [label()]} + | {nocall, {liveset(), liveset()}, liveset(), [label()]}}. + +-spec plive_analyse(cfg()) -> plive(). +plive_analyse(CFG) -> + Defs0 = plive_init(CFG), + PO = cfg_postorder(CFG), + plive_dataf(PO, Defs0). + +-spec plive_init(cfg()) -> plive(). +plive_init(#cfg{bbs = BBs}) -> + maps:from_list( + [begin + {L, case HasCall of + true -> + {Gen, _} = plive_init_scan(bb_code(BB)), + {call, Gen, Succs}; + false -> + GenKill = plive_init_scan(bb_code(BB)), + {nocall, GenKill, liveset_empty(), Succs} + end} + end || {L, BB = #bb{has_call=HasCall, succ=Succs}} <- maps:to_list(BBs)]). + +-spec plive_init_scan([instr()]) -> {liveset(), liveset()}. +plive_init_scan([]) -> {liveset_empty(), liveset_empty()}; +plive_init_scan([#instr{def=InstrKill, use=InstrGen}|Is]) -> + {Gen0, Kill0} = plive_init_scan(Is), + Gen1 = liveset_subtract(Gen0, InstrKill), + Gen = liveset_union(Gen1, InstrGen), + Kill1 = liveset_union(Kill0, InstrKill), + Kill = liveset_subtract(Kill1, InstrGen), + {Gen, Kill}. + +-spec plive_dataf([label()], plive()) -> plive(). +plive_dataf(Labels, PLive0) -> + case plive_dataf_once(Labels, PLive0, 0) of + {PLive, 0} -> PLive; + {PLive, _Changed} -> + plive_dataf(Labels, PLive) + end. + +-spec plive_dataf_once([label()], plive(), non_neg_integer()) -> + {plive(), non_neg_integer()}. 
+plive_dataf_once([], PLive, Changed) -> {PLive, Changed};
+plive_dataf_once([L|Ls], PLive0, Changed0) ->
+  Liveset =
+    case Liveset0 = maps:get(L, PLive0) of
+      {call, Livein, Succs} ->
+        {call, Livein, Succs};
+      {nocall, {Gen, Kill} = GenKill, _OldLivein, Succs} ->
+        Liveout = pliveout(L, PLive0),
+        Livein = liveset_union(Gen, liveset_subtract(Liveout, Kill)),
+        {nocall, GenKill, Livein, Succs}
+    end,
+  Changed = case Liveset =:= Liveset0 of
+              true -> Changed0;
+              false -> Changed0+1
+            end,
+  plive_dataf_once(Ls, PLive0#{L := Liveset}, Changed).
+
+-spec pliveout(label(), plive()) -> liveset().
+pliveout(L, PLive) ->
+  liveset_union([plivein(S, PLive) || S <- psuccs(L, PLive)]).
+
+-spec psuccs(label(), plive()) -> [label()].
+psuccs(L, PLive) -> psuccs_val(maps:get(L, PLive)).
+psuccs_val({call, _Livein, Succs}) -> Succs;
+psuccs_val({nocall, _GenKill, _Livein, Succs}) -> Succs.
+
+-spec plivein(label(), plive()) -> liveset().
+plivein(L, PLive) -> plivein_val(maps:get(L, PLive)).
+plivein_val({call, Livein, _Succs}) -> Livein;
+plivein_val({nocall, _GenKill, Livein, _Succs}) -> Livein.
+
+liveset_empty() -> ordsets:new().
+liveset_subtract(A, B) -> ordsets:subtract(A, B).
+liveset_union(A, B) -> ordsets:union(A, B).
+liveset_union(LivesetList) -> ordsets:union(LivesetList).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Third pass: Compute dataflow analyses required for placing mode3
+%% spills/restores.
+%% Reuse analysis implementation in hipe_restore_reuse.
+%% XXX: hipe_restore_reuse has its own "rdef"; we would like to reuse that one
+%% too.
+-type avail() :: hipe_restore_reuse:avail().
+
+-spec avail_analyse(target_cfg(), liveness(), target()) -> avail().
+avail_analyse(CFG, Liveness, Target) ->
+  hipe_restore_reuse:analyse(CFG, Liveness, Target).
+
+-spec mode3_split_in_block(label(), avail()) -> ordsets:ordset(temp()).
+mode3_split_in_block(L, Avail) ->
+  hipe_restore_reuse:split_in_block(L, Avail).
+
+-spec mode3_block_renameset(label(), avail()) -> ordsets:ordset(temp()).
+mode3_block_renameset(L, Avail) ->
+  hipe_restore_reuse:renamed_in_block(L, Avail).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Seventh pass
+%%
+%% Compute program space partitioning, collect information required by the
+%% heuristic.
+-type part_key() :: label().
+-type part_dsets() :: hipe_dsets:dsets(part_key()).
+-type part_dsets_map() :: #{part_key() => part_key()}.
+-type ducounts() :: #{part_key() => ducount()}.
+
+-spec scan(cfg(), liveness(), plive(), weights(), defs(), rdefs(), avail(),
+           target()) -> {cfg(), ducounts(), costs(), part_dsets()}.
+scan(CFG0, Liveness, PLive, Weights, Defs, RDefs, Avail, Target) ->
+  #cfg{rpo_labels = Labels, bbs = BBs0} = CFG0,
+  CFG = CFG0#cfg{bbs=#{}}, % kill reference
+  DSets0 = hipe_dsets:new(Labels),
+  Costs0 = costs_new(),
+  {BBs, DUCounts0, Costs1, DSets1} =
+    scan_bbs(maps:to_list(BBs0), Liveness, PLive, Weights, Defs, RDefs, Avail,
+             Target, #{}, Costs0, DSets0, []),
+  {RLList, DSets2} = hipe_dsets:to_rllist(DSets1),
+  {Costs, DSets} = costs_map_roots(DSets2, Costs1),
+  DUCounts = collect_ducounts(RLList, DUCounts0, #{}),
+  {CFG#cfg{bbs=maps:from_list(BBs)}, DUCounts, Costs, DSets}.
+
+-spec collect_ducounts([{label(), [label()]}], ducounts(), ducounts())
+                      -> ducounts().
+collect_ducounts([], _, Acc) -> Acc; +collect_ducounts([{R,Ls}|RLs], DUCounts, Acc) -> + DUCount = lists:foldl( + fun(Key, FAcc) -> + ducount_merge(maps:get(Key, DUCounts, ducount_new()), FAcc) + end, ducount_new(), Ls), + collect_ducounts(RLs, DUCounts, Acc#{R => DUCount}). + +-spec scan_bbs([{label(), bb()}], liveness(), plive(), weights(), defs(), + rdefs(), avail(), target(), ducounts(), costs(), part_dsets(), + [{label(), bb()}]) + -> {[{label(), bb()}], ducounts(), costs(), part_dsets()}. +scan_bbs([], _Liveness, _PLive, _Weights, _Defs, _RDefs, _Avail, _Target, + DUCounts, Costs, DSets, Acc) -> + {Acc, DUCounts, Costs, DSets}; +scan_bbs([{L,BB}|BBs], Liveness, PLive, Weights, Defs, RDefs, Avail, Target, + DUCounts0, Costs0, DSets0, Acc) -> + Wt = weight(L, Weights), + {DSets, Costs5, EntryCode, ExitCode, RDefout, Liveout} = + case bb_has_call(BB) of + false -> + DSets1 = lists:foldl(fun(S, DS) -> hipe_dsets:union(L, S, DS) end, + DSets0, bb_succ(BB)), + {DSets1, Costs0, bb_code(BB), [], rdefout(L, RDefs), + liveout(Liveness, L, Target)}; + true -> + LastI = #instr{def=LastDef} = bb_last(BB), + LiveBefore = ordsets:subtract(liveout(Liveness, L, Target), LastDef), + %% We can omit the spill of a temp that has not been defined since the + %% last time it was spilled + SpillSet = defsetf_intersect_ordset(LiveBefore, defbutlast(L, Defs)), + Costs1 = costs_insert(exit, L, Wt, SpillSet, Costs0), + Costs4 = lists:foldl(fun({S, BranchWt}, Costs2) -> + SLivein = livein(Liveness, S, Target), + SPLivein = plivein(S, PLive), + SWt = weight_scaled(L, BranchWt, Weights), + Costs3 = costs_insert(entry1, S, SWt, SLivein, Costs2), + costs_insert(entry2, S, SWt, SPLivein, Costs3) + end, Costs1, branch_preds(LastI#instr.i, Target)), + {DSets0, Costs4, bb_butlast(BB), [LastI], rdefsetf_empty(), LiveBefore} + end, + Mode3Splits = mode3_split_in_block(L, Avail), + {RevEntryCode, Restored} = scan_bb_fwd(EntryCode, Mode3Splits, [], []), + {Code, DUCount, Mode2Spills} = + scan_bb(RevEntryCode, Wt, RDefout, Liveout, ducount_new(), [], ExitCode), + DUCounts = DUCounts0#{L => DUCount}, + M2SpillSet = ordsets:from_list(Mode2Spills), + Costs6 = costs_insert(spill, L, Wt, M2SpillSet, Costs5), + Mode3Renames = mode3_block_renameset(L, Avail), + Costs7 = costs_insert(restore, L, Wt, ordsets:intersection(M2SpillSet, Mode3Renames), Costs6), + Costs8 = costs_insert(restore, L, Wt, ordsets:from_list(Restored), Costs7), + Costs = add_unsplit_mode3_costs(DUCount, Mode3Renames, L, Costs8), + scan_bbs(BBs, Liveness, PLive, Weights, Defs, RDefs, Avail, Target, DUCounts, + Costs, DSets, [{L,BB#bb{code=Code}}|Acc]). + +-spec add_unsplit_mode3_costs(ducount(), ordsets:ordset(temp()), label(), costs()) + -> costs(). +add_unsplit_mode3_costs(DUCount, Mode3Renames, L, Costs) -> + Unsplit = orddict_without_ordset(Mode3Renames, + orddict:from_list(ducount_to_list(DUCount))), + add_unsplit_mode3_costs_1(Unsplit, L, Costs). + +-spec add_unsplit_mode3_costs_1([{temp(),float()}], label(), costs()) + -> costs(). +add_unsplit_mode3_costs_1([], _L, Costs) -> Costs; +add_unsplit_mode3_costs_1([{T,C}|Cs], L, Costs) -> + add_unsplit_mode3_costs_1(Cs, L, costs_insert(restore, L, C, [T], Costs)). + +%% @doc Returns a new orddict without keys in Set and their associated values. +-spec orddict_without_ordset(ordsets:ordset(K), orddict:orddict(K, V)) + -> orddict:orddict(K, V). 
+orddict_without_ordset([S|Ss], [{K,_}|_]=Dict) when S < K -> + orddict_without_ordset(Ss, Dict); +orddict_without_ordset([S|_]=Set, [D={K,_}|Ds]) when S > K -> + [D|orddict_without_ordset(Set, Ds)]; +orddict_without_ordset([_S|Ss], [{_K,_}|Ds]) -> % _S == _K + orddict_without_ordset(Ss, Ds); +orddict_without_ordset(_, []) -> []; +orddict_without_ordset([], Dict) -> Dict. + +%% Scans the code forward, collecting and inserting mode3 restores +-spec scan_bb_fwd([instr()], ordsets:ordset(temp()), ordsets:ordset(temp()), + [code_elem()]) + -> {[code_elem()], ordsets:ordset(temp())}. +scan_bb_fwd([], [], Restored, Acc) -> {Acc, Restored}; +scan_bb_fwd([I|Is], SplitHere0, Restored0, Acc0) -> + #instr{def=Def, use=Use} = I, + {ToRestore, SplitHere1} = + lists:partition(fun(R) -> lists:member(R, Use) end, SplitHere0), + SplitHere = lists:filter(fun(R) -> not lists:member(R, Def) end, SplitHere1), + Acc = + case ToRestore of + [] -> [I | Acc0]; + _ -> [I, #mode3_restores{temps=ToRestore} | Acc0] + end, + scan_bb_fwd(Is, SplitHere, ToRestore ++ Restored0, Acc). + +%% Scans the code backwards, collecting def/use counts and mode2 spills +-spec scan_bb([code_elem()], float(), rdefsetf(), liveset(), ducount(), + [temp()], [code_elem()]) + -> {[code_elem()], ducount(), [temp()]}. +scan_bb([], _Wt, _RDefout, _Liveout, DUCount, Spills, Acc) -> + {Acc, DUCount, Spills}; +scan_bb([I=#mode3_restores{}|Is], Wt, RDefout, Liveout, DUCount, Spills, Acc) -> + scan_bb(Is, Wt, RDefout, Liveout, DUCount, Spills, [I|Acc]); +scan_bb([I|Is], Wt, RDefout, Liveout, DUCount0, Spills0, Acc0) -> + #instr{def=Def,use=Use} = I, + DUCount = ducount_add(Use, Wt, ducount_add(Def, Wt, DUCount0)), + Livein = liveness_step(I, Liveout), + RDefin = rdef_step(I, RDefout), + %% The temps that would be spilled after I in mode 2 + NewSpills = ordset_subtract_rdefsetf( + ordsets:intersection(Def, Liveout), + RDefout), + ?ASSERT(NewSpills =:= (NewSpills -- Spills0)), + Spills = NewSpills ++ Spills0, + Acc1 = case NewSpills of + [] -> Acc0; + _ -> [#mode2_spills{temps=NewSpills}|Acc0] + end, + scan_bb(Is, Wt, RDefin, Livein, DUCount, Spills, [I|Acc1]). + +-spec liveness_step(instr(), liveset()) -> liveset(). +liveness_step(#instr{def=Def, use=Use}, Liveout) -> + ordsets:union(Use, ordsets:subtract(Liveout, Def)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% First pass: compute basic-block weighting + +-type weights() :: no_bb_weights + | {hipe_bb_weights:bb_weights(), float()}. + +-spec weight(label(), weights()) -> float(). +weight(L, Weights) -> weight_scaled(L, 1.0, Weights). + +-spec compute_weights(target_cfg(), target_module(), target_context(), + comp_options()) -> weights(). +compute_weights(CFG, TargetMod, TargetContext, Options) -> + case proplists:get_bool(range_split_weights, Options) of + false -> no_bb_weights; + true -> + {hipe_bb_weights:compute(CFG, TargetMod, TargetContext), + ?WEIGHT_CONST_FUN(proplists:get_value(range_split_weight_power, + Options, ?DEFAULT_WEIGHT_POWER))} + end. + +-spec weight_scaled(label(), float(), weights()) -> float(). +weight_scaled(_L, _Scale, no_bb_weights) -> 1.0; +weight_scaled(L, Scale, {Weights, Const}) -> + Wt0 = hipe_bb_weights:weight(L, Weights) * Scale, + Wt = erlang:min(erlang:max(Wt0, 0.0000000000000000001), 10000.0), + ?WEIGHT_FUN(Wt, Const). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Heuristic splitting decision. +%% +%% Decide which temps to split, in which parts, and pick new names for them. 
+-type spill_mode() :: mode1 % Spill temps at partition exits + | mode2 % Spill temps at definitions + | mode3.% Spill temps at definitions, restore temps at uses +-type ren() :: #{temp() => {spill_mode(), temp()}}. +-type renames() :: #{label() => ren()}. + +-record(heur_par, { + mode1_fudge :: float(), + min_gain :: float() + }). +-type heur_par() :: #heur_par{}. + +-spec decide(ducounts(), costs(), target(), comp_options()) -> renames(). +decide(DUCounts, Costs, Target, Options) -> + Par = #heur_par{ + mode1_fudge = proplists:get_value(range_split_mode1_fudge, Options, + ?DEFAULT_MODE1_FUDGE), + min_gain = proplists:get_value(range_split_min_gain, Options, + ?DEFAULT_MIN_GAIN)}, + decide_parts(maps:to_list(DUCounts), Costs, Target, Par, #{}). + +-spec decide_parts([{part_key(), ducount()}], costs(), target(), + heur_par(), renames()) + -> renames(). +decide_parts([], _Costs, _Target, _Par, Acc) -> Acc; +decide_parts([{Part,DUCount}|Ps], Costs, Target, Par, Acc) -> + Spills = decide_temps(ducount_to_list(DUCount), Part, Costs, Target, Par, + #{}), + decide_parts(Ps, Costs, Target, Par, Acc#{Part => Spills}). + +-spec decide_temps([{temp(), float()}], part_key(), costs(), target(), + heur_par(), ren()) + -> ren(). +decide_temps([], _Part, _Costs, _Target, _Par, Acc) -> Acc; +decide_temps([{Temp, SpillGain}|Ts], Part, Costs, Target, Par, Acc0) -> + SpillCost1 = costs_query(Temp, entry1, Part, Costs) + + costs_query(Temp, exit, Part, Costs), + SpillCost2 = costs_query(Temp, entry2, Part, Costs) + + costs_query(Temp, spill, Part, Costs), + SpillCost3 = costs_query(Temp, restore, Part, Costs), + Acc = + %% SpillCost1 =:= 0.0 usually means the temp is local to the partition; + %% hence no need to split it + case (SpillCost1 =/= 0.0) %% maps:is_key(Temp, S) + andalso (not is_precoloured(Temp, Target)) + andalso ((Par#heur_par.min_gain*SpillCost1 < SpillGain) + orelse (Par#heur_par.min_gain*SpillCost2 < SpillGain) + orelse (Par#heur_par.min_gain*SpillCost3 < SpillGain)) + of + false -> Acc0; + true -> + Mode = + if Par#heur_par.mode1_fudge*SpillCost1 < SpillCost2, + Par#heur_par.mode1_fudge*SpillCost1 < SpillCost3 -> + mode1; + SpillCost2 < SpillCost3 -> + mode2; + true -> + mode3 + end, + Acc0#{Temp => {Mode, new_reg_nr(Target)}} + end, + decide_temps(Ts, Part, Costs, Target, Par, Acc). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Eighth pass: Rewrite program performing range splitting. + +-spec rewrite(cfg(), target_cfg(), target(), liveness(), plive(), defs(), + avail(), part_dsets_map(), renames(), temps()) + -> target_cfg(). +rewrite(#cfg{bbs=BBs}, TCFG, Target, Liveness, PLive, Defs, Avail, DSets, + Renames, Temps) -> + rewrite_bbs(maps:to_list(BBs), Target, Liveness, PLive, Defs, Avail, DSets, + Renames, Temps, TCFG). + +-spec rewrite_bbs([{label(), bb()}], target(), liveness(), plive(), defs(), + avail(), part_dsets_map(), renames(), temps(), target_cfg()) + -> target_cfg(). 
+rewrite_bbs([], _Target, _Liveness, _PLive, _Defs, _Avail, _DSets, _Renames, + _Temps, TCFG) -> + TCFG; +rewrite_bbs([{L,BB}|BBs], Target, Liveness, PLive, Defs, Avail, DSets, Renames, + Temps, TCFG0) -> + Code0Rev = lists:reverse(bb_code(BB)), + EntryRen = maps:get(maps:get(L,DSets), Renames), + M3Ren = mode3_block_renameset(L, Avail), + SubstFun = rewrite_subst_fun(Target, EntryRen, M3Ren), + Fun = fun(I) -> subst_temps(SubstFun, I, Target) end, + {Code, TCFG} = + case bb_has_call(BB) of + false -> + Code1 = rewrite_instrs(Code0Rev, Fun, EntryRen, M3Ren, Temps, Target, + []), + {Code1, TCFG0}; + true -> + CallI0 = hd(Code0Rev), + Succ = bb_succ(BB), + {CallTI, TCFG1} = inject_restores(Succ, Target, Liveness, PLive, DSets, + Renames, Temps, CallI0#instr.i, TCFG0), + Liveout1 = liveness_step(CallI0, liveout(Liveness, L, Target)), + Defout = defbutlast(L, Defs), + SpillMap = mk_spillmap(EntryRen, Liveout1, Defout, Temps, Target), + Code1 = rewrite_instrs(tl(Code0Rev), Fun, EntryRen, M3Ren, Temps, + Target, []), + Code2 = lift_spills(lists:reverse(Code1), Target, SpillMap, [CallTI]), + {Code2, TCFG1} + end, + TBB = hipe_bb:code_update(bb(TCFG, L, Target), Code), + rewrite_bbs(BBs, Target, Liveness, PLive, Defs, Avail, DSets, Renames, Temps, + update_bb(TCFG, L, TBB, Target)). + +-spec rewrite_instrs([code_elem()], rewrite_fun(), ren(), + ordsets:ordset(temp()), temps(), target(), + [target_instr()]) + -> [target_instr()]. +rewrite_instrs([], _Fun, _Ren, _M3Ren, _Temps, _Target, Acc) -> Acc; +rewrite_instrs([I|Is], Fun, Ren, M3Ren, Temps, Target, Acc0) -> + Acc = + case I of + #instr{i=TI} -> [Fun(TI)|Acc0]; + #mode2_spills{temps=Mode2Spills} -> + add_mode2_spills(Mode2Spills, Target, Ren, M3Ren, Temps, Acc0); + #mode3_restores{temps=Mode3Restores} -> + add_mode3_restores(Mode3Restores, Target, Ren, Temps, Acc0) + end, + rewrite_instrs(Is, Fun, Ren, M3Ren, Temps, Target, Acc). + +-spec add_mode2_spills(ordsets:ordset(temp()), target(), ren(), + ordsets:ordset(temp()), temps(), [target_instr()]) + -> [target_instr()]. +add_mode2_spills([], _Target, _Ren, _M3Ren, _Temps, Acc) -> Acc; +add_mode2_spills([R|Rs], Target, Ren, M3Ren, Temps, Acc0) -> + Acc = + case Ren of + #{R := {Mode, NewName}} when Mode =:= mode2; Mode =:= mode3 -> + case Mode =/= mode3 orelse lists:member(R, M3Ren) of + false -> Acc0; + true -> + #{R := T} = Temps, + SpillInstr = mk_move(update_reg_nr(NewName, T, Target), T, Target), + [SpillInstr|Acc0] + end; + #{} -> + Acc0 + end, + add_mode2_spills(Rs, Target, Ren, M3Ren, Temps, Acc). + +-spec add_mode3_restores(ordsets:ordset(temp()), target(), ren(), temps(), + [target_instr()]) + -> [target_instr()]. +add_mode3_restores([], _Target, _Ren, _Temps, Acc) -> Acc; +add_mode3_restores([R|Rs], Target, Ren, Temps, Acc) -> + case Ren of + #{R := {mode3, NewName}} -> + #{R := T} = Temps, + RestoreInstr = mk_move(T, update_reg_nr(NewName, T, Target), Target), + add_mode3_restores(Rs, Target, Ren, Temps, [RestoreInstr|Acc]); + #{} -> + add_mode3_restores(Rs, Target, Ren, Temps, Acc) + end. + +-type rewrite_fun() :: fun((target_instr()) -> target_instr()). +-type subst_fun() :: fun((target_temp()) -> target_temp()). +-spec rewrite_subst_fun(target(), ren(), ordsets:ordset(temp())) -> subst_fun(). +rewrite_subst_fun(Target, Ren, M3Ren) -> + fun(Temp) -> + Reg = reg_nr(Temp, Target), + case Ren of + #{Reg := {Mode, NewName}} -> + case Mode =/= mode3 orelse lists:member(Reg, M3Ren) of + false -> Temp; + true -> update_reg_nr(NewName, Temp, Target) + end; + #{} -> Temp + end + end. 
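(Editor's illustration, not part of the patch.) With plain integers standing in for target temps, so that reg_nr/2 and update_reg_nr/3 reduce to the identity and a constant, the closure built by rewrite_subst_fun/3 behaves like the following self-contained sketch; subst_fun_sketch/2 is a hypothetical name used only here.

%% Hypothetical stand-alone sketch of the renaming rule above: a register is
%% renamed when it appears in Ren, except that mode3 renames only apply to
%% registers in the mode3 rename set M3Ren.
subst_fun_sketch(Ren, M3Ren) ->
  fun(Reg) ->
      case Ren of
        #{Reg := {Mode, NewName}} ->
          case Mode =/= mode3 orelse lists:member(Reg, M3Ren) of
            true  -> NewName;
            false -> Reg
          end;
        #{} -> Reg
      end
  end.
%% For example:
%%   (subst_fun_sketch(#{3 => {mode3, 17}}, [3]))(3) =:= 17
%%   (subst_fun_sketch(#{3 => {mode3, 17}}, []))(3)  =:= 3
%%   (subst_fun_sketch(#{3 => {mode1, 17}}, []))(3)  =:= 17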
+ +-type spillmap() :: [{temp(), target_instr()}]. +-spec mk_spillmap(ren(), liveset(), defsetf(), temps(), target()) + -> spillmap(). +mk_spillmap(Ren, Livein, Defout, Temps, Target) -> + [begin + Temp = maps:get(Reg, Temps), + {NewName, mk_move(update_reg_nr(NewName, Temp, Target), Temp, Target)} + end || {Reg, {mode1, NewName}} <- maps:to_list(Ren), + lists:member(Reg, Livein), defsetf_member(Reg, Defout)]. + +-spec mk_restores(ren(), liveset(), liveset(), temps(), target()) + -> [target_instr()]. +mk_restores(Ren, Livein, PLivein, Temps, Target) -> + [begin + Temp = maps:get(Reg, Temps), + mk_move(Temp, update_reg_nr(NewName, Temp, Target), Target) + end || {Reg, {Mode, NewName}} <- maps:to_list(Ren), + ( (Mode =:= mode1 andalso lists:member(Reg, Livein )) + orelse (Mode =:= mode2 andalso lists:member(Reg, PLivein)))]. + +-spec inject_restores([label()], target(), liveness(), plive(), + part_dsets_map(), renames(), temps(), target_instr(), + target_cfg()) + -> {target_instr(), target_cfg()}. +inject_restores([], _Target, _Liveness, _PLive, _DSets, _Renames, _Temps, CFTI, + TCFG) -> + {CFTI, TCFG}; +inject_restores([L|Ls], Target, Liveness, PLive, DSets, Renames, Temps, CFTI0, + TCFG0) -> + Ren = maps:get(maps:get(L,DSets), Renames), + Livein = livein(Liveness, L, Target), + PLivein = plivein(L, PLive), + {CFTI, TCFG} = + case mk_restores(Ren, Livein, PLivein, Temps, Target) of + [] -> {CFTI0, TCFG0}; % optimisation + Restores -> + RestBBLbl = new_label(Target), + Code = Restores ++ [mk_goto(L, Target)], + CFTI1 = redirect_jmp(CFTI0, L, RestBBLbl, Target), + TCFG1 = update_bb(TCFG0, RestBBLbl, hipe_bb:mk_bb(Code), Target), + {CFTI1, TCFG1} + end, + inject_restores(Ls, Target, Liveness, PLive, DSets, Renames, Temps, CFTI, + TCFG). + +%% Heuristic. Move spills up until we meet the edge of the BB or a definition of +%% that temp. +-spec lift_spills([target_instr()], target(), spillmap(), [target_instr()]) + -> [target_instr()]. +lift_spills([], _Target, SpillMap, Acc) -> + [SpillI || {_, SpillI} <- SpillMap] ++ Acc; +lift_spills([I|Is], Target, SpillMap0, Acc) -> + Def = reg_defines(I, Target), + {Spills0, SpillMap} = + lists:partition(fun({Reg,_}) -> lists:member(Reg, Def) end, SpillMap0), + Spills = [SpillI || {_, SpillI} <- Spills0], + lift_spills(Is, Target, SpillMap, [I|Spills ++ Acc]). + +reg_defines(I, Target) -> + reg_names(defines(I,Target), Target). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Costs ADT +%% +%% Keeps track of cumulative cost of spilling temps in particular partitions +%% using particular spill modes. +-type cost_map() :: #{[part_key()|temp()] => float()}. +-type cost_key() :: entry1 | entry2 | exit | spill | restore. +-record(costs, {entry1 = #{} :: cost_map() + ,entry2 = #{} :: cost_map() + ,exit = #{} :: cost_map() + ,spill = #{} :: cost_map() + ,restore = #{} :: cost_map() + }). +-type costs() :: #costs{}. + +-spec costs_new() -> costs(). +costs_new() -> #costs{}. + +-spec costs_insert(cost_key(), part_key(), float(), liveset(), costs()) + -> costs(). 
+costs_insert(entry1, A, Weight, Liveset, Costs=#costs{entry1=Entry1}) -> + Costs#costs{entry1=costs_insert_1(A, Weight, Liveset, Entry1)}; +costs_insert(entry2, A, Weight, Liveset, Costs=#costs{entry2=Entry2}) -> + Costs#costs{entry2=costs_insert_1(A, Weight, Liveset, Entry2)}; +costs_insert(exit, A, Weight, Liveset, Costs=#costs{exit=Exit}) -> + Costs#costs{exit=costs_insert_1(A, Weight, Liveset, Exit)}; +costs_insert(spill, A, Weight, Liveset, Costs=#costs{spill=Spill}) -> + Costs#costs{spill=costs_insert_1(A, Weight, Liveset, Spill)}; +costs_insert(restore, A, Weight, Liveset, Costs=#costs{restore=Restore}) -> + Costs#costs{restore=costs_insert_1(A, Weight, Liveset, Restore)}. + +costs_insert_1(A, Weight, Liveset, CostMap0) when is_float(Weight) -> + lists:foldl(fun(Live, CostMap1) -> + map_update_counter([A|Live], Weight, CostMap1) + end, CostMap0, Liveset). + +-spec costs_map_roots(part_dsets(), costs()) -> {costs(), part_dsets()}. +costs_map_roots(DSets0, Costs) -> + {Entry1, DSets1} = costs_map_roots_1(DSets0, Costs#costs.entry1), + {Entry2, DSets2} = costs_map_roots_1(DSets1, Costs#costs.entry2), + {Exit, DSets3} = costs_map_roots_1(DSets2, Costs#costs.exit), + {Spill, DSets4} = costs_map_roots_1(DSets3, Costs#costs.spill), + {Restore, DSets} = costs_map_roots_1(DSets4, Costs#costs.restore), + {#costs{entry1=Entry1,entry2=Entry2,exit=Exit,spill=Spill,restore=Restore}, + DSets}. + +costs_map_roots_1(DSets0, CostMap) -> + {NewEs, DSets} = lists:mapfoldl(fun({[A|T], Wt}, DSets1) -> + {AR, DSets2} = hipe_dsets:find(A, DSets1), + {{[AR|T], Wt}, DSets2} + end, DSets0, maps:to_list(CostMap)), + {maps_from_list_merge(NewEs, fun erlang:'+'/2, #{}), DSets}. + +maps_from_list_merge([], _MF, Acc) -> Acc; +maps_from_list_merge([{K,V}|Ps], MF, Acc) -> + maps_from_list_merge(Ps, MF, case Acc of + #{K := OV} -> Acc#{K := MF(V, OV)}; + #{} -> Acc#{K => V} + end). + +-spec costs_query(temp(), cost_key(), part_key(), costs()) -> float(). +costs_query(Temp, entry1, Part, #costs{entry1=Entry1}) -> + costs_query_1(Temp, Part, Entry1); +costs_query(Temp, entry2, Part, #costs{entry2=Entry2}) -> + costs_query_1(Temp, Part, Entry2); +costs_query(Temp, exit, Part, #costs{exit=Exit}) -> + costs_query_1(Temp, Part, Exit); +costs_query(Temp, spill, Part, #costs{spill=Spill}) -> + costs_query_1(Temp, Part, Spill); +costs_query(Temp, restore, Part, #costs{restore=Restore}) -> + costs_query_1(Temp, Part, Restore). + +costs_query_1(Temp, Part, CostMap) -> + Key = [Part|Temp], + case CostMap of + #{Key := Wt} -> Wt; + #{} -> 0.0 + end. + +-spec map_update_counter(Key, number(), #{Key => number(), OK => OV}) + -> #{Key := number(), OK => OV}. +map_update_counter(Key, Incr, Map) -> + case Map of + #{Key := Orig} -> Map#{Key := Orig + Incr}; + #{} -> Map#{Key => Incr} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Def and use counting ADT +-type ducount() :: #{temp() => float()}. + +-spec ducount_new() -> ducount(). +ducount_new() -> #{}. + +-spec ducount_add([temp()], float(), ducount()) -> ducount(). +ducount_add([], _Weight, DUCount) -> DUCount; +ducount_add([T|Ts], Weight, DUCount0) -> + DUCount = + case DUCount0 of + #{T := Count} -> DUCount0#{T := Count + Weight}; + #{} -> DUCount0#{T => Weight} + end, + ducount_add(Ts, Weight, DUCount). + +ducount_to_list(DUCount) -> maps:to_list(DUCount). + +-spec ducount_merge(ducount(), ducount()) -> ducount(). 
+ducount_merge(DCA, DCB) when map_size(DCA) < map_size(DCB) -> + ducount_merge_1(ducount_to_list(DCA), DCB); +ducount_merge(DCA, DCB) when map_size(DCA) >= map_size(DCB) -> + ducount_merge_1(ducount_to_list(DCB), DCA). + +ducount_merge_1([], DUCount) -> DUCount; +ducount_merge_1([{T,AC}|Ts], DUCount0) -> + DUCount = + case DUCount0 of + #{T := BC} -> DUCount0#{T := AC + BC}; + #{} -> DUCount0#{T => AC} + end, + ducount_merge_1(Ts, DUCount). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Target module interface functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-define(TGT_IFACE_0(N), N( {M,C}) -> M:N( C)). +-define(TGT_IFACE_1(N), N(A1, {M,C}) -> M:N(A1, C)). +-define(TGT_IFACE_2(N), N(A1,A2, {M,C}) -> M:N(A1,A2, C)). +-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)). + +?TGT_IFACE_2(bb). +?TGT_IFACE_1(def_use). +?TGT_IFACE_1(defines). +?TGT_IFACE_1(defines_all_alloc). +?TGT_IFACE_1(is_precoloured). +?TGT_IFACE_1(mk_goto). +?TGT_IFACE_2(mk_move). +?TGT_IFACE_0(new_label). +?TGT_IFACE_0(new_reg_nr). +?TGT_IFACE_1(number_of_temporaries). +?TGT_IFACE_3(redirect_jmp). +?TGT_IFACE_1(reg_nr). +?TGT_IFACE_1(reverse_postorder). +?TGT_IFACE_2(subst_temps). +?TGT_IFACE_3(update_bb). +?TGT_IFACE_2(update_reg_nr). + +branch_preds(Instr, {TgtMod,TgtCtx}) -> + merge_sorted_preds(lists:keysort(1, TgtMod:branch_preds(Instr, TgtCtx))). + +livein(Liveness, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:livein(Liveness, L, TgtCtx), Target)). + +liveout(Liveness, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:liveout(Liveness, L, TgtCtx), Target)). + +merge_sorted_preds([]) -> []; +merge_sorted_preds([{L, P1}, {L, P2}|LPs]) -> + merge_sorted_preds([{L, P1+P2}|LPs]); +merge_sorted_preds([LP|LPs]) -> [LP|merge_sorted_preds(LPs)]. + +reg_names(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. diff --git a/lib/hipe/regalloc/hipe_regalloc_loop.erl b/lib/hipe/regalloc/hipe_regalloc_loop.erl index 5d32073437..29ef3adcc2 100644 --- a/lib/hipe/regalloc/hipe_regalloc_loop.erl +++ b/lib/hipe/regalloc/hipe_regalloc_loop.erl @@ -99,11 +99,19 @@ call_allocator(CFG, Liveness, SpillLimit, SpillIndex, Options, RegAllocMod, TargetCtx, Options) end. -do_range_split(CFG0, Liveness, TgtMod, TgtCtx, Options) -> - case proplists:get_bool(ra_restore_reuse, Options) of +do_range_split(CFG0, Liveness0, TgtMod, TgtCtx, Options) -> + {CFG2, Liveness1} = + case proplists:get_bool(ra_restore_reuse, Options) of + true -> + CFG1 = hipe_restore_reuse:split(CFG0, Liveness0, TgtMod, TgtCtx), + {CFG1, TgtMod:analyze(CFG1, TgtCtx)}; + false -> + {CFG0, Liveness0} + end, + case proplists:get_bool(ra_range_split, Options) of true -> - CFG1 = hipe_restore_reuse:split(CFG0, Liveness, TgtMod, TgtCtx), - {CFG1, TgtMod:analyze(CFG1, TgtCtx)}; + CFG3 = hipe_range_split:split(CFG2, Liveness1, TgtMod, TgtCtx, Options), + {CFG3, TgtMod:analyze(CFG3, TgtCtx)}; false -> - {CFG0, Liveness} + {CFG2, Liveness1} end. diff --git a/lib/hipe/regalloc/hipe_restore_reuse.erl b/lib/hipe/regalloc/hipe_restore_reuse.erl index 2d8dea2a2c..2158bd185e 100644 --- a/lib/hipe/regalloc/hipe_restore_reuse.erl +++ b/lib/hipe/regalloc/hipe_restore_reuse.erl @@ -36,6 +36,14 @@ -export([split/4]). +%% Exports for hipe_range_split, which uses restore_reuse as one possible spill +%% "mode" +-export([analyse/3 + ,renamed_in_block/2 + ,split_in_block/2 + ]). +-export_type([avail/0]). + -compile(inline). %% -define(DO_ASSERT, 1). 
@@ -59,7 +67,7 @@ split(CFG, Liveness, TargetMod, TargetContext) -> rewrite(CFG, Target, Avail). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% --type avail() :: #{label() => avail_bb()}. +-opaque avail() :: #{label() => avail_bb()}. -record(avail_bb, { %% Blocks where HasCall is true are considered to have too high @@ -332,6 +340,11 @@ rewrite([L|Ls], Target, Avail, Input0, CFG0) -> Input1, CFG1), rewrite(Ls, Target, Avail, Input, CFG). +-spec renamed_in_block(label(), avail()) -> ordsets:ordset(reg()). +renamed_in_block(L, Avail) -> + ordsets:union([avail_self(L, Avail), want_in(L, Avail), + want_out(L, Avail)]). + -spec split_in_block(label(), avail()) -> ordsets:ordset(reg()). split_in_block(L, Avail) -> ordsets:subtract(ordsets:union(avail_self(L, Avail), want_out(L, Avail)), -- cgit v1.2.3 From c52b2cf226cb3f1bb1b16bee28d47785506adff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20L=C3=A5ng?= Date: Thu, 16 Mar 2017 15:30:00 +0100 Subject: hipe: Add pseudo_spill_f?move instructions These pseudo instructions are added to all backends and allow spill slot to spill slot move coalescing in a clean way. They have regular move semantics, but contain an additional scratch register to be used if both source and destination are spilled, and can not be move coalesced. Additionally, a register allocator callback Target:is_spill_move(Instr, Context) is added which allows the spill slot allocators to check for these instructions and try to coalesce the spill slots the two temporaries are allocated to. --- lib/hipe/regalloc/hipe_amd64_specific_sse2.erl | 4 ++++ lib/hipe/regalloc/hipe_arm_specific.erl | 4 ++++ lib/hipe/regalloc/hipe_ppc_specific.erl | 4 ++++ lib/hipe/regalloc/hipe_ppc_specific_fp.erl | 4 ++++ lib/hipe/regalloc/hipe_sparc_specific.erl | 4 ++++ lib/hipe/regalloc/hipe_sparc_specific_fp.erl | 4 ++++ lib/hipe/regalloc/hipe_x86_specific.erl | 4 ++++ lib/hipe/regalloc/hipe_x86_specific_x87.erl | 4 ++++ 8 files changed, 32 insertions(+) (limited to 'lib/hipe/regalloc') diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl index 8914e7cac3..d592ba391c 100644 --- a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl +++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl @@ -30,6 +30,7 @@ def_use/2, is_arg/2, %% used by hipe_ls_regalloc is_move/2, + is_spill_move/2, is_fixed/2, %% used by hipe_graph_coloring_regalloc is_global/2, is_precoloured/2, @@ -194,6 +195,9 @@ is_move(Instruction, _) -> andalso hipe_x86:is_temp(Dst) andalso hipe_x86:temp_is_allocatable(Dst); false -> false end. + +is_spill_move(Instruction,_) -> + hipe_x86:is_pseudo_spill_fmove(Instruction). reg_nr(Reg, _) -> hipe_x86:temp_reg(Reg). diff --git a/lib/hipe/regalloc/hipe_arm_specific.erl b/lib/hipe/regalloc/hipe_arm_specific.erl index 517ad83982..7ebc6aa336 100644 --- a/lib/hipe/regalloc/hipe_arm_specific.erl +++ b/lib/hipe/regalloc/hipe_arm_specific.erl @@ -24,6 +24,7 @@ ,reg_nr/2 ,def_use/2 ,is_move/2 + ,is_spill_move/2 ,is_precoloured/2 ,var_range/2 ,allocatable/1 @@ -154,6 +155,9 @@ is_move(Instruction, _) -> false -> false end. +is_spill_move(Instruction, _) -> + hipe_arm:is_pseudo_spill_move(Instruction). + reg_nr(Reg, _) -> hipe_arm:temp_reg(Reg). 
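(Editor's sketch, not part of the patch series.) The remaining backends below add the same is_spill_move/2 callback. As a rough illustration of how a spill-slot allocator might consume it, coalescing candidates could be collected as follows; spill_coalesce_candidates/2 is a hypothetical helper, and only the Target:def_use/2 and Target:is_spill_move/2 callbacks are assumed.

%% Hypothetical helper: collect the {Defines, Uses} pairs of every spill move
%% in an instruction list, as coalescing candidates for a spill-slot
%% allocator.
spill_coalesce_candidates(Instrs, {TgtMod, TgtCtx}) ->
  [TgtMod:def_use(I, TgtCtx)
   || I <- Instrs, TgtMod:is_spill_move(I, TgtCtx)].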
diff --git a/lib/hipe/regalloc/hipe_ppc_specific.erl b/lib/hipe/regalloc/hipe_ppc_specific.erl index 50ab0f04cf..81bb551bd2 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific.erl @@ -24,6 +24,7 @@ ,reg_nr/2 ,def_use/2 ,is_move/2 + ,is_spill_move/2 ,is_precoloured/2 ,var_range/2 ,allocatable/1 @@ -154,6 +155,9 @@ is_move(Instruction, _) -> false -> false end. +is_spill_move(Instruction, _) -> + hipe_ppc:is_pseudo_spill_move(Instruction). + reg_nr(Reg, _) -> hipe_ppc:temp_reg(Reg). diff --git a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl index 5a4f63fc78..dcfdf6592c 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl @@ -24,6 +24,7 @@ ,reg_nr/2 ,def_use/2 ,is_move/2 + ,is_spill_move/2 ,is_precoloured/2 ,var_range/2 ,allocatable/1 @@ -135,6 +136,9 @@ defines_all_alloc(I, _) -> is_move(I, _) -> hipe_ppc:is_pseudo_fmove(I). +is_spill_move(I, _) -> + hipe_ppc:is_pseudo_spill_fmove(I). + reg_nr(Reg, _) -> hipe_ppc:temp_reg(Reg). diff --git a/lib/hipe/regalloc/hipe_sparc_specific.erl b/lib/hipe/regalloc/hipe_sparc_specific.erl index 9eb57dafd2..78b6379eba 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific.erl @@ -24,6 +24,7 @@ ,reg_nr/2 ,def_use/2 ,is_move/2 + ,is_spill_move/2 ,is_precoloured/2 ,var_range/2 ,allocatable/1 @@ -154,6 +155,9 @@ is_move(Instruction, _) -> false -> false end. +is_spill_move(Instruction, _) -> + hipe_sparc:is_pseudo_spill_move(Instruction). + reg_nr(Reg, _) -> hipe_sparc:temp_reg(Reg). diff --git a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl index 9993af81c2..485fdc212a 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl @@ -24,6 +24,7 @@ ,reg_nr/2 ,def_use/2 ,is_move/2 + ,is_spill_move/2 ,is_precoloured/2 ,var_range/2 ,allocatable/1 @@ -135,6 +136,9 @@ defines_all_alloc(I, _) -> is_move(I, _) -> hipe_sparc:is_pseudo_fmove(I). +is_spill_move(I, _) -> + hipe_sparc:is_pseudo_spill_fmove(I). + reg_nr(Reg, _) -> hipe_sparc:temp_reg(Reg). diff --git a/lib/hipe/regalloc/hipe_x86_specific.erl b/lib/hipe/regalloc/hipe_x86_specific.erl index f21384d377..dacfb71b00 100644 --- a/lib/hipe/regalloc/hipe_x86_specific.erl +++ b/lib/hipe/regalloc/hipe_x86_specific.erl @@ -46,6 +46,7 @@ def_use/2, is_arg/2, % used by hipe_ls_regalloc is_move/2, + is_spill_move/2, is_fixed/2, % used by hipe_graph_coloring_regalloc is_global/2, is_precoloured/2, @@ -210,6 +211,9 @@ is_move(Instruction,_) -> false -> false end. +is_spill_move(Instruction,_) -> + hipe_x86:is_pseudo_spill_move(Instruction). + reg_nr(Reg,_) -> hipe_x86:temp_reg(Reg). diff --git a/lib/hipe/regalloc/hipe_x86_specific_x87.erl b/lib/hipe/regalloc/hipe_x86_specific_x87.erl index 4b4c83f76d..3fe49e1f00 100644 --- a/lib/hipe/regalloc/hipe_x86_specific_x87.erl +++ b/lib/hipe/regalloc/hipe_x86_specific_x87.erl @@ -47,6 +47,7 @@ uses/2, defines/2, defines_all_alloc/2, + is_spill_move/2, is_global/2, reg_nr/2, physical_name/2, @@ -158,6 +159,9 @@ defines(I, _) -> defines_all_alloc(I, _) -> hipe_amd64_defuse:insn_defs_all(I). +is_spill_move(I, _) -> + hipe_x86:is_pseudo_spill_fmove(I). + temp_is_double(Temp) -> hipe_x86:temp_type(Temp) =:= 'double'. -- cgit v1.2.3
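(Editor's closing sketch, an assumption rather than part of the patches.) Assuming the proplist flags read in the code above (ra_restore_reuse, ra_range_split, range_split_weights, range_split_mode1_fudge, range_split_min_gain) are passed through unchanged from the HiPE compiler options, the new range splitter might be enabled for a module roughly as below; the numeric values are arbitrary examples, not recommended defaults.

%% Hedged usage sketch: compile a module natively with range splitting
%% enabled. Option plumbing from hipe:c/2 down to the register allocator is
%% assumed; only option names that appear in the diffs above are used.
compile_with_range_split(Mod) ->
  hipe:c(Mod, [o2,
               ra_range_split,                 % run hipe_range_split:split/5
               range_split_weights,            % weight costs by hipe_bb_weights
               {range_split_mode1_fudge, 1.1},
               {range_split_min_gain, 0.5}]).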