13 files changed, 1965 insertions, 75 deletions
diff --git a/lib/hipe/regalloc/Makefile b/lib/hipe/regalloc/Makefile
index 209f230a9b..81a92e5d35 100644
--- a/lib/hipe/regalloc/Makefile
+++ b/lib/hipe/regalloc/Makefile
@@ -50,8 +50,10 @@ MODULES = hipe_ig hipe_ig_moves hipe_moves \
 	  hipe_optimistic_regalloc \
 	  hipe_coalescing_regalloc \
 	  hipe_graph_coloring_regalloc \
+	  hipe_range_split \
 	  hipe_regalloc_loop \
 	  hipe_regalloc_prepass \
+	  hipe_restore_reuse \
 	  hipe_ls_regalloc \
 	  hipe_ppc_specific hipe_ppc_specific_fp \
 	  hipe_sparc_specific hipe_sparc_specific_fp \
diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl
index 9682d37520..d592ba391c 100644
--- a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl
+++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl
@@ -30,6 +30,7 @@
 	 def_use/2,
 	 is_arg/2,	%% used by hipe_ls_regalloc
 	 is_move/2,
+	 is_spill_move/2,
 	 is_fixed/2,	%% used by hipe_graph_coloring_regalloc
 	 is_global/2,
 	 is_precoloured/2,
@@ -50,12 +51,19 @@
 -export([check_and_rewrite/3,
 	 check_and_rewrite/4]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights, hipe_range_split
+-export([branch_preds/2]).
+
 %%----------------------------------------------------------------------------
 
 -include("../flow/cfg.hrl").
@@ -152,6 +160,9 @@ bb(CFG, L, _) ->
 update_bb(CFG,L,BB,_) ->
   hipe_x86_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Instr,_) ->
+  hipe_x86_cfg:branch_preds(Instr).
+
 %% AMD64 stuff
 
 def_use(Instruction, _) ->
@@ -184,10 +195,34 @@ is_move(Instruction, _) ->
 	andalso hipe_x86:is_temp(Dst) andalso hipe_x86:temp_is_allocatable(Dst);
     false -> false
   end.
+
+is_spill_move(Instruction,_) ->
+  hipe_x86:is_pseudo_spill_fmove(Instruction).
  
 reg_nr(Reg, _) ->
   hipe_x86:temp_reg(Reg).
 
+mk_move(Src, Dst, _) ->
+  hipe_x86:mk_fmove(Src, Dst).
+
+mk_goto(Label, _) ->
+  hipe_x86:mk_jmp_label(Label).
+
+redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) ->
+  Ref = make_ref(),
+  put(Ref, false),
+  I = hipe_x86_subst:insn_lbls(
+	fun(Tgt) ->
+	    if Tgt =:= ToOld -> put(Ref, true), ToNew;
+	       is_integer(Tgt) -> Tgt
+	    end
+	end, Jmp),
+  true = erase(Ref), % Assert that something was rewritten
+  I.
+
+new_label(_) ->
+  hipe_gensym:get_next_label(x86).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(x86).
 
diff --git a/lib/hipe/regalloc/hipe_arm_specific.erl b/lib/hipe/regalloc/hipe_arm_specific.erl
index cef22e5af9..7ebc6aa336 100644
--- a/lib/hipe/regalloc/hipe_arm_specific.erl
+++ b/lib/hipe/regalloc/hipe_arm_specific.erl
@@ -24,6 +24,7 @@
 	 ,reg_nr/2
 	 ,def_use/2
 	 ,is_move/2
+	 ,is_spill_move/2
 	 ,is_precoloured/2
 	 ,var_range/2
 	 ,allocatable/1
@@ -46,12 +47,19 @@
 %% callbacks for hipe_regalloc_loop
 -export([check_and_rewrite/3]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights, hipe_range_split
+-export([branch_preds/2]).
+
 check_and_rewrite(CFG, Coloring, no_context) ->
   hipe_arm_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal').
 
@@ -115,6 +123,9 @@ bb(CFG,L,_) ->
 update_bb(CFG,L,BB,_) ->
   hipe_arm_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Branch,_) ->
+  hipe_arm_cfg:branch_preds(Branch).
+
 %% ARM stuff
 
 def_use(Instruction, Ctx) ->
@@ -144,9 +155,33 @@ is_move(Instruction, _) ->
     false -> false
   end.
 
+is_spill_move(Instruction, _) ->
+  hipe_arm:is_pseudo_spill_move(Instruction).
+
 reg_nr(Reg, _) ->
   hipe_arm:temp_reg(Reg).
 
+mk_move(Src, Dst, _) ->
+  hipe_arm:mk_pseudo_move(Dst, Src).
+
+mk_goto(Label, _) ->
+  hipe_arm:mk_b_label(Label).
+
+redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) ->
+  Ref = make_ref(),
+  put(Ref, false),
+  I = hipe_arm_subst:insn_lbls(
+	fun(Tgt) ->
+	    if Tgt =:= ToOld -> put(Ref, true), ToNew;
+	       is_integer(Tgt) -> Tgt
+	    end
+	end, Jmp),
+  true = erase(Ref), % Assert that something was rewritten
+  I.
+
+new_label(_) ->
+  hipe_gensym:get_next_label(arm).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(arm).
 
diff --git a/lib/hipe/regalloc/hipe_ppc_specific.erl b/lib/hipe/regalloc/hipe_ppc_specific.erl
index a6450b4d96..81bb551bd2 100644
--- a/lib/hipe/regalloc/hipe_ppc_specific.erl
+++ b/lib/hipe/regalloc/hipe_ppc_specific.erl
@@ -24,6 +24,7 @@
 	 ,reg_nr/2
 	 ,def_use/2
 	 ,is_move/2
+	 ,is_spill_move/2
 	 ,is_precoloured/2
 	 ,var_range/2
 	 ,allocatable/1
@@ -46,12 +47,19 @@
 %% callbacks for hipe_regalloc_loop
 -export([check_and_rewrite/3]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights, hipe_range_split
+-export([branch_preds/2]).
+
 check_and_rewrite(CFG, Coloring, _) ->
   hipe_ppc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal').
 
@@ -115,6 +123,9 @@ bb(CFG,L,_) ->
 update_bb(CFG,L,BB,_) ->
   hipe_ppc_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Instr,_) ->
+  hipe_ppc_cfg:branch_preds(Instr).
+
 %% PowerPC stuff
 
 def_use(Instruction, Ctx) ->
@@ -144,9 +155,24 @@ is_move(Instruction, _) ->
     false -> false
   end.
 
+is_spill_move(Instruction, _) ->
+  hipe_ppc:is_pseudo_spill_move(Instruction).
+
 reg_nr(Reg, _) ->
   hipe_ppc:temp_reg(Reg).
 
+mk_move(Src, Dst, _) ->
+  hipe_ppc:mk_pseudo_move(Dst, Src).
+
+mk_goto(Label, _) ->
+  hipe_ppc:mk_b_label(Label).
+
+redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) ->
+  hipe_ppc_cfg:redirect_jmp(Jmp, ToOld, ToNew).
+
+new_label(_) ->
+  hipe_gensym:get_next_label(ppc).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(ppc).
 
diff --git a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl
index 23cb6c0318..dcfdf6592c 100644
--- a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl
+++ b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl
@@ -24,6 +24,7 @@
 	 ,reg_nr/2
 	 ,def_use/2
 	 ,is_move/2
+	 ,is_spill_move/2
 	 ,is_precoloured/2
 	 ,var_range/2
 	 ,allocatable/1
@@ -46,12 +47,19 @@
 %% callbacks for hipe_regalloc_loop
 -export([check_and_rewrite/3]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights, hipe_range_split
+-export([branch_preds/2]).
+
 check_and_rewrite(CFG, Coloring, _) ->
   hipe_ppc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring).
 
@@ -108,6 +116,9 @@ bb(CFG, L, _) ->
 update_bb(CFG,L,BB,_) ->
   hipe_ppc_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Instr,_) ->
+  hipe_ppc_cfg:branch_preds(Instr).
+
 %% PowerPC stuff
 
 def_use(I, Ctx) ->
@@ -125,9 +136,24 @@ defines_all_alloc(I, _) ->
 is_move(I, _) ->
   hipe_ppc:is_pseudo_fmove(I).
  
+is_spill_move(I, _) ->
+  hipe_ppc:is_pseudo_spill_fmove(I).
+
 reg_nr(Reg, _) ->
   hipe_ppc:temp_reg(Reg).
 
+mk_move(Src, Dst, _) ->
+  hipe_ppc:mk_pseudo_fmove(Dst, Src).
+
+mk_goto(Label, _) ->
+  hipe_ppc:mk_b_label(Label).
+
+redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) ->
+  hipe_ppc_cfg:redirect_jmp(Jmp, ToOld, ToNew).
+
+new_label(_) ->
+  hipe_gensym:get_next_label(ppc).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(ppc).
 
diff --git a/lib/hipe/regalloc/hipe_range_split.erl b/lib/hipe/regalloc/hipe_range_split.erl
new file mode 100644
index 0000000000..39b086d9f7
--- /dev/null
+++ b/lib/hipe/regalloc/hipe_range_split.erl
@@ -0,0 +1,1187 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%@doc
+%%	       TEMPORARY LIVE RANGE SPLITTING PASS
+%%
+%% Live range splitting is useful to allow a register allocator to allocate a
+%% temporary to a register for part of its lifetime, even if it cannot be for
+%% the entirety. This improves register allocation quality, at the cost of
+%% making the allocation problem more time and memory intensive to solve.
+%%
+%% Optimal allocation can be achieved if all temporaries are split at every
+%% program point (between all instructions), but this makes register allocation
+%% infeasibly slow in practice. Instead, this module uses heuristics to choose
+%% which temporaries should have their live ranges split, and at which points.
+%%
+%% The range splitter only considers temps which are live during a call
+%% instruction, since they're known to be spilled. The control-flow graph is
+%% partitioned at call instructions and splitting decisions are made separately
+%% for each partition. The register copy of a temp (if any) gets a separate name
+%% in each partition.
+%%
+%% There are three different ways the range splitter may choose to split a
+%% temporary in a program partition:
+%%
+%%  * Mode1: Spill the temp before calls, and restore it after them
+%%  * Mode2: Spill the temp after definitions, restore it after calls
+%%  * Mode3: Spill the temp after definitions, restore it before uses
+%%
+%% To pick which of these should be used for each temp×partition pair, the range
+%% splitter uses a cost function. The cost is simply the sum of the cost of all
+%% expected stack accesses, and the cost for an individual stack access is based
+%% on the probability weight of the basic block that it resides in. This biases
+%% the range splitter so that it attempts moving stack accesses from a function's
+%% hot path to the cold path.
+%%
+%% The heuristic has a couple of tuning knobs, adjusting its preference for
+%% different spilling modes, aggressiveness, and how much influence the basic
+%% block probability weights have.
+%%
+%% Edge case not handled: Call instructions directly defining a pseudo. In that
+%% case, if that pseudo has been selected for mode2 spills, no spill is inserted
+%% after the call.
+-module(hipe_range_split).
+
+-export([split/5]).
+
+-compile(inline).
+
+%% -define(DO_ASSERT, 1).
+%% -define(DEBUG, 1).
+-include("../main/hipe.hrl").
+
+%% Heuristic tuning constants
+-define(DEFAULT_MIN_GAIN, 1.1).    % option: range_split_min_gain
+-define(DEFAULT_MODE1_FUDGE, 1.1). % option: range_split_mode1_fudge
+-define(DEFAULT_WEIGHT_POWER, 2).  % option: range_split_weight_power
+-define(WEIGHT_CONST_FUN(Power), math:log(Power)/math:log(100)).
+-define(WEIGHT_FUN(Wt, Const), math:pow(Wt, Const)).
+-define(HEUR_MAX_TEMPS, 20000).
+
+-type target_cfg()       :: any().
+-type target_instr()     :: any().
+-type target_temp()      :: any().
+-type liveness()         :: any().
+-type target_module()    :: module().
+-type target_context()   :: any().
+-type target()           :: {target_module(), target_context()}.
+-type liveset()          :: ordsets:ordset(temp()).
+-type temp()             :: non_neg_integer().
+-type label()            :: non_neg_integer().
+
+-spec split(target_cfg(), liveness(), target_module(), target_context(),
+	    comp_options())
+	   -> target_cfg().
+split(TCFG0, Liveness, TargetMod, TargetContext, Options) ->
+  Target = {TargetMod, TargetContext},
+  NoTemps = number_of_temporaries(TCFG0, Target),
+  if NoTemps > ?HEUR_MAX_TEMPS ->
+      ?debug_msg("~w: Too many temps (~w), falling back on restore_reuse.~n",
+		 [?MODULE, NoTemps]),
+      hipe_restore_reuse:split(TCFG0, Liveness, TargetMod, TargetContext);
+     true ->
+      Wts = compute_weights(TCFG0, TargetMod, TargetContext, Options),
+      {CFG0, Temps} = convert(TCFG0, Target),
+      Avail = avail_analyse(TCFG0, Liveness, Target),
+      Defs = def_analyse(CFG0, TCFG0),
+      RDefs = rdef_analyse(CFG0),
+      PLive = plive_analyse(CFG0),
+      {CFG, DUCounts, Costs, DSets0} =
+	scan(CFG0, Liveness, PLive, Wts, Defs, RDefs, Avail, Target),
+      {DSets, _} = hipe_dsets:to_map(DSets0),
+      Renames = decide(DUCounts, Costs, Target, Options),
+      rewrite(CFG, TCFG0, Target, Liveness, PLive, Defs, Avail, DSets, Renames,
+	      Temps)
+  end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Internal program representation
+%%
+%% Second pass: Convert cfg to internal representation
+
+-record(cfg, {
+	  rpo_labels :: [label()],
+	  bbs        :: #{label() => bb()}
+	 }).
+-type cfg() :: #cfg{}.
+
+cfg_bb(L, #cfg{bbs=BBS}) -> maps:get(L, BBS).
+
+cfg_postorder(#cfg{rpo_labels=RPO}) -> lists:reverse(RPO).
+
+-record(bb, {
+	  code     :: [code_elem()],
+	  %% If the last instruction of code defines all allocatable registers
+	  has_call :: boolean(),
+	  succ     :: [label()]
+	 }).
+-type bb() :: #bb{}.
+-type code_elem() :: instr() | mode2_spills() | mode3_restores().
+
+bb_code(#bb{code=Code}) -> Code.
+bb_has_call(#bb{has_call=HasCall}) -> HasCall.
+bb_succ(#bb{succ=Succ}) -> Succ.
+
+bb_butlast(#bb{code=Code}) ->
+  bb_butlast_1(Code).
+
+bb_butlast_1([_Last]) -> [];
+bb_butlast_1([I|Is]) -> [I|bb_butlast_1(Is)].
+
+bb_last(#bb{code=Code}) -> lists:last(Code).
+
+-record(instr, {
+	  i   :: target_instr(),
+	  def :: ordsets:ordset(temp()),
+	  use :: ordsets:ordset(temp())
+	 }).
+-type instr() :: #instr{}.
+
+-record(mode2_spills, {
+	  temps :: ordsets:ordset(temp())
+	}).
+-type mode2_spills() :: #mode2_spills{}.
+
+-record(mode3_restores, {
+	  temps :: ordsets:ordset(temp())
+	}).
+-type mode3_restores() :: #mode3_restores{}.
+
+-spec convert(target_cfg(), target()) -> {cfg(), temps()}.
+convert(CFG, Target) ->
+  RPO = reverse_postorder(CFG, Target),
+  {BBsList, Temps} = convert_bbs(RPO, CFG, Target, #{}, []),
+  {#cfg{rpo_labels = RPO,
+	bbs = maps:from_list(BBsList)},
+   Temps}.
+
+convert_bbs([], _CFG, _Target, Temps, Acc) -> {Acc, Temps};
+convert_bbs([L|Ls], CFG, Target, Temps0, Acc) ->
+  Succs = hipe_gen_cfg:succ(CFG, L),
+  TBB = bb(CFG, L, Target),
+  TCode = hipe_bb:code(TBB),
+  {Code, Last, Temps} = convert_code(TCode, Target, Temps0, []),
+  HasCall = defines_all_alloc(Last#instr.i, Target),
+  BB = #bb{code = Code,
+	   has_call = HasCall,
+	   succ = Succs},
+  convert_bbs(Ls, CFG, Target, Temps, [{L,BB}|Acc]).
+
+convert_code([], _Target, Temps, [Last|_]=Acc) ->
+  {lists:reverse(Acc), Last, Temps};
+convert_code([TI|TIs], Target, Temps0, Acc) ->
+  {TDef, TUse} = def_use(TI, Target),
+  I = #instr{i = TI,
+	     def = ordsets:from_list(reg_names(TDef, Target)),
+	     use = ordsets:from_list(reg_names(TUse, Target))},
+  Temps = add_temps(TUse, Target, add_temps(TDef, Target, Temps0)),
+  convert_code(TIs, Target, Temps, [I|Acc]).
+
+-type temps() :: #{temp() => target_temp()}.
+add_temps([], _Target, Temps) -> Temps;
+add_temps([T|Ts], Target, Temps) ->
+  add_temps(Ts, Target, Temps#{reg_nr(T, Target) => T}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Fourth pass: P({DEF}) lattice fwd dataflow (for eliding stores at SPILL
+%% splits)
+-type defsi() :: #{label() => defseti() | {call, defseti(), defseti()}}.
+-type defs()  :: #{label() => defsetf()}.
+
+-spec def_analyse(cfg(), target_cfg()) -> defs().
+def_analyse(CFG = #cfg{rpo_labels = RPO}, TCFG) ->
+  Defs0 = def_init(CFG),
+  def_dataf(RPO, TCFG, Defs0).
+
+-spec def_init(cfg()) -> defsi().
+def_init(#cfg{bbs = BBs}) ->
+  maps:from_list(
+    [begin
+       {L, case HasCall of
+	     false -> def_init_scan(bb_code(BB), defseti_new());
+	     true ->
+	       {call, def_init_scan(bb_butlast(BB), defseti_new()),
+		defseti_from_ordset((bb_last(BB))#instr.def)}
+	   end}
+     end || {L, BB = #bb{has_call=HasCall}} <- maps:to_list(BBs)]).
+
+def_init_scan(Instrs, Defset0) ->
+  %% Fold the def set of each instruction, in order, into the accumulator
+  AddDefs = fun(#instr{def=Def}, Acc) -> defseti_add_ordset(Def, Acc) end,
+  lists:foldl(AddDefs, Defset0, Instrs).
+
+-spec def_dataf([label()], target_cfg(), defsi()) -> defs().
+def_dataf(Labels, TCFG, Defs0) ->
+  case def_dataf_once(Labels, TCFG, Defs0, 0) of
+    {Defs, 0} ->
+      def_finalise(Defs);
+    {Defs, _Changed} ->
+      def_dataf(Labels, TCFG, Defs)
+  end.
+
+-spec def_finalise(defsi()) -> defs().
+def_finalise(Defs) ->
+  maps:from_list([{K, defseti_finalise(BL)}
+		  || {K, {call, BL, _}} <- maps:to_list(Defs)]).
+
+-spec def_dataf_once([label()], target_cfg(), defsi(), non_neg_integer())
+		    -> {defsi(), non_neg_integer()}.
+def_dataf_once([], _TCFG, Defs, Changed) -> {Defs, Changed};
+def_dataf_once([L|Ls], TCFG, Defs0, Changed0) ->
+  AddPreds =
+    fun(Defset1) ->
+	lists:foldl(fun(P, Defset2) ->
+			defseti_union(defout(P, Defs0), Defset2)
+		    end, Defset1, hipe_gen_cfg:pred(TCFG, L))
+    end,
+  Defset =
+    case Defset0 = maps:get(L, Defs0) of
+      {call, Butlast, Defout} -> {call, AddPreds(Butlast), Defout};
+      _ -> AddPreds(Defset0)
+    end,
+  Changed = case Defset =:= Defset0 of
+	      true  -> Changed0;
+	      false -> Changed0+1
+	    end,
+  def_dataf_once(Ls, TCFG, Defs0#{L := Defset}, Changed).
+
+-spec defout(label(), defsi()) -> defseti().
+defout(L, Defs) ->
+  case maps:get(L, Defs) of
+    {call, _DefButLast, Defout} -> Defout;
+    Defout -> Defout
+  end.
+
+-spec defbutlast(label(), defs()) -> defsetf().
+defbutlast(L, Defs) -> maps:get(L, Defs).
+
+-spec defseti_new() -> defseti().
+-spec defseti_union(defseti(), defseti()) -> defseti().
+-spec defseti_add_ordset(ordsets:ordset(temp()), defseti()) -> defseti().
+-spec defseti_from_ordset(ordsets:ordset(temp())) -> defseti().
+-spec defseti_finalise(defseti()) -> defsetf().
+-spec defsetf_member(temp(), defsetf()) -> boolean().
+-spec defsetf_intersect_ordset(ordsets:ordset(temp()), defsetf())
+			      -> ordsets:ordset(temp()).
+
+-type defseti() :: bitord().
+defseti_new() -> bitord_new().
+defseti_union(A, B) -> bitord_union(A, B).
+defseti_add_ordset(OS, D) -> defseti_union(defseti_from_ordset(OS), D).
+defseti_from_ordset(OS) -> bitord_from_ordset(OS).
+defseti_finalise(D) -> bitarr_from_bitord(D).
+
+-type defsetf() :: bitarr().
+defsetf_member(E, D) -> bitarr_get(E, D).
+
+%% Intersection of an ordset with a finalised def set.
+%% Keeps the members of OS whose bit is set in D; filtering preserves the
+%% element order, so the result is itself an ordset.
+defsetf_intersect_ordset(OS, D) ->
+  IsDefined = fun(E) -> bitarr_get(E, D) end,
+  lists:filter(IsDefined, OS).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Fifth pass: P({DEF}) lattice reverse dataflow (for eliding stores at defines
+%% in mode2)
+-type rdefsi() :: #{label() =>
+		     {call, rdefseti(), [label()]}
+		   | {nocall, rdefseti(), rdefseti(), [label()]}}.
+-type rdefs() :: #{label() => {final, rdefsetf(), [label()]}}.
+
+-spec rdef_analyse(cfg()) -> rdefs().
+rdef_analyse(CFG = #cfg{rpo_labels=RPO}) ->
+  Defs0 = rdef_init(CFG),
+  PO = rdef_postorder(RPO, CFG, []),
+  rdef_dataf(PO, Defs0).
+
+%% Builds the post-order worklist from the reverse post-order RPO, putting it
+%% in front of Acc. Labels of 'call' blocks are left out, since the dataflow
+%% iteration never changes their value.
+-spec rdef_postorder([label()], cfg(), [label()]) -> [label()].
+rdef_postorder(RPO, CFG, Acc) ->
+  NoCall = [L || L <- RPO, not bb_has_call(cfg_bb(L, CFG))],
+  %% lists:reverse/2 flips NoCall and prepends the result to Acc
+  lists:reverse(NoCall, Acc).
+
+-spec rdef_init(cfg()) -> rdefsi().
+rdef_init(#cfg{bbs = BBs}) ->
+  maps:from_list(
+    [{L, case HasCall of
+	   true ->
+	     Defin = rdef_init_scan(bb_butlast(BB), rdefseti_empty()),
+	     {call, Defin, Succs};
+	   false ->
+	     Gen = rdef_init_scan(bb_code(BB), rdefseti_empty()),
+	     {nocall, Gen, rdefseti_top(), Succs}
+	 end}
+     || {L, BB = #bb{has_call=HasCall, succ=Succs}} <- maps:to_list(BBs)]).
+
+-spec rdef_init_scan([instr()], rdefseti()) -> rdefseti().
+rdef_init_scan(Instrs, Defset0) ->
+  %% Union the def set of every instruction, first to last, into Defset0
+  AddDefs = fun(#instr{def=Def}, Acc) -> rdefseti_add_ordset(Def, Acc) end,
+  lists:foldl(AddDefs, Defset0, Instrs).
+
+-spec rdef_dataf([label()], rdefsi()) -> rdefs().
+rdef_dataf(Labels, Defs0) ->
+  case rdef_dataf_once(Labels, Defs0, 0) of
+    {Defs, 0} ->
+      rdef_finalise(Defs);
+    {Defs, _Changed} ->
+      rdef_dataf(Labels, Defs)
+  end.
+
+-spec rdef_finalise(rdefsi()) -> rdefs().
+rdef_finalise(Defs) ->
+  maps:map(fun(L, V) ->
+	       Succs = rsuccs_val(V),
+	       Defout0 = rdefout_intersect(L, Defs, rdefseti_top()),
+	       {final, rdefset_finalise(Defout0), Succs}
+	   end, Defs).
+
+-spec rdef_dataf_once([label()], rdefsi(), non_neg_integer())
+		     -> {rdefsi(), non_neg_integer()}.
+rdef_dataf_once([], Defs, Changed) -> {Defs, Changed};
+rdef_dataf_once([L|Ls], Defs0, Changed0) ->
+  #{L := {nocall, Gen, Defin0, Succs}} = Defs0,
+  Defin = rdefseti_union(Gen, rdefout_intersect(L, Defs0, Defin0)),
+  Defset = {nocall, Gen, Defin, Succs},
+  Changed = case Defin =:= Defin0 of
+	      true  -> Changed0;
+	      false -> Changed0+1
+	    end,
+  rdef_dataf_once(Ls, Defs0#{L := Defset}, Changed).
+
+-spec rdefin(label(), rdefsi()) -> rdefseti().
+rdefin(L, Defs) -> rdefin_val(maps:get(L, Defs)).
+rdefin_val({nocall, _Gen, Defin, _Succs}) -> Defin;
+rdefin_val({call, Defin, _Succs}) -> Defin.
+
+-spec rsuccs(label(), rdefsi()) -> [label()].
+rsuccs(L, Defs) -> rsuccs_val(maps:get(L, Defs)).
+rsuccs_val({nocall, _Gen, _Defin, Succs}) -> Succs;
+rsuccs_val({call, _Defin, Succs}) -> Succs.
+
+-spec rdefout(label(), rdefs()) -> rdefsetf().
+rdefout(L, Defs) ->
+  #{L := {final, Defout, _Succs}} = Defs,
+  Defout.
+
+-spec rdefout_intersect(label(), rdefsi(), rdefseti()) -> rdefseti().
+rdefout_intersect(L, Defs, Init) ->
+  lists:foldl(fun(S, Acc) ->
+		  rdefseti_intersect(rdefin(S, Defs), Acc)
+	      end, Init, rsuccs(L, Defs)).
+
+-type rdefseti() :: bitord() | top.
+rdefseti_top() -> top.
+rdefseti_empty() -> bitord_new().
+-spec rdefseti_from_ordset(ordsets:ordset(temp())) -> rdefseti().
+rdefseti_from_ordset(OS) -> bitord_from_ordset(OS).
+
+-spec rdefseti_add_ordset(ordsets:ordset(temp()), rdefseti()) -> rdefseti().
+rdefseti_add_ordset(_, top) -> top; % Should never happen in rdef_dataf
+rdefseti_add_ordset(OS, D) -> rdefseti_union(rdefseti_from_ordset(OS), D).
+
+-spec rdefseti_union(rdefseti(), rdefseti()) -> rdefseti().
+rdefseti_union(top, _) -> top;
+rdefseti_union(_, top) -> top;
+rdefseti_union(A, B) -> bitord_union(A, B).
+
+-spec rdefseti_intersect(rdefseti(), rdefseti()) -> rdefseti().
+rdefseti_intersect(top, D) -> D;
+rdefseti_intersect(D, top) -> D;
+rdefseti_intersect(A, B) -> bitord_intersect(A, B).
+
+-type rdefsetf() :: {arr, bitarr()} | top.
+-spec rdefset_finalise(rdefseti()) -> rdefsetf().
+rdefset_finalise(top) -> top;
+rdefset_finalise(Ord) -> {arr, bitarr_from_bitord(Ord)}.
+
+%% rdefsetf_top() -> top.
+rdefsetf_empty() -> {arr, bitarr_new()}.
+
+-spec rdefsetf_add_ordset(ordsets:ordset(temp()), rdefsetf()) -> rdefsetf().
+rdefsetf_add_ordset(_, top) -> top; % top absorbs any addition
+rdefsetf_add_ordset(OS, {arr, Arr}) ->
+  {arr, lists:foldl(fun bitarr_set/2, Arr, OS)}. % set one bit per temp in OS
+
+-spec rdef_step(instr(), rdefsetf()) -> rdefsetf().
+rdef_step(#instr{def=Def}, Defset) ->
+  %% ?ASSERT(not defines_all_alloc(I, Target)),
+  rdefsetf_add_ordset(Def, Defset).
+
+-spec ordset_subtract_rdefsetf(ordsets:ordset(temp()), rdefsetf())
+			      -> ordsets:ordset(temp()).
+ordset_subtract_rdefsetf(_, top) -> [];
+ordset_subtract_rdefsetf(OS, {arr, Arr}) ->
+  %% Lazy implementation; could do better if OS can grow
+  lists:filter(fun(E) -> not bitarr_get(E, Arr) end, OS).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Integer sets represented as bit sets
+%%
+%% Two representations; bitord() and bitarr()
+-define(LIMB_IX_BITS,    11).
+-define(LIMB_BITS,       (1 bsl ?LIMB_IX_BITS)).
+-define(LIMB_IX(Index),  (Index bsr ?LIMB_IX_BITS)).
+-define(BIT_IX(Index),   (Index band (?LIMB_BITS - 1))).
+-define(BIT_MASK(Index), (1 bsl ?BIT_IX(Index))).
+
+%% bitord(): fast at union/2 and can be compared for equality with '=:='
+-type bitord() :: orddict:orddict(non_neg_integer(), 0..((1 bsl ?LIMB_BITS)-1)).
+
+-spec bitord_new() -> bitord().
+bitord_new() -> [].
+
+-spec bitord_union(bitord(), bitord()) -> bitord().
+bitord_union(Lhs, Rhs) ->
+  orddict:merge(fun(_, L, R) -> L bor R end, Lhs, Rhs).
+
+-spec bitord_intersect(bitord(), bitord()) -> bitord().
+bitord_intersect([], _) -> [];
+bitord_intersect(_, []) -> [];
+bitord_intersect([{K, L}|Ls], [{K, R}|Rs]) ->
+  [{K, L band R} | bitord_intersect(Ls, Rs)];
+bitord_intersect([{LK, _}|Ls], [{RK, _}|_]=Rs) when LK < RK ->
+  bitord_intersect(Ls, Rs);
+bitord_intersect([{LK, _}|_]=Ls, [{RK, _}|Rs]) when LK > RK ->
+  bitord_intersect(Ls, Rs).
+
+-spec bitord_from_ordset(ordsets:ordset(non_neg_integer())) -> bitord().
+bitord_from_ordset([]) -> [];
+bitord_from_ordset([B|Bs]) ->
+  bitord_from_ordset_1(Bs, ?LIMB_IX(B), ?BIT_MASK(B)).
+
+bitord_from_ordset_1([B|Bs], Key, Val) when Key =:= ?LIMB_IX(B) ->
+  bitord_from_ordset_1(Bs, Key, Val bor ?BIT_MASK(B));
+bitord_from_ordset_1([B|Bs], Key, Val) ->
+  [{Key,Val} | bitord_from_ordset_1(Bs, ?LIMB_IX(B), ?BIT_MASK(B))];
+bitord_from_ordset_1([], Key, Val) -> [{Key, Val}].
+
+%% bitarr(): fast (enough) at get/2
+-type bitarr() :: array:array(0..((1 bsl ?LIMB_BITS)-1)).
+
+-spec bitarr_new() -> bitarr().
+bitarr_new() -> array:new({default, 0}).
+
+-spec bitarr_get(non_neg_integer(), bitarr()) -> boolean().
+bitarr_get(Index, Array) ->
+  Limb = array:get(?LIMB_IX(Index), Array),
+  0 =/= (Limb band ?BIT_MASK(Index)).
+
+-spec bitarr_set(non_neg_integer(), bitarr()) -> bitarr().
+bitarr_set(Index, Array) ->
+  Limb0 = array:get(?LIMB_IX(Index), Array),
+  Limb = Limb0 bor ?BIT_MASK(Index),
+  array:set(?LIMB_IX(Index), Limb, Array).
+
+-spec bitarr_from_bitord(bitord()) -> bitarr().
+bitarr_from_bitord(Ord) ->
+  array:from_orddict(Ord, 0).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Sixth pass: Partition-local liveness analysis
+%%
+%% As temps are not spilled when exiting a partition in mode2, only
+%% partition-local uses need to be considered when deciding which temps need
+%% restoring at partition entry.
+
+-type plive() :: #{label() =>
+		     {call, liveset(), [label()]}
+		   | {nocall, {liveset(), liveset()}, liveset(), [label()]}}.
+
+-spec plive_analyse(cfg()) -> plive().
+plive_analyse(CFG) ->
+  Defs0 = plive_init(CFG),
+  PO = cfg_postorder(CFG),
+  plive_dataf(PO, Defs0).
+
+-spec plive_init(cfg()) -> plive().
+plive_init(#cfg{bbs = BBs}) ->
+  maps:from_list(
+    [begin
+       {L, case HasCall of
+	     true ->
+	       {Gen, _} = plive_init_scan(bb_code(BB)),
+	       {call, Gen, Succs};
+	     false ->
+	       GenKill = plive_init_scan(bb_code(BB)),
+	       {nocall, GenKill, liveset_empty(), Succs}
+	   end}
+     end || {L, BB = #bb{has_call=HasCall, succ=Succs}} <- maps:to_list(BBs)]).
+
+-spec plive_init_scan([instr()]) -> {liveset(), liveset()}.
+plive_init_scan(Instrs) ->
+  %% Right fold, i.e. the code is processed from the last instruction backwards
+  Step = fun(#instr{def=InstrKill, use=InstrGen}, {Gen0, Kill0}) ->
+	     Gen = liveset_union(liveset_subtract(Gen0, InstrKill), InstrGen),
+	     Kill = liveset_subtract(liveset_union(Kill0, InstrKill), InstrGen),
+	     {Gen, Kill}
+	 end,
+  lists:foldr(Step, {liveset_empty(), liveset_empty()}, Instrs).
+
+-spec plive_dataf([label()], plive()) -> plive().
+plive_dataf(Labels, PLive0) ->
+  case plive_dataf_once(Labels, PLive0, 0) of
+    {PLive, 0} -> PLive;
+    {PLive, _Changed} ->
+      plive_dataf(Labels, PLive)
+  end.
+
+-spec plive_dataf_once([label()], plive(), non_neg_integer()) ->
+			  {plive(), non_neg_integer()}.
+plive_dataf_once([], PLive, Changed) -> {PLive, Changed};
+plive_dataf_once([L|Ls], PLive0, Changed0) ->
+  Liveset =
+    case Liveset0 = maps:get(L, PLive0) of
+      {call, Livein, Succs} ->
+	{call, Livein, Succs};
+      {nocall, {Gen, Kill} = GenKill, _OldLivein, Succs} ->
+	Liveout = pliveout(L, PLive0),
+	Livein = liveset_union(Gen, liveset_subtract(Liveout, Kill)),
+	{nocall, GenKill, Livein, Succs}
+    end,
+  Changed = case Liveset =:= Liveset0 of
+	      true  -> Changed0;
+	      false -> Changed0+1
+	    end,
+  plive_dataf_once(Ls, PLive0#{L := Liveset}, Changed).
+
+-spec pliveout(label(), plive()) -> liveset().
+pliveout(L, PLive) ->
+  liveset_union([plivein(S, PLive) || S <- psuccs(L, PLive)]).
+
+-spec psuccs(label(), plive()) -> [label()].
+psuccs(L, PLive) -> psuccs_val(maps:get(L, PLive)).
+psuccs_val({call, _Livein, Succs}) -> Succs;
+psuccs_val({nocall, _GenKill, _Livein, Succs}) -> Succs.
+
+-spec plivein(label(), plive()) -> liveset().
+plivein(L, PLive) -> plivein_val(maps:get(L, PLive)).
+plivein_val({call, Livein, _Succs}) -> Livein;
+plivein_val({nocall, _GenKill, Livein, _Succs}) ->  Livein.
+
+liveset_empty() -> ordsets:new().
+liveset_subtract(A, B) -> ordsets:subtract(A, B).
+liveset_union(A, B) -> ordsets:union(A, B).
+liveset_union(LivesetList) -> ordsets:union(LivesetList).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Third pass: Compute dataflow analyses required for placing mode3
+%% spills/restores.
+%% Reuse analysis implementation in hipe_restore_reuse.
+%% XXX: hipe_restore_reuse has its own "rdef"; we would like to reuse that one
+%% too.
+-type avail() :: hipe_restore_reuse:avail().
+
+-spec avail_analyse(target_cfg(), liveness(), target()) -> avail().
+avail_analyse(CFG, Liveness, Target) ->
+  hipe_restore_reuse:analyse(CFG, Liveness, Target).
+
+-spec mode3_split_in_block(label(), avail()) -> ordsets:ordset(temp()).
+mode3_split_in_block(L, Avail) ->
+  hipe_restore_reuse:split_in_block(L, Avail).
+
+-spec mode3_block_renameset(label(), avail()) -> ordsets:ordset(temp()).
+mode3_block_renameset(L, Avail) ->
+  hipe_restore_reuse:renamed_in_block(L, Avail).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Seventh pass
+%%
+%% Compute program space partitioning, collect information required by the
+%% heuristic.
+-type part_key() :: label().
+-type part_dsets() :: hipe_dsets:dsets(part_key()).
+-type part_dsets_map() :: #{part_key() => part_key()}.
+-type ducounts() :: #{part_key() => ducount()}.
+
+-spec scan(cfg(), liveness(), plive(), weights(), defs(), rdefs(), avail(),
+	   target()) -> {cfg(), ducounts(), costs(), part_dsets()}.
+scan(CFG0, Liveness, PLive, Weights, Defs, RDefs, Avail, Target) ->
+  #cfg{rpo_labels = Labels, bbs = BBs0} = CFG0,
+  CFG = CFG0#cfg{bbs=#{}}, % kill reference
+  DSets0 = hipe_dsets:new(Labels),
+  Costs0 = costs_new(),
+  {BBs, DUCounts0, Costs1, DSets1} =
+    scan_bbs(maps:to_list(BBs0), Liveness, PLive, Weights, Defs, RDefs, Avail,
+	     Target, #{}, Costs0, DSets0, []),
+  {RLList, DSets2} = hipe_dsets:to_rllist(DSets1),
+  {Costs, DSets} = costs_map_roots(DSets2, Costs1),
+  DUCounts = collect_ducounts(RLList, DUCounts0, #{}),
+  {CFG#cfg{bbs=maps:from_list(BBs)}, DUCounts, Costs, DSets}.
+
+-spec collect_ducounts([{label(), [label()]}], ducounts(), ducounts())
+		      -> ducounts().
+collect_ducounts(RLList, DUCounts, Acc0) -> % one merged ducount per root
+  MergeKey = fun(Key, Sum) ->
+		 ducount_merge(maps:get(Key, DUCounts, ducount_new()), Sum)
+	     end,
+  lists:foldl(fun({Root, Ls}, Acc) ->
+		  Acc#{Root => lists:foldl(MergeKey, ducount_new(), Ls)}
+	      end, Acc0, RLList).
+
+%% Scans the blocks one at a time, accumulating
+%%   * the def/use count of each block (DUCounts),
+%%   * the cost of each spill mode, keyed by label and temp (Costs),
+%%   * the partitioning of labels (DSets), and
+%%   * the blocks themselves, with their code replaced by the annotated code
+%%     returned from scan_bb/7 (Acc).
+%%
+%% A block for which bb_has_call/1 is false is unioned with each of its
+%% successors. A block for which it is true is not unioned with anything
+%% here; instead exit costs are charged to the block and entry costs to each
+%% branch target of its last instruction.
+-spec scan_bbs([{label(), bb()}], liveness(), plive(), weights(), defs(),
+	       rdefs(), avail(), target(), ducounts(), costs(), part_dsets(),
+	       [{label(), bb()}])
+	      -> {[{label(), bb()}], ducounts(), costs(), part_dsets()}.
+scan_bbs([], _Liveness, _PLive, _Weights, _Defs, _RDefs, _Avail, _Target,
+	 DUCounts, Costs, DSets, Acc) ->
+  {Acc, DUCounts, Costs, DSets};
+scan_bbs([{L,BB}|BBs], Liveness, PLive, Weights, Defs, RDefs, Avail, Target,
+	 DUCounts0, Costs0, DSets0, Acc) ->
+  Wt = weight(L, Weights),
+  {DSets, Costs5, EntryCode, ExitCode, RDefout, Liveout} =
+    case bb_has_call(BB) of
+      false ->
+	%% Same partition as every successor; the whole block is scanned.
+	DSets1 = lists:foldl(fun(S, DS) -> hipe_dsets:union(L, S, DS) end,
+			     DSets0, bb_succ(BB)),
+	{DSets1, Costs0, bb_code(BB), [], rdefout(L, RDefs),
+	 liveout(Liveness, L, Target)};
+      true ->
+	LastI = #instr{def=LastDef} = bb_last(BB),
+	%% Live out of the block, minus what the last instruction defines
+	LiveBefore = ordsets:subtract(liveout(Liveness, L, Target), LastDef),
+	%% We can omit the spill of a temp that has not been defined since the
+	%% last time it was spilled
+	SpillSet = defsetf_intersect_ordset(LiveBefore, defbutlast(L, Defs)),
+	Costs1 = costs_insert(exit, L, Wt, SpillSet, Costs0),
+	%% Charge entry costs to every branch target of the last instruction,
+	%% with this block's weight scaled by the branch weight.
+	Costs4 = lists:foldl(fun({S, BranchWt}, Costs2) ->
+				 SLivein = livein(Liveness, S, Target),
+				 SPLivein = plivein(S, PLive),
+				 SWt = weight_scaled(L, BranchWt, Weights),
+				 Costs3 = costs_insert(entry1, S, SWt, SLivein, Costs2),
+				 costs_insert(entry2, S, SWt, SPLivein, Costs3)
+			     end, Costs1, branch_preds(LastI#instr.i, Target)),
+	%% The last instruction is kept out of the scanned code and handed to
+	%% scan_bb/7 as the initial tail of the result instead.
+	{DSets0, Costs4, bb_butlast(BB), [LastI], rdefsetf_empty(), LiveBefore}
+    end,
+  Mode3Splits = mode3_split_in_block(L, Avail),
+  %% The forward scan returns the code reversed, which is the order the
+  %% backward scan consumes it in.
+  {RevEntryCode, Restored} = scan_bb_fwd(EntryCode, Mode3Splits, [], []),
+  {Code, DUCount, Mode2Spills} =
+    scan_bb(RevEntryCode, Wt, RDefout, Liveout, ducount_new(), [], ExitCode),
+  DUCounts = DUCounts0#{L => DUCount},
+  M2SpillSet = ordsets:from_list(Mode2Spills),
+  Costs6 = costs_insert(spill, L, Wt, M2SpillSet, Costs5),
+  Mode3Renames = mode3_block_renameset(L, Avail),
+  %% The restore key holds the cost that decide_temps/6 queries for mode 3.
+  %% It is charged for mode2 spill points of temps renamed in this block, for
+  %% every restore inserted by the forward scan, and (below) for the def/use
+  %% weight of every temp that is not renamed in this block.
+  Costs7 = costs_insert(restore, L, Wt, ordsets:intersection(M2SpillSet, Mode3Renames), Costs6),
+  Costs8 = costs_insert(restore, L, Wt, ordsets:from_list(Restored), Costs7),
+  Costs = add_unsplit_mode3_costs(DUCount, Mode3Renames, L, Costs8),
+  scan_bbs(BBs, Liveness, PLive, Weights, Defs, RDefs, Avail, Target, DUCounts,
+	   Costs, DSets, [{L,BB#bb{code=Code}}|Acc]).
+
+%% Charges, under the restore key of block L, the def/use weight of every
+%% temp counted in DUCount that is not a member of Mode3Renames.
+-spec add_unsplit_mode3_costs(ducount(), ordsets:ordset(temp()), label(), costs())
+                             -> costs().
+add_unsplit_mode3_costs(DUCount, Mode3Renames, L, Costs) ->
+  Counts = orddict:from_list(ducount_to_list(DUCount)),
+  NotRenamed = orddict_without_ordset(Mode3Renames, Counts),
+  add_unsplit_mode3_costs_1(NotRenamed, L, Costs).
+
+%% Inserts one restore cost entry per {Temp, Weight} pair, in list order.
+-spec add_unsplit_mode3_costs_1([{temp(),float()}], label(), costs())
+                               -> costs().
+add_unsplit_mode3_costs_1(Unsplit, L, Costs0) ->
+  lists:foldl(fun({T, C}, Costs) ->
+                  costs_insert(restore, L, C, [T], Costs)
+              end, Costs0, Unsplit).
+
+%% @doc Removes from Dict every entry whose key is an element of Set. Both
+%% arguments are walked in lockstep, relying on their common ordering.
+-spec orddict_without_ordset(ordsets:ordset(K), orddict:orddict(K, V))
+                            -> orddict:orddict(K, V).
+orddict_without_ordset([], Dict) -> Dict;
+orddict_without_ordset(_Set, []) -> [];
+orddict_without_ordset([S|Ss]=Set, [{K,_}=Pair|Ps]=Dict) ->
+  if S < K -> orddict_without_ordset(Ss, Dict);          % S not in Dict
+     S > K -> [Pair|orddict_without_ordset(Set, Ps)];    % K not in Set
+     true  -> orddict_without_ordset(Ss, Ps)             % S == K, drop pair
+  end.
+
+%% Scans the code forward, collecting and inserting mode3 restores.
+%%
+%% SplitHere holds the temps that still await a restore in this block. When an
+%% instruction uses such a temp, a #mode3_restores{} marker for it is placed
+%% immediately before that instruction; when an instruction defines such a
+%% temp without using it, the temp is simply dropped from SplitHere.
+%%
+%% Returns the annotated code in REVERSE order (ready for scan_bb/7) together
+%% with every temp for which a restore marker was inserted. That list is built
+%% by concatenation and is therefore NOT an ordset, even though each
+%% individual chunk is sorted; callers must sort it themselves if they need
+%% one.
+%%
+%% Note that the terminating clause requires SplitHere to be empty: running
+%% out of code with split temps still pending raises function_clause.
+-spec scan_bb_fwd([instr()], ordsets:ordset(temp()), [temp()],
+		  [code_elem()])
+		 -> {[code_elem()], [temp()]}.
+scan_bb_fwd([], [], Restored, Acc) -> {Acc, Restored};
+scan_bb_fwd([I|Is], SplitHere0, Restored0, Acc0) ->
+  #instr{def=Def, use=Use} = I,
+  {ToRestore, SplitHere1} =
+    lists:partition(fun(R) -> lists:member(R, Use) end, SplitHere0),
+  SplitHere = lists:filter(fun(R) -> not lists:member(R, Def) end, SplitHere1),
+  Acc =
+    case ToRestore of
+      [] -> [I | Acc0];
+      %% Acc is reversed, so the marker ends up in front of I
+      _  -> [I, #mode3_restores{temps=ToRestore} | Acc0]
+    end,
+  scan_bb_fwd(Is, SplitHere, ToRestore ++ Restored0, Acc).
+
+%% Scans the code backwards, collecting def/use counts and mode2 spills
+%%
+%% The first argument is the block's code in reverse order. RDefout and
+%% Liveout describe the state after the instruction at the head of that list
+%% and are stepped backwards past each instruction. Acc is the already
+%% processed tail of the block, in forward order.
+%%
+%% Returns the annotated code in forward order, the def/use counts (each def
+%% and each use of a temp adds Wt), and the temps for which a #mode2_spills{}
+%% marker was inserted.
+-spec scan_bb([code_elem()], float(), rdefsetf(), liveset(), ducount(),
+	      [temp()], [code_elem()])
+	     -> {[code_elem()], ducount(), [temp()]}.
+scan_bb([], _Wt, _RDefout, _Liveout, DUCount, Spills, Acc) ->
+  {Acc, DUCount, Spills};
+%% Restore markers inserted by scan_bb_fwd/4 are passed through untouched
+scan_bb([I=#mode3_restores{}|Is], Wt, RDefout, Liveout, DUCount, Spills, Acc) ->
+  scan_bb(Is, Wt, RDefout, Liveout, DUCount, Spills, [I|Acc]);
+scan_bb([I|Is], Wt, RDefout, Liveout, DUCount0, Spills0, Acc0) ->
+  #instr{def=Def,use=Use} = I,
+  DUCount = ducount_add(Use, Wt, ducount_add(Def, Wt, DUCount0)),
+  Livein = liveness_step(I, Liveout),
+  RDefin = rdef_step(I, RDefout),
+  %% The temps that would be spilled after I in mode 2
+  %% (defined by I and live afterwards; presumably minus the members of
+  %% RDefout -- see ordset_subtract_rdefsetf)
+  NewSpills = ordset_subtract_rdefsetf(
+		ordsets:intersection(Def, Liveout),
+		RDefout),
+  %% No temp is expected to be reported twice within a block
+  ?ASSERT(NewSpills =:= (NewSpills -- Spills0)),
+  Spills = NewSpills ++ Spills0,
+  %% The marker is pushed before I, so in the forward-ordered result it
+  %% directly follows I.
+  Acc1 = case NewSpills of
+	   [] -> Acc0;
+	   _ -> [#mode2_spills{temps=NewSpills}|Acc0]
+	 end,
+  scan_bb(Is, Wt, RDefin, Livein, DUCount, Spills, [I|Acc1]).
+
+%% Steps a live set backwards over one instruction: whatever the instruction
+%% defines is removed, then whatever it uses is added.
+-spec liveness_step(instr(), liveset()) -> liveset().
+liveness_step(#instr{use=Used, def=Killed}, LiveAfter) ->
+  Surviving = ordsets:subtract(LiveAfter, Killed),
+  ordsets:union(Used, Surviving).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% First pass: compute basic-block weighting
+
+-type weights() :: no_bb_weights
+		 | {hipe_bb_weights:bb_weights(), float()}.
+
+%% The weight of block L; equivalent to weight_scaled/3 with a scale of 1.0.
+-spec weight(label(), weights()) -> float().
+weight(L, Weights) -> weight_scaled(L, 1.0, Weights).
+
+%% Computes basic-block weights when the range_split_weights option is set;
+%% otherwise returns no_bb_weights. The weights are paired with a constant
+%% derived from the range_split_weight_power option.
+-spec compute_weights(target_cfg(), target_module(), target_context(),
+                      comp_options()) -> weights().
+compute_weights(CFG, TargetMod, TargetContext, Options) ->
+  case proplists:get_bool(range_split_weights, Options) of
+    true ->
+      BBWeights = hipe_bb_weights:compute(CFG, TargetMod, TargetContext),
+      Power = proplists:get_value(range_split_weight_power, Options,
+                                  ?DEFAULT_WEIGHT_POWER),
+      {BBWeights, ?WEIGHT_CONST_FUN(Power)};
+    false ->
+      no_bb_weights
+  end.
+
+%% The weight of block L multiplied by Scale, clamped to a fixed range and
+%% then passed through ?WEIGHT_FUN. Without block weights every block weighs
+%% 1.0, whatever the scale.
+-spec weight_scaled(label(), float(), weights()) -> float().
+weight_scaled(L, Scale, {Weights, Const}) ->
+  Raw = hipe_bb_weights:weight(L, Weights) * Scale,
+  AtLeastMin = erlang:max(Raw, 0.0000000000000000001),
+  Clamped = erlang:min(AtLeastMin, 10000.0),
+  ?WEIGHT_FUN(Clamped, Const);
+weight_scaled(_L, _Scale, no_bb_weights) -> 1.0.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Heuristic splitting decision.
+%%
+%% Decide which temps to split, in which parts, and pick new names for them.
+-type spill_mode() :: mode1 % Spill temps at partition exits
+		    | mode2 % Spill temps at definitions
+		    | mode3.% Spill temps at definitions, restore temps at uses
+-type ren() :: #{temp() => {spill_mode(), temp()}}.
+-type renames() :: #{label() => ren()}.
+
+-record(heur_par, {
+	  mode1_fudge :: float(),
+	  min_gain    :: float()
+	  }).
+-type heur_par() :: #heur_par{}.
+
+%% Reads the heuristic parameters from Options (falling back to the compiled
+%% in defaults) and decides the renames of every partition.
+-spec decide(ducounts(), costs(), target(), comp_options()) -> renames().
+decide(DUCounts, Costs, Target, Options) ->
+  Fudge = proplists:get_value(range_split_mode1_fudge, Options,
+                              ?DEFAULT_MODE1_FUDGE),
+  MinGain = proplists:get_value(range_split_min_gain, Options,
+                                ?DEFAULT_MIN_GAIN),
+  Par = #heur_par{mode1_fudge = Fudge, min_gain = MinGain},
+  decide_parts(maps:to_list(DUCounts), Costs, Target, Par, #{}).
+
+%% Decides, partition by partition and in list order, which temps to split,
+%% and records the result under the partition key.
+-spec decide_parts([{part_key(), ducount()}], costs(), target(),
+                   heur_par(), renames())
+                  -> renames().
+decide_parts(Parts, Costs, Target, Par, Acc0) ->
+  lists:foldl(
+    fun({Part, DUCount}, Acc) ->
+        Ren = decide_temps(ducount_to_list(DUCount), Part, Costs, Target,
+                           Par, #{}),
+        Acc#{Part => Ren}
+    end, Acc0, Parts).
+
+%% Decides, for each temp referenced in partition Part, whether to split it
+%% and with which spill mode.
+%%
+%% SpillGain is the temp's accumulated def/use weight in the partition (the
+%% list comes from the partition's ducount). It is compared against the cost
+%% of each mode:
+%%   mode1: entry1 + exit costs
+%%   mode2: entry2 + spill costs
+%%   mode3: restore costs
+%% A temp is split only if it is not precoloured, its mode1 cost is nonzero,
+%% and at least one mode costs less than SpillGain after scaling the cost by
+%% min_gain. Mode 1 is chosen when its cost, scaled by mode1_fudge, is below
+%% both other costs; otherwise the cheaper of mode 2 and mode 3 is chosen,
+%% with ties going to mode 3.
+%%
+%% Every split temp is given a fresh register number from new_reg_nr/1.
+-spec decide_temps([{temp(), float()}], part_key(), costs(), target(),
+		   heur_par(), ren())
+		  -> ren().
+decide_temps([], _Part, _Costs, _Target, _Par, Acc) -> Acc;
+decide_temps([{Temp, SpillGain}|Ts], Part, Costs, Target, Par, Acc0) ->
+  SpillCost1 = costs_query(Temp, entry1, Part, Costs)
+    + costs_query(Temp, exit, Part, Costs),
+  SpillCost2 = costs_query(Temp, entry2, Part, Costs)
+    + costs_query(Temp, spill, Part, Costs),
+  SpillCost3 = costs_query(Temp, restore, Part, Costs),
+  Acc =
+    %% SpillCost1 =:= 0.0 usually means the temp is local to the partition;
+    %% hence no need to split it
+    case (SpillCost1 =/= 0.0) %% maps:is_key(Temp, S)
+      andalso (not is_precoloured(Temp, Target))
+      andalso ((Par#heur_par.min_gain*SpillCost1 < SpillGain)
+	       orelse (Par#heur_par.min_gain*SpillCost2 < SpillGain)
+	       orelse (Par#heur_par.min_gain*SpillCost3 < SpillGain))
+    of
+      false -> Acc0;
+      true ->
+	Mode =
+	  if Par#heur_par.mode1_fudge*SpillCost1 < SpillCost2,
+	     Par#heur_par.mode1_fudge*SpillCost1 < SpillCost3 ->
+	      mode1;
+	     SpillCost2 < SpillCost3 ->
+	      mode2;
+	     true ->
+	      mode3
+	  end,
+	%% new_reg_nr/1 is only called for temps that are actually split
+	Acc0#{Temp => {Mode, new_reg_nr(Target)}}
+  end,
+  decide_temps(Ts, Part, Costs, Target, Par, Acc).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Eighth pass: Rewrite program performing range splitting.
+
+%% Rewrites the target CFG: every block of the scanned #cfg{} is rewritten by
+%% rewrite_bbs/10 and written back into TCFG. DSets maps each label to the
+%% key of its partition, under which its renames are found in Renames.
+-spec rewrite(cfg(), target_cfg(), target(), liveness(), plive(), defs(),
+	      avail(), part_dsets_map(), renames(), temps())
+	     -> target_cfg().
+rewrite(#cfg{bbs=BBs}, TCFG, Target, Liveness, PLive, Defs, Avail, DSets,
+	Renames, Temps) ->
+  rewrite_bbs(maps:to_list(BBs), Target, Liveness, PLive, Defs, Avail, DSets,
+	      Renames, Temps, TCFG).
+
+%% Rewrites one block at a time and stores the new code in the target CFG.
+%%
+%% Within a block, temps are substituted according to the renames of the
+%% block's partition, and the spill/restore markers left by the scan pass are
+%% expanded into move instructions. For a block where bb_has_call/1 is true,
+%% the last instruction is treated separately: restore blocks are injected on
+%% its outgoing edges and mode1 spills are placed before it.
+-spec rewrite_bbs([{label(), bb()}], target(), liveness(), plive(), defs(),
+		  avail(), part_dsets_map(), renames(), temps(), target_cfg())
+		 -> target_cfg().
+rewrite_bbs([], _Target, _Liveness, _PLive, _Defs, _Avail, _DSets, _Renames,
+	    _Temps, TCFG) ->
+  TCFG;
+rewrite_bbs([{L,BB}|BBs], Target, Liveness, PLive, Defs, Avail, DSets, Renames,
+	    Temps, TCFG0) ->
+  %% rewrite_instrs/7 conses onto its accumulator, so feeding it the reversed
+  %% code yields the result in forward order.
+  Code0Rev = lists:reverse(bb_code(BB)),
+  %% The renames of the partition that L belongs to
+  EntryRen = maps:get(maps:get(L,DSets), Renames),
+  M3Ren = mode3_block_renameset(L, Avail),
+  SubstFun = rewrite_subst_fun(Target, EntryRen, M3Ren),
+  Fun = fun(I) -> subst_temps(SubstFun, I, Target) end,
+  {Code, TCFG} =
+    case bb_has_call(BB) of
+      false ->
+	Code1 = rewrite_instrs(Code0Rev, Fun, EntryRen, M3Ren, Temps, Target,
+			       []),
+	{Code1, TCFG0};
+      true ->
+	%% The head of the reversed code is the block's last instruction
+	CallI0 = hd(Code0Rev),
+	Succ = bb_succ(BB),
+	%% May add restore blocks to the CFG and redirect the jumps of the last
+	%% instruction to them. Note that the last instruction is not passed
+	%% through Fun.
+	{CallTI, TCFG1} = inject_restores(Succ, Target, Liveness, PLive, DSets,
+					 Renames, Temps, CallI0#instr.i, TCFG0),
+	%% Live set just before the last instruction
+	Liveout1 = liveness_step(CallI0, liveout(Liveness, L, Target)),
+	Defout = defbutlast(L, Defs),
+	SpillMap = mk_spillmap(EntryRen, Liveout1, Defout, Temps, Target),
+	Code1 = rewrite_instrs(tl(Code0Rev), Fun, EntryRen, M3Ren, Temps,
+			       Target, []),
+	%% lift_spills/4 walks the code backwards, starting from a result that
+	%% already holds the last instruction.
+	Code2 = lift_spills(lists:reverse(Code1), Target, SpillMap, [CallTI]),
+	{Code2, TCFG1}
+    end,
+  TBB = hipe_bb:code_update(bb(TCFG, L, Target), Code),
+  rewrite_bbs(BBs, Target, Liveness, PLive, Defs, Avail, DSets, Renames, Temps,
+	      update_bb(TCFG, L, TBB, Target)).
+
+%% Expands a list of code elements into target instructions, consing each
+%% result onto Acc (so the output order is the reverse of the input order):
+%% instructions are passed through Fun, spill and restore markers are
+%% replaced by the moves they stand for.
+-spec rewrite_instrs([code_elem()], rewrite_fun(), ren(),
+                     ordsets:ordset(temp()), temps(), target(),
+                     [target_instr()])
+                    -> [target_instr()].
+rewrite_instrs(Code, Fun, Ren, M3Ren, Temps, Target, Acc0) ->
+  lists:foldl(
+    fun(#instr{i=TI}, Acc) ->
+        [Fun(TI)|Acc];
+       (#mode2_spills{temps=Spilled}, Acc) ->
+        add_mode2_spills(Spilled, Target, Ren, M3Ren, Temps, Acc);
+       (#mode3_restores{temps=Restored}, Acc) ->
+        add_mode3_restores(Restored, Target, Ren, Temps, Acc)
+    end, Acc0, Code).
+
+%% Conses onto Acc a spill move (new name -> original temp) for each listed
+%% temp that is renamed in mode2, or in mode3 and is a member of M3Ren. All
+%% other temps are skipped.
+-spec add_mode2_spills(ordsets:ordset(temp()), target(), ren(),
+                       ordsets:ordset(temp()), temps(), [target_instr()])
+                      -> [target_instr()].
+add_mode2_spills(Spilled, Target, Ren, M3Ren, Temps, Acc0) ->
+  SpillOf = fun(R, NewName) ->
+                #{R := T} = Temps,
+                mk_move(update_reg_nr(NewName, T, Target), T, Target)
+            end,
+  lists:foldl(
+    fun(R, Acc) ->
+        case maps:find(R, Ren) of
+          {ok, {mode2, NewName}} ->
+            [SpillOf(R, NewName)|Acc];
+          {ok, {mode3, NewName}} ->
+            case lists:member(R, M3Ren) of
+              true  -> [SpillOf(R, NewName)|Acc];
+              false -> Acc
+            end;
+          _ ->
+            Acc
+        end
+    end, Acc0, Spilled).
+
+%% Conses onto Acc a restore move (original temp -> new name) for each listed
+%% temp that is renamed in mode3. Temps renamed in another mode, or not
+%% renamed at all, are skipped.
+-spec add_mode3_restores(ordsets:ordset(temp()), target(), ren(), temps(),
+                         [target_instr()])
+                        -> [target_instr()].
+add_mode3_restores(Restored, Target, Ren, Temps, Acc0) ->
+  lists:foldl(
+    fun(R, Acc) ->
+        case maps:find(R, Ren) of
+          {ok, {mode3, NewName}} ->
+            #{R := T} = Temps,
+            Renamed = update_reg_nr(NewName, T, Target),
+            [mk_move(T, Renamed, Target)|Acc];
+          _ ->
+            Acc
+        end
+    end, Acc0, Restored).
+
+-type rewrite_fun() :: fun((target_instr()) -> target_instr()).
+-type subst_fun() :: fun((target_temp()) -> target_temp()).
+%% Builds the temp substitution for one block. A temp is replaced by its new
+%% name when it is renamed in mode1 or mode2, or renamed in mode3 and a member
+%% of M3Ren; every other temp is returned unchanged.
+-spec rewrite_subst_fun(target(), ren(), ordsets:ordset(temp())) -> subst_fun().
+rewrite_subst_fun(Target, Ren, M3Ren) ->
+  fun(Temp) ->
+      Reg = reg_nr(Temp, Target),
+      case maps:find(Reg, Ren) of
+        {ok, {mode3, NewName}} ->
+          case lists:member(Reg, M3Ren) of
+            true  -> update_reg_nr(NewName, Temp, Target);
+            false -> Temp
+          end;
+        {ok, {_Mode, NewName}} ->
+          update_reg_nr(NewName, Temp, Target);
+        _ ->
+          Temp
+      end
+  end.
+
+-type spillmap() :: [{temp(), target_instr()}].
+%% Builds the mode1 spills of a block: one {NewName, SpillInstr} pair for each
+%% temp that is renamed in mode1, is a member of Livein and is a member of
+%% Defout. The pair is keyed by the NEW name, which is what lift_spills/4
+%% compares against the registers defined by the rewritten instructions.
+-spec mk_spillmap(ren(), liveset(), defsetf(), temps(), target())
+                 -> spillmap().
+mk_spillmap(Ren, Livein, Defout, Temps, Target) ->
+  lists:filtermap(
+    fun({Reg, {mode1, NewName}}) ->
+        case lists:member(Reg, Livein) andalso defsetf_member(Reg, Defout) of
+          true ->
+            Temp = maps:get(Reg, Temps),
+            Renamed = update_reg_nr(NewName, Temp, Target),
+            {true, {NewName, mk_move(Renamed, Temp, Target)}};
+          false ->
+            false
+        end;
+       (_) ->
+        false
+    end, maps:to_list(Ren)).
+
+%% Builds the restore moves (original temp -> new name) needed on entry to a
+%% block: one for each mode1 temp in Livein and each mode2 temp in PLivein.
+%% Mode3 temps get no entry restores.
+-spec mk_restores(ren(), liveset(), liveset(), temps(), target())
+                 -> [target_instr()].
+mk_restores(Ren, Livein, PLivein, Temps, Target) ->
+  NeedsRestore = fun(mode1, Reg) -> lists:member(Reg, Livein);
+                    (mode2, Reg) -> lists:member(Reg, PLivein);
+                    (_, _)       -> false
+                 end,
+  [begin
+     Temp = maps:get(Reg, Temps),
+     mk_move(Temp, update_reg_nr(NewName, Temp, Target), Target)
+   end || {Reg, {Mode, NewName}} <- maps:to_list(Ren),
+          NeedsRestore(Mode, Reg)].
+
+%% For each successor label that needs restores on entry, adds a new block
+%% holding those restores followed by a goto to the successor, and redirects
+%% the control flow instruction CFTI from the successor to the new block.
+%% Successors that need no restores are left alone. Returns the updated
+%% control flow instruction and CFG.
+-spec inject_restores([label()], target(), liveness(), plive(),
+                      part_dsets_map(), renames(), temps(), target_instr(),
+                      target_cfg())
+                     -> {target_instr(), target_cfg()}.
+inject_restores(Succs, Target, Liveness, PLive, DSets, Renames, Temps, CFTI0,
+                TCFG0) ->
+  lists:foldl(
+    fun(Succ, {CFTI, TCFG}=Unchanged) ->
+        Part = maps:get(Succ, DSets),
+        Ren = maps:get(Part, Renames),
+        Livein = livein(Liveness, Succ, Target),
+        PLivein = plivein(Succ, PLive),
+        case mk_restores(Ren, Livein, PLivein, Temps, Target) of
+          [] ->
+            Unchanged; % optimisation
+          Restores ->
+            RestoreLbl = new_label(Target),
+            Code = Restores ++ [mk_goto(Succ, Target)],
+            Redirected = redirect_jmp(CFTI, Succ, RestoreLbl, Target),
+            {Redirected,
+             update_bb(TCFG, RestoreLbl, hipe_bb:mk_bb(Code), Target)}
+        end
+    end, {CFTI0, TCFG0}, Succs).
+
+%% Heuristic. Each pending spill is hoisted towards the start of the block
+%% until an instruction defining the spilled register is found; the spill is
+%% then placed right after that instruction. Spills whose register is never
+%% defined end up at the very start of the block.
+%%
+%% The first argument is the code in reverse order; Acc is the part of the
+%% block already emitted, in forward order.
+-spec lift_spills([target_instr()], target(), spillmap(), [target_instr()])
+                 -> [target_instr()].
+lift_spills([I|Is], Target, Pending0, Acc) ->
+  Defined = reg_defines(I, Target),
+  IsDefinedHere = fun({Reg,_}) -> lists:member(Reg, Defined) end,
+  {Here, Pending} = lists:partition(IsDefinedHere, Pending0),
+  Placed = [SpillI || {_, SpillI} <- Here],
+  lift_spills(Is, Target, Pending, [I|Placed ++ Acc]);
+lift_spills([], _Target, Pending, Acc) ->
+  Leftover = [SpillI || {_, SpillI} <- Pending],
+  Leftover ++ Acc.
+
+%% The register numbers of the temps that target instruction I defines.
+reg_defines(I, Target) ->
+  reg_names(defines(I,Target), Target).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Costs ADT
+%%
+%% Keeps track of cumulative cost of spilling temps in particular partitions
+%% using particular spill modes.
+-type cost_map() :: #{[part_key()|temp()] => float()}.
+-type cost_key() :: entry1 | entry2 | exit | spill | restore.
+-record(costs, {entry1  = #{} :: cost_map()
+	       ,entry2  = #{} :: cost_map()
+	       ,exit    = #{} :: cost_map()
+	       ,spill   = #{} :: cost_map()
+	       ,restore = #{} :: cost_map()
+	       }).
+-type costs() :: #costs{}.
+
+%% A costs record in which every cost map is empty.
+-spec costs_new() -> costs().
+costs_new() -> #costs{}.
+
+%% Adds Weight to the cost of every temp in Liveset, for partition/label A,
+%% in the cost map selected by the first argument.
+-spec costs_insert(cost_key(), part_key(), float(), liveset(), costs())
+                  -> costs().
+costs_insert(Key, A, Weight, Liveset, Costs=#costs{}) ->
+  Bump = fun(CostMap) -> costs_insert_1(A, Weight, Liveset, CostMap) end,
+  case Key of
+    entry1  -> Costs#costs{entry1  = Bump(Costs#costs.entry1)};
+    entry2  -> Costs#costs{entry2  = Bump(Costs#costs.entry2)};
+    exit    -> Costs#costs{exit    = Bump(Costs#costs.exit)};
+    spill   -> Costs#costs{spill   = Bump(Costs#costs.spill)};
+    restore -> Costs#costs{restore = Bump(Costs#costs.restore)}
+  end.
+
+%% Adds Weight under the key [A|Temp] for every Temp in the list. Weight must
+%% be a float.
+costs_insert_1(_A, Weight, [], CostMap) when is_float(Weight) ->
+  CostMap;
+costs_insert_1(A, Weight, [Live|Rest], CostMap) when is_float(Weight) ->
+  costs_insert_1(A, Weight, Rest,
+                 map_update_counter([A|Live], Weight, CostMap)).
+
+%% Rekeys all five cost maps from labels to partition roots, threading the
+%% disjoint-set structure through the lookups (find/2 returns an updated
+%% copy).
+-spec costs_map_roots(part_dsets(), costs()) -> {costs(), part_dsets()}.
+costs_map_roots(DSets0, Costs0) ->
+  #costs{entry1=E1, entry2=E2, exit=Ex, spill=Sp, restore=Re} = Costs0,
+  {Entry1,  DSets1} = costs_map_roots_1(DSets0, E1),
+  {Entry2,  DSets2} = costs_map_roots_1(DSets1, E2),
+  {Exit,    DSets3} = costs_map_roots_1(DSets2, Ex),
+  {Spill,   DSets4} = costs_map_roots_1(DSets3, Sp),
+  {Restore, DSets}  = costs_map_roots_1(DSets4, Re),
+  Costs = Costs0#costs{entry1=Entry1, entry2=Entry2, exit=Exit, spill=Spill,
+                       restore=Restore},
+  {Costs, DSets}.
+
+%% Rekeys one cost map: the label at the head of each key is replaced by the
+%% root of its partition, and the weights of keys that become equal are
+%% added together.
+costs_map_roots_1(DSets0, CostMap) ->
+  lists:foldl(
+    fun({[A|T], Wt}, {Acc, DSets1}) ->
+        {Root, DSets2} = hipe_dsets:find(A, DSets1),
+        Key = [Root|T],
+        Merged = case Acc of
+                   #{Key := Old} -> Acc#{Key := Wt + Old};
+                   #{}           -> Acc#{Key => Wt}
+                 end,
+        {Merged, DSets2}
+    end, {#{}, DSets0}, maps:to_list(CostMap)).
+
+%% Inserts the pairs into Acc in list order. When a key is already present,
+%% the stored value becomes MF(NewValue, OldValue).
+maps_from_list_merge(Pairs, MF, Acc0) ->
+  lists:foldl(
+    fun({K, V}, Acc) ->
+        case maps:find(K, Acc) of
+          {ok, OV} -> Acc#{K := MF(V, OV)};
+          error    -> Acc#{K => V}
+        end
+    end, Acc0, Pairs).
+
+%% The accumulated cost of Temp in partition Part, taken from the cost map
+%% selected by the second argument; 0.0 if nothing was recorded.
+-spec costs_query(temp(), cost_key(), part_key(), costs()) -> float().
+costs_query(Temp, Key, Part, Costs=#costs{}) ->
+  CostMap = case Key of
+              entry1  -> Costs#costs.entry1;
+              entry2  -> Costs#costs.entry2;
+              exit    -> Costs#costs.exit;
+              spill   -> Costs#costs.spill;
+              restore -> Costs#costs.restore
+            end,
+  costs_query_1(Temp, Part, CostMap).
+
+%% Looks up the key [Part|Temp]; a missing key counts as zero cost.
+costs_query_1(Temp, Part, CostMap) ->
+  maps:get([Part|Temp], CostMap, 0.0).
+
+%% Adds Incr to the counter stored under Key, creating the counter with the
+%% value Incr if the key is absent.
+-spec map_update_counter(Key, number(), #{Key => number(), OK => OV})
+                        -> #{Key := number(), OK => OV}.
+map_update_counter(Key, Incr, Map) ->
+  case maps:find(Key, Map) of
+    {ok, Orig} -> Map#{Key := Orig + Incr};
+    error      -> Map#{Key => Incr}
+  end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Def and use counting ADT
+-type ducount() :: #{temp() => float()}.
+
+%% An empty def/use count (no temp has been counted).
+-spec ducount_new() -> ducount().
+ducount_new() -> #{}.
+
+%% Adds Weight to the count of every temp in the list; a temp that occurs
+%% several times is counted once per occurrence.
+-spec ducount_add([temp()], float(), ducount()) -> ducount().
+ducount_add(Temps, Weight, DUCount0) ->
+  lists:foldl(
+    fun(T, DUCount) ->
+        case maps:find(T, DUCount) of
+          {ok, Count} -> DUCount#{T := Count + Weight};
+          error       -> DUCount#{T => Weight}
+        end
+    end, DUCount0, Temps).
+
+%% The counts as a list of {Temp, Count} pairs, in maps:to_list/1 order.
+ducount_to_list(DUCount) -> maps:to_list(DUCount).
+
+%% Adds two def/use counts together. The smaller map is folded into the
+%% larger one, so that fewer updates are needed.
+-spec ducount_merge(ducount(), ducount()) -> ducount().
+ducount_merge(DCA, DCB) ->
+  case map_size(DCA) < map_size(DCB) of
+    true  -> ducount_merge_1(ducount_to_list(DCA), DCB);
+    false -> ducount_merge_1(ducount_to_list(DCB), DCA)
+  end.
+
+%% Adds each {Temp, Count} pair of the list to the count map.
+ducount_merge_1(Pairs, DUCount0) ->
+  lists:foldl(
+    fun({T, AC}, DUCount) ->
+        case maps:find(T, DUCount) of
+          {ok, BC} -> DUCount#{T := AC + BC};
+          error    -> DUCount#{T => AC}
+        end
+    end, DUCount0, Pairs).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Target module interface functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Each ?TGT_IFACE_k(N) expands to the single clause of a local function N
+%% that takes k ordinary arguments followed by the target, a {Module, Context}
+%% pair, and forwards the call to Module:N with Context as the last argument.
+-define(TGT_IFACE_0(N), N(         {M,C}) -> M:N(         C)).
+-define(TGT_IFACE_1(N), N(A1,      {M,C}) -> M:N(A1,      C)).
+-define(TGT_IFACE_2(N), N(A1,A2,   {M,C}) -> M:N(A1,A2,   C)).
+-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)).
+
+%% The forwarded target callbacks; the macro suffix is the number of
+%% arguments besides the target.
+?TGT_IFACE_2(bb).
+?TGT_IFACE_1(def_use).
+?TGT_IFACE_1(defines).
+?TGT_IFACE_1(defines_all_alloc).
+?TGT_IFACE_1(is_precoloured).
+?TGT_IFACE_1(mk_goto).
+?TGT_IFACE_2(mk_move).
+?TGT_IFACE_0(new_label).
+?TGT_IFACE_0(new_reg_nr).
+?TGT_IFACE_1(number_of_temporaries).
+?TGT_IFACE_3(redirect_jmp).
+?TGT_IFACE_1(reg_nr).
+?TGT_IFACE_1(reverse_postorder).
+?TGT_IFACE_2(subst_temps).
+?TGT_IFACE_3(update_bb).
+?TGT_IFACE_2(update_reg_nr).
+
+%% The branch targets of Instr as reported by the target module, sorted by
+%% label, with the values of duplicate labels added together.
+branch_preds(Instr, {TgtMod,TgtCtx}) ->
+  Preds = TgtMod:branch_preds(Instr, TgtCtx),
+  Sorted = lists:keysort(1, Preds),
+  merge_sorted_preds(Sorted).
+
+%% The live-in set of block L, as an ordset of register numbers.
+livein(Liveness, L, {TgtMod,TgtCtx}=Target) ->
+  LiveTemps = TgtMod:livein(Liveness, L, TgtCtx),
+  ordsets:from_list(reg_names(LiveTemps, Target)).
+
+%% The live-out set of block L, as an ordset of register numbers.
+liveout(Liveness, L, {TgtMod,TgtCtx}=Target) ->
+  LiveTemps = TgtMod:liveout(Liveness, L, TgtCtx),
+  ordsets:from_list(reg_names(LiveTemps, Target)).
+
+%% Collapses runs of adjacent pairs that share a label into a single pair
+%% whose value is the sum of the run. The input is expected to be sorted by
+%% label, so that equal labels are adjacent.
+merge_sorted_preds(Preds) ->
+  Merged = lists:foldl(fun({L, P}, [{L, Sum}|Acc]) -> [{L, Sum+P}|Acc];
+                          (LP, Acc)                -> [LP|Acc]
+                       end, [], Preds),
+  lists:reverse(Merged).
+
+%% Maps each temp to its register number, preserving order.
+reg_names(Regs, {TgtMod,TgtCtx}) ->
+  lists:map(fun(Reg) -> TgtMod:reg_nr(Reg, TgtCtx) end, Regs).
diff --git a/lib/hipe/regalloc/hipe_regalloc_loop.erl b/lib/hipe/regalloc/hipe_regalloc_loop.erl
index 5bbb0ba7c1..29ef3adcc2 100644
--- a/lib/hipe/regalloc/hipe_regalloc_loop.erl
+++ b/lib/hipe/regalloc/hipe_regalloc_loop.erl
@@ -32,9 +32,11 @@ ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod, TargetCtx) ->
 ra_common(CFG0, Liveness0, SpillIndex, Options, RegAllocMod, TargetMod,
 	  TargetCtx) ->
   ?inc_counter(ra_calls_counter, 1),
-  SpillLimit0 = TargetMod:number_of_temporaries(CFG0, TargetCtx),
+  {CFG1, Liveness1} =
+    do_range_split(CFG0, Liveness0, TargetMod, TargetCtx, Options),
+  SpillLimit0 = TargetMod:number_of_temporaries(CFG1, TargetCtx),
   {Coloring, _, CFG, Liveness} =
-    call_allocator_initial(CFG0, Liveness0, SpillLimit0, SpillIndex, Options,
+    call_allocator_initial(CFG1, Liveness1, SpillLimit0, SpillIndex, Options,
 			   RegAllocMod, TargetMod, TargetCtx),
   %% The first iteration, the hipe_regalloc_prepass may create new temps, these
   %% should not end up above SpillLimit.
@@ -96,3 +98,20 @@ call_allocator(CFG, Liveness, SpillLimit, SpillIndex, Options, RegAllocMod,
       RegAllocMod:regalloc(CFG, Liveness, SpillIndex, SpillLimit, TargetMod,
 			   TargetCtx, Options)
   end.
+
+%% Runs the optional live range splitting passes ahead of register
+%% allocation: first restore reuse (option ra_restore_reuse), then range
+%% splitting (option ra_range_split). Liveness is recomputed after each pass
+%% that ran. Returns the resulting CFG and its liveness.
+do_range_split(CFG0, Liveness0, TgtMod, TgtCtx, Options) ->
+  {ReuseCFG, ReuseLiveness} =
+    case proplists:get_bool(ra_restore_reuse, Options) of
+      false ->
+        {CFG0, Liveness0};
+      true ->
+        CFG1 = hipe_restore_reuse:split(CFG0, Liveness0, TgtMod, TgtCtx),
+        {CFG1, TgtMod:analyze(CFG1, TgtCtx)}
+    end,
+  case proplists:get_bool(ra_range_split, Options) of
+    false ->
+      {ReuseCFG, ReuseLiveness};
+    true ->
+      SplitCFG = hipe_range_split:split(ReuseCFG, ReuseLiveness, TgtMod,
+                                        TgtCtx, Options),
+      {SplitCFG, TgtMod:analyze(SplitCFG, TgtCtx)}
+  end.
diff --git a/lib/hipe/regalloc/hipe_regalloc_prepass.erl b/lib/hipe/regalloc/hipe_regalloc_prepass.erl
index e212420ad2..5024840237 100644
--- a/lib/hipe/regalloc/hipe_regalloc_prepass.erl
+++ b/lib/hipe/regalloc/hipe_regalloc_prepass.erl
@@ -483,8 +483,8 @@ merge_pointless_splits_1([], _ScanBBs, DSets, Acc) -> {Acc, DSets};
 merge_pointless_splits_1([P={_,{single,_}}|Ps], ScanBBs, DSets, Acc) ->
   merge_pointless_splits_1(Ps, ScanBBs, DSets, [P|Acc]);
 merge_pointless_splits_1([P0={L,{split,_,_}}|Ps], ScanBBs, DSets0, Acc) ->
-  {EntryRoot, DSets1} = dsets_find({entry,L}, DSets0),
-  {ExitRoot,  DSets}  = dsets_find({exit,L},  DSets1),
+  {EntryRoot, DSets1} = hipe_dsets:find({entry,L}, DSets0),
+  {ExitRoot,  DSets}  = hipe_dsets:find({exit,L},  DSets1),
   case EntryRoot =:= ExitRoot of
     false -> merge_pointless_splits_1(Ps, ScanBBs, DSets, [P0|Acc]);
     true ->
@@ -501,7 +501,7 @@ merge_pointless_splits_1([P0={L,{split,_,_}}|Ps], ScanBBs, DSets0, Acc) ->
 
 -spec merge_small_parts(bb_dsets()) -> {bb_dsets_rllist(), bb_dsets()}.
 merge_small_parts(DSets0) ->
-  {RLList, DSets1} = dsets_to_rllist(DSets0),
+  {RLList, DSets1} = hipe_dsets:to_rllist(DSets0),
   RLLList = [{R, length(Elems), Elems} || {R, Elems} <- RLList],
   merge_small_parts_1(RLLList, DSets1, []).
 
@@ -518,8 +518,8 @@ merge_small_parts_1([Fst,{R, L, Es}|Ps], DSets, Acc)
   merge_small_parts_1([Fst|Ps], DSets, [{R,Es}|Acc]);
 merge_small_parts_1([{R1,L1,Es1},{R2,L2,Es2}|Ps], DSets0, Acc) ->
   ?ASSERT(L1 < ?TUNE_TOO_FEW_BBS andalso L2 < ?TUNE_TOO_FEW_BBS),
-  DSets1 = dsets_union(R1, R2, DSets0),
-  {R, DSets} = dsets_find(R1, DSets1),
+  DSets1 = hipe_dsets:union(R1, R2, DSets0),
+  {R, DSets} = hipe_dsets:find(R1, DSets1),
   merge_small_parts_1([{R,L2+L1,Es2++Es1}|Ps], DSets, Acc).
 
 %% @doc Partition an ordering over BBs into subsequences for the dsets that
@@ -531,8 +531,8 @@ part_order(Lbs, DSets) -> part_order(Lbs, DSets, #{}).
 
 part_order([], DSets, Acc) -> {Acc, DSets};
 part_order([L|Ls], DSets0, Acc0) ->
-  {EntryRoot, DSets1} = dsets_find({entry,L}, DSets0),
-  {ExitRoot,  DSets2} = dsets_find({exit,L},  DSets1),
+  {EntryRoot, DSets1} = hipe_dsets:find({entry,L}, DSets0),
+  {ExitRoot,  DSets2} = hipe_dsets:find({exit,L},  DSets1),
   Acc1 = map_append(EntryRoot, L, Acc0),
   %% Only include the label once if both entry and exit is in same partition
   Acc2 = case EntryRoot =:= ExitRoot of
@@ -558,73 +558,26 @@ map_append(Key, Elem, Map) ->
 %% split point, and one from the end to the last split point.
 
 -type bb_dset_key() :: {entry | exit, label()}.
--type bb_dsets() :: dsets(bb_dset_key()).
+-type bb_dsets() :: hipe_dsets:dsets(bb_dset_key()).
 -type bb_dsets_rllist() :: [{bb_dset_key(), [bb_dset_key()]}].
 
 -spec initial_dsets(target_cfg(), module(), target_context()) -> bb_dsets().
 initial_dsets(CFG, TgtMod, TgtCtx) ->
   Labels = TgtMod:labels(CFG, TgtCtx),
-  DSets0 = dsets_new(lists:append([[{entry,L},{exit,L}] || L <- Labels])),
+  DSets0 = hipe_dsets:new(lists:append([[{entry,L},{exit,L}] || L <- Labels])),
   Edges = lists:append([[{L, S} || S <- hipe_gen_cfg:succ(CFG, L)]
 			|| L <- Labels]),
-  lists:foldl(fun({X, Y}, DS) -> dsets_union({exit,X}, {entry,Y}, DS) end,
+  lists:foldl(fun({X, Y}, DS) -> hipe_dsets:union({exit,X}, {entry,Y}, DS) end,
 	      DSets0, Edges).
 
 -spec join_whole_blocks(part_bb_list(), bb_dsets()) -> bb_dsets().
 join_whole_blocks(PartBBList, DSets0) ->
-  lists:foldl(fun({L, {single, _}}, DS) -> dsets_union({entry,L}, {exit,L}, DS);
+  lists:foldl(fun({L, {single, _}}, DS) ->
+		  hipe_dsets:union({entry,L}, {exit,L}, DS);
 		 ({_, {split, _, _}}, DS) -> DS
 	      end, DSets0, PartBBList).
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% The disjoint set forests data structure, for elements of arbitrary types.
-%% Note that the find operation mutates the set.
-%%
-%% We could do this more efficiently if we restricted the elements to integers,
-%% and used the (mutable) hipe arrays. For arbitrary terms ETS could be used,
-%% for a persistent interface (which isn't that nice when even accessors return
-%% modified copies), the array module could be used.
--type dsets(X) :: #{X => {node, X} | {root, non_neg_integer()}}.
-
--spec dsets_new([E]) -> dsets(E).
-dsets_new(Elems) -> maps:from_list([{E,{root,0}} || E <- Elems]).
-
--spec dsets_find(E, dsets(E)) -> {E, dsets(E)}.
-dsets_find(E, DS0) ->
-  case DS0 of
-    #{E := {root,_}} -> {E, DS0};
-    #{E := {node,N}} ->
-      case dsets_find(N, DS0) of
-	{N, _}=T -> T;
-	{R, DS1} -> {R, DS1#{E := {node,R}}}
-      end
-   ;_ -> error(badarg, [E, DS0])
-  end.
-
--spec dsets_union(E, E, dsets(E)) -> dsets(E).
-dsets_union(X, Y, DS0) ->
-  {XRoot, DS1} = dsets_find(X, DS0),
-  case dsets_find(Y, DS1) of
-    {XRoot, DS2} -> DS2;
-    {YRoot, DS2} ->
-      #{XRoot := {root,XRR}, YRoot := {root,YRR}} = DS2,
-      if XRR < YRR -> DS2#{XRoot := {node,YRoot}};
-	 XRR > YRR -> DS2#{YRoot := {node,XRoot}};
-	 true -> DS2#{YRoot := {node,XRoot}, XRoot := {root,XRR+1}}
-      end
-  end.
-
--spec dsets_to_rllist(dsets(E)) -> {[{Root::E, Elems::[E]}], dsets(E)}.
-dsets_to_rllist(DS0) ->
-  {Lists, DS} = dsets_to_rllist(maps:keys(DS0), #{}, DS0),
-  {maps:to_list(Lists), DS}.
-
-dsets_to_rllist([], Acc, DS) -> {Acc, DS};
-dsets_to_rllist([E|Es], Acc, DS0) ->
-  {ERoot, DS} = dsets_find(E, DS0),
-  dsets_to_rllist(Es, map_append(ERoot, E, Acc), DS).
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %% Third pass
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %% Collect all referenced temps in each partition.
diff --git a/lib/hipe/regalloc/hipe_restore_reuse.erl b/lib/hipe/regalloc/hipe_restore_reuse.erl
new file mode 100644
index 0000000000..2158bd185e
--- /dev/null
+++ b/lib/hipe/regalloc/hipe_restore_reuse.erl
@@ -0,0 +1,516 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%@doc
+%%	       RESTORE REUSE LIVE RANGE SPLITTING PASS
+%%
+%% This is a simple live range splitter that tries to avoid sequences where a
+%% temporary is accessed on stack multiple times by keeping a copy of that temp
+%% around in a register.
+%%
+%% At any point where a temporary that is expected to be spilled (see uses of
+%% spills_add_list/2) is defined or used, this pass considers that temporary
+%% "available".
+%%
+%% Limitations:
+%%   * If a live range part starts with several different restores, this module
+%%     will introduce a new temp number for each of them, and later be forced to
+%%     generate phi blocks. It would be more efficient to introduce just a
+%%     single temp number. That would also remove the need for the phi blocks.
+%%   * If a live range part ends in a definition, that definition should just
+%%     define the base temp rather than the substitution, since some CISC
+%%     targets might be able to inline the memory access in the instruction.
+-module(hipe_restore_reuse).
+
+-export([split/4]).
+
+%% Exports for hipe_range_split, which uses restore_reuse as one possible spill
+%% "mode"
+-export([analyse/3
+	,renamed_in_block/2
+	,split_in_block/2
+	]).
+-export_type([avail/0]).
+
+-compile(inline).
+
+%% -define(DO_ASSERT, 1).
+-include("../main/hipe.hrl").
+
+-type target_cfg()       :: any().
+-type liveness()         :: any().
+-type target_module()    :: module().
+-type target_context()   :: any().
+-type target()           :: {target_module(), target_context()}.
+-type label()            :: non_neg_integer().
+-type reg()              :: non_neg_integer().
+-type instr()            :: any().
+-type temp()             :: any().
+
+-spec split(target_cfg(), liveness(), target_module(), target_context())
+	   -> target_cfg().
+split(CFG, Liveness, TargetMod, TargetContext) ->
+  Target = {TargetMod, TargetContext}, % pair taken apart by the TGT_IFACE wrappers
+  Avail = analyse(CFG, Liveness, Target), % per-block dataflow facts
+  rewrite(CFG, Target, Avail). % insert restores/spills and substitute temps
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-opaque avail() :: #{label() => avail_bb()}.
+
+-record(avail_bb, {
+	  %% Blocks where HasCall is true are considered to have too high
+	  %% register pressure to support a register copy of a temp
+	  has_call :: boolean(),
+	  %% AvailOut: Temps that can be split (are available)
+	  out      :: availset(),
+	  %% Gen: AvailOut generated locally
+	  gen      :: availset(),
+	  %% WantIn: Temps that are split
+	  want     :: regset(),
+	  %% Self: Temps with avail-want pairs locally
+	  self     :: regset(),
+	  %% DefIn: Temps shadowed by later def in same live range part
+	  defin    :: regset(),
+	  pred     :: [label()],
+	  succ     :: [label()]
+	 }).
+-type avail_bb() :: #avail_bb{}.
+
+avail_get(L, Avail) -> maps:get(L, Avail).
+avail_set(L, Val, Avail) -> maps:put(L, Val, Avail).
+avail_has_call(L, Avail) -> (avail_get(L, Avail))#avail_bb.has_call.
+avail_out(L, Avail) -> (avail_get(L, Avail))#avail_bb.out.
+avail_self(L, Avail) -> (avail_get(L, Avail))#avail_bb.self.
+avail_pred(L, Avail) -> (avail_get(L, Avail))#avail_bb.pred.
+avail_succ(L, Avail) -> (avail_get(L, Avail))#avail_bb.succ.
+
+avail_in(L, Avail) ->
+  case avail_pred(L, Avail) of
+    [] -> availset_empty(); % entry (no predecessors): nothing is available
+    Pred -> % AvailIn is the intersection of AvailOut over all predecessors
+      lists:foldl(fun(P, ASet) ->
+		      availset_intersect(avail_out(P, Avail), ASet)
+		  end, availset_top(), Pred) % top is the identity of intersect
+  end.
+
+want_in(L, Avail) -> (avail_get(L, Avail))#avail_bb.want.
+want_out(L, Avail) -> % WantOut is the union of WantIn over all successors
+  lists:foldl(fun(S, Set) ->
+		  ordsets:union(want_in(S, Avail), Set)
+	      end, ordsets:new(), avail_succ(L, Avail)).
+
+def_in(L, Avail) -> (avail_get(L, Avail))#avail_bb.defin.
+def_out(L, Avail) -> % DefOut is the intersection of DefIn over all successors
+  case avail_succ(L, Avail) of
+    [] -> ordsets:new(); % exit block (no successors)
+    Succ ->
+      ordsets:intersection([def_in(S, Avail) || S <- Succ])
+  end.
+
+-type regset()  :: ordsets:ordset(reg()).
+-type availset() :: top | regset(). % 'top' behaves as the set of all temps
+availset_empty() -> [].
+availset_top() -> top.
+availset_intersect(top, B) -> B; % top is the identity of intersection
+availset_intersect(A, top) -> A;
+availset_intersect(A, B) -> ordsets:intersection(A, B).
+availset_union(top, _) -> top; % top absorbs everything under union
+availset_union(_, top) -> top;
+availset_union(A, B) -> ordsets:union(A, B).
+ordset_intersect_availset(OS, top) -> OS; % result is always a plain ordset
+ordset_intersect_availset(OS, AS) -> ordsets:intersection(OS, AS).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Analysis pass
+%%
+%% The analysis pass collects the set of temps we're interested in splitting
+%% (Spills), and computes three dataflow analyses for this subset of temps.
+%%
+%% Avail, which is the set of temps which are available in register from a
+%%   previous (potential) spill or restore without going through a HasCall
+%%   block.
+%% Want, which is a liveness analysis for the subset of temps used by an
+%%   instruction that are also in Avail at that point. In other words, Want is
+%%   the set of temps that are split (have a register copy) at a particular
+%%   point.
+%% Def, which are the temps that are already going to be spilled later, and so
+%%   need not be spilled when they're defined.
+%%
+%% Lastly, it computes the set Self for each block: the temps that have
+%% avail-want pairs in the same block, and so should be split in that block even
+%% if they're not in WantIn for the block.
+
+-spec analyse(target_cfg(), liveness(), target()) -> avail().
+analyse(CFG, Liveness, Target) ->
+  Avail0 = analyse_init(CFG, Liveness, Target), % local sets for every block
+  RPO = reverse_postorder(CFG, Target),
+  AvailLs = [L || L <- RPO, not avail_has_call(L, Avail0)], % skip HasCall blocks
+  Avail1 = avail_dataf(AvailLs, Avail0), % forward dataflow: computes AvailOut
+  Avail2 = analyse_filter_want(maps:keys(Avail1), Avail1), % clip by AvailIn
+  PO = lists:reverse(RPO), % the backward dataflow sweeps in postorder
+  want_dataf(PO, Avail2). % backward dataflow: computes WantIn and DefIn
+
+-spec analyse_init(target_cfg(), liveness(), target()) -> avail().
+analyse_init(CFG, Liveness, Target) ->
+  analyse_init(labels(CFG, Target), CFG, Liveness, Target, #{}, []).
+
+-spec analyse_init([label()], target_cfg(), liveness(), target(), spillset(),
+		   [{label(), avail_bb()}])
+		  -> avail().
+analyse_init([], _CFG, _Liveness, Target, Spills0, Acc) ->
+  %% Precoloured temps can't be spilled
+  Spills = spills_filter(fun(R) -> not is_precoloured(R, Target) end, Spills0),
+  analyse_init_1(Acc, Spills, []); % restrict every block's sets to Spills
+analyse_init([L|Ls], CFG, Liveness, Target, Spills0, Acc) ->
+  {DefIn, Gen, Self, Want, HasCall0} = % local (per-block) sets from one scan
+    analyse_scan(hipe_bb:code(bb(CFG, L, Target)), Target,
+		 ordsets:new(), ordsets:new(), ordsets:new(),
+		 ordsets:new()),
+  {Spills, Out, HasCall} =
+    case HasCall0 of
+      false -> {Spills0, availset_top(), false}; % Out starts at top
+      {true, CallDefs} -> % live-out temps not defined by it become spill candidates
+	Spill = ordsets:subtract(liveout(Liveness, L, Target), CallDefs),
+	{spills_add_list(Spill, Spills0), Gen, true} % Out starts as Gen
+    end,
+  Pred = hipe_gen_cfg:pred(CFG, L),
+  Succ = hipe_gen_cfg:succ(CFG, L),
+  Val = #avail_bb{gen=Gen, want=Want, self=Self, out=Out, has_call=HasCall,
+		  pred=Pred, succ=Succ, defin=DefIn},
+  analyse_init(Ls, CFG, Liveness, Target, Spills, [{L, Val} | Acc]).
+
+-spec analyse_init_1([{label(), avail_bb()}], spillset(),
+		     [{label(), avail_bb()}])
+		    -> avail().
+analyse_init_1([], _Spills, Acc) -> maps:from_list(Acc); % label => avail_bb map
+analyse_init_1([{L, Val0}|Vs], Spills, Acc) ->
+  #avail_bb{out=Out,gen=Gen,want=Want,self=Self} = Val0,
+  Val = Val0#avail_bb{ % keep only temps that are keys of Spills; defin is not touched
+	  out  = spills_filter_availset(Out,  Spills),
+	  gen  = spills_filter_availset(Gen,  Spills),
+	  want = spills_filter_availset(Want, Spills),
+	  self = spills_filter_availset(Self, Spills)},
+  analyse_init_1(Vs, Spills, [{L, Val} | Acc]).
+
+-type spillset() :: #{reg() => []}.
+-spec spills_add_list([reg()], spillset()) -> spillset().
+spills_add_list([], Spills) -> Spills;
+spills_add_list([R|Rs], Spills) -> spills_add_list(Rs, Spills#{R => []}).
+
+-spec spills_filter_availset(availset(), spillset()) -> availset().
+spills_filter_availset([E|Es], Spills) ->
+  case Spills of
+    #{E := _} -> [E|spills_filter_availset(Es, Spills)]; % key of Spills: keep
+    #{} ->          spills_filter_availset(Es, Spills)  % otherwise drop E
+  end;
+spills_filter_availset([], _) -> [];
+spills_filter_availset(top, _) -> top. % top is passed through unfiltered
+
+spills_filter(Fun, Spills) -> maps:filter(fun(K, _) -> Fun(K) end, Spills).
+
+-spec analyse_scan([instr()], target(), Defset, Gen, Self, Want)
+		  -> {Defset, Gen, Self, Want, HasCall} when
+    HasCall :: false | {true, regset()},
+    Defset  :: regset(),
+    Gen     :: availset(),
+    Self    :: regset(),
+    Want    :: regset().
+analyse_scan([], _Target, Defs, Gen, Self, Want) ->
+  {Defs, Gen, Self, Want, false};
+analyse_scan([I|Is], Target, Defs0, Gen0, Self0, Want0) ->
+  {DefL, UseL} = reg_def_use(I, Target),
+  Use = ordsets:from_list(UseL),
+  Def = ordsets:from_list(DefL),
+  Self = ordsets:union(ordsets:intersection(Use, Gen0), Self0), % used while in Gen
+  Want = ordsets:union(ordsets:subtract(Use, Defs0), Want0), % used before any def
+  Defs = ordsets:union(Def, Defs0),
+  case defines_all_alloc(I, Target) of
+    true ->
+      [] = Is, % assertion: such an instruction must be the last one in the block
+      {Defs, ordsets:new(), Self, Want, {true, Def}}; % Gen is returned empty
+    false ->
+      Gen = ordsets:union(ordsets:union(Def, Use), Gen0), % defs and uses join Gen
+      analyse_scan(Is, Target, Defs, Gen, Self, Want)
+  end.
+
+-spec avail_dataf([label()], avail()) -> avail().
+avail_dataf(RPO, Avail0) ->
+  case avail_dataf_once(RPO, Avail0, 0) of
+    {Avail, 0} -> Avail; % no block changed in this sweep: fixpoint reached
+    {Avail, _Changed} -> % at least one block changed: sweep again
+      avail_dataf(RPO, Avail)
+  end.
+
+-spec avail_dataf_once([label()], avail(), non_neg_integer())
+		      -> {avail(), non_neg_integer()}.
+avail_dataf_once([], Avail, Changed) -> {Avail, Changed};
+avail_dataf_once([L|Ls], Avail0, Changed0) ->
+  ABB = #avail_bb{out=OldOut, gen=Gen} = avail_get(L, Avail0),
+  In = avail_in(L, Avail0),
+  {Changed, Avail} =
+    case availset_union(In, Gen) of % transfer function: Out = In U Gen
+      OldOut -> {Changed0, Avail0}; % OldOut is bound: matches only if unchanged
+      Out -> {Changed0+1, avail_set(L, ABB#avail_bb{out=Out}, Avail0)}
+    end,
+  avail_dataf_once(Ls, Avail, Changed). % Changed counts the blocks updated
+
+-spec analyse_filter_want([label()], avail()) -> avail().
+analyse_filter_want([], Avail) -> Avail;
+analyse_filter_want([L|Ls], Avail0) ->
+  ABB = #avail_bb{want=Want0, defin=DefIn0} = avail_get(L, Avail0),
+  In = avail_in(L, Avail0), % reads only pred/out, which this function never changes
+  Want = ordset_intersect_availset(Want0, In), % keep wants available on entry
+  DefIn = ordset_intersect_availset(DefIn0, In), % likewise for the local defs
+  Avail = avail_set(L, ABB#avail_bb{want=Want, defin=DefIn}, Avail0),
+  analyse_filter_want(Ls, Avail).
+
+-spec want_dataf([label()], avail()) -> avail().
+want_dataf(PO, Avail0) ->
+  case want_dataf_once(PO, Avail0, 0) of
+    {Avail, 0} -> Avail; % no block changed in this sweep: fixpoint reached
+    {Avail, _Changed} -> % at least one block changed: sweep again
+      want_dataf(PO, Avail)
+  end.
+
+-spec want_dataf_once([label()], avail(), non_neg_integer())
+		     -> {avail(), non_neg_integer()}.
+want_dataf_once([], Avail, Changed) -> {Avail, Changed};
+want_dataf_once([L|Ls], Avail0, Changed0) ->
+  ABB0 = #avail_bb{want=OldIn,defin=OldDef} = avail_get(L, Avail0),
+  AvailIn = avail_in(L, Avail0),
+  Out = want_out(L, Avail0),
+  DefOut = def_out(L, Avail0),
+  {Changed, Avail} = % both sets only grow: new = (Out n AvailIn) U old
+    case {ordsets:union(ordset_intersect_availset(Out,    AvailIn), OldIn),
+	  ordsets:union(ordset_intersect_availset(DefOut, AvailIn), OldDef)}
+    of
+      {OldIn, OldDef} -> {Changed0, Avail0}; % bound vars: neither set changed
+      {In, DefIn} -> % at least one of the two sets grew
+	ABB = ABB0#avail_bb{want=In,defin=DefIn},
+	{Changed0+1, avail_set(L, ABB, Avail0)}
+    end,
+  want_dataf_once(Ls, Avail, Changed).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Rewrite pass
+-type subst_dict() :: orddict:orddict(reg(), temp()). % reg nr => substitute temp
+-type input()      :: #{label() => subst_dict()}.     % substitutions on block entry
+
+-spec rewrite(target_cfg(), target(), avail()) -> target_cfg().
+rewrite(CFG, Target, Avail) ->
+  RPO = reverse_postorder(CFG, Target),
+  rewrite(RPO, Target, Avail, #{}, CFG).
+
+-spec rewrite([label()], target(), avail(), input(), target_cfg())
+	     -> target_cfg().
+rewrite([], _Target, _Avail, _Input, CFG) -> CFG;
+rewrite([L|Ls], Target, Avail, Input0, CFG0) ->
+  SplitHere = split_in_block(L, Avail), % temps whose split starts in this block
+  {Input1, LInput} =
+    case Input0 of
+      #{L := LInput0} -> {Input0, LInput0}; % stored earlier by rewrite_succs/7
+      #{} -> {Input0#{L => []}, []} % entry block
+    end,
+  ?ASSERT([] =:= [X || X <- SplitHere, orddict:is_key(X, LInput)]),
+  ?ASSERT(want_in(L, Avail) =:= orddict:fetch_keys(LInput)),
+  {CFG1, LOutput} =
+    case {SplitHere, LInput} of
+      {[], []} -> % optimisation (rewrite will do nothing, so skip it)
+	{CFG0, LInput};
+      _ ->
+	Code0 = hipe_bb:code(BB=bb(CFG0, L, Target)),
+	DefOut = def_out(L, Avail),
+	{Code, LOutput0, _DefIn} =
+	  rewrite_instrs(Code0, Target, LInput, DefOut, SplitHere),
+	{update_bb(CFG0, L, hipe_bb:code_update(BB, Code), Target), LOutput0}
+    end,
+  {Input, CFG} = rewrite_succs(avail_succ(L, Avail), Target, L, LOutput, Avail,
+			       Input1, CFG1), % hand L's substitutions to its successors
+  rewrite(Ls, Target, Avail, Input, CFG).
+
+-spec renamed_in_block(label(), avail()) -> ordsets:ordset(reg()).
+renamed_in_block(L, Avail) -> % Self U WantIn U WantOut
+  ordsets:union([avail_self(L, Avail), want_in(L, Avail),
+		 want_out(L, Avail)]).
+
+-spec split_in_block(label(), avail()) -> ordsets:ordset(reg()).
+split_in_block(L, Avail) -> % (Self U WantOut) \ WantIn
+  ordsets:subtract(ordsets:union(avail_self(L, Avail), want_out(L, Avail)),
+		   want_in(L, Avail)).
+
+-spec rewrite_instrs([instr()], target(), subst_dict(), regset(), [reg()])
+		    ->  {[instr()], subst_dict(), regset()}.
+rewrite_instrs([], _Target, Output, DefOut, []) -> % [] asserts SplitHere is used up
+  {[], Output, DefOut};
+rewrite_instrs([I|Is], Target, Input0, BBDefOut, SplitHere0) ->
+  {TDef, TUse} = def_use(I, Target),
+  {Def, Use} = {reg_names(TDef, Target), reg_names(TUse, Target)},
+  %% Restores are generated in forward order by picking temps from SplitHere as
+  %% they're used or defined. After the last instruction, all temps have been
+  %% picked.
+  {ISplits, SplitHere} = % ISplits: temps of SplitHere0 defined or used by I
+    lists:partition(fun(R) ->
+			lists:member(R, Def) orelse lists:member(R, Use)
+		    end, SplitHere0),
+  {Input, Restores} =
+    case ISplits of
+      [] -> {Input0, []};
+      _ ->
+	make_splits(ISplits, Target, TDef, TUse, Input0, [])
+    end,
+  %% Here's the recursive call
+  {Acc0, Output, DefOut} =
+    rewrite_instrs(Is, Target, Input, BBDefOut, SplitHere),
+  %% From here we're processing instructions in reverse order, because to avoid
+  %% redundant spills we need to walk the 'def' dataflow, which is in reverse.
+  SubstFun = fun(Temp) -> % replaces a temp by its substitute, if it has one
+		 case orddict:find(reg_nr(Temp, Target), Input) of
+		   {ok, NewTemp} -> NewTemp;
+		   error -> Temp
+		 end
+	     end,
+  Acc1 = insert_spills(TDef, Target, Input, DefOut, Acc0), % spills go right after I
+  Acc = Restores ++ [subst_temps(SubstFun, I, Target) | Acc1], % restores before I
+  DefIn = ordsets:union(DefOut, ordsets:from_list(Def)), % passed on to the caller
+  {Acc, Output, DefIn}.
+
+-spec make_splits([reg()], target(), [temp()], [temp()], subst_dict(),
+		  [instr()])
+		 -> {subst_dict(), [instr()]}.
+make_splits([], _Target, _TDef, _TUse, Input, Acc) ->
+  {Input, Acc};
+make_splits([S|Ss], Target, TDef, TUse, Input0, Acc0) ->
+  SubstReg = new_reg_nr(Target), % fresh register number for the substitute
+  {Acc, Subst} =
+    case find_reg_temp(S, TUse, Target) of
+      error -> % S is not among the uses, so no restore move is emitted
+	{ok, Temp} = find_reg_temp(S, TDef, Target), % asserts S is among the defs
+	{Acc0, update_reg_nr(SubstReg, Temp, Target)};
+      {ok, Temp} -> % S is used: emit a restore, i.e. a move Temp -> Subst0
+	Subst0 = update_reg_nr(SubstReg, Temp, Target),
+	Acc1 = [mk_move(Temp, Subst0, Target) | Acc0],
+	{Acc1, Subst0}
+    end,
+  Input = orddict:store(S, Subst, Input0), % S maps to Subst from here on
+  make_splits(Ss, Target, TDef, TUse, Input, Acc).
+
+-spec find_reg_temp(reg(), [temp()], target()) -> error | {ok, temp()}.
+find_reg_temp(_Reg, [], _Target) -> error; % no temp with that register number
+find_reg_temp(Reg, [T|Ts], Target) ->
+  case reg_nr(T, Target) of
+    Reg -> {ok, T}; % Reg is already bound: first temp whose number equals it
+    _ -> find_reg_temp(Reg, Ts, Target)
+  end.
+
+-spec insert_spills([temp()], target(), subst_dict(), regset(), [instr()])
+		   -> [instr()].
+insert_spills([], _Target, _Input, _DefOut, Acc) -> Acc;
+insert_spills([T|Ts], Target, Input, DefOut, Acc0) ->
+  R = reg_nr(T, Target),
+  Acc =
+    case orddict:find(R, Input) of
+      error -> Acc0; % T has no substitute here: nothing to spill
+      {ok, Subst} ->
+	case lists:member(R, DefOut) of
+	  true -> Acc0; % R is in DefOut: no spill is emitted for this def
+	  false -> [mk_move(Subst, T, Target) | Acc0] % spill: move Subst -> T
+	end
+    end,
+  insert_spills(Ts, Target, Input, DefOut, Acc).
+
+-spec rewrite_succs([label()], target(), label(), subst_dict(), avail(),
+		    input(), target_cfg()) -> {input(), target_cfg()}.
+rewrite_succs([], _Target, _P, _POutput, _Avail, Input, CFG) -> {Input, CFG};
+rewrite_succs([L|Ls], Target, P, POutput, Avail, Input0, CFG0) ->
+  NewLInput = orddict_with_ordset(want_in(L, Avail), POutput), % P's output cut to WantIn(L)
+  {Input, CFG} =
+    case Input0 of
+      #{L := LInput} -> % L already has an input dict; it is kept unchanged
+	CFG2 =
+	  case required_phi_moves(LInput, NewLInput) of
+	    [] -> CFG0; % P's substitutes already agree with L's input
+	    ReqMovs -> % reconcile on the P -> L edge with a new block of moves
+	      PhiLb = new_label(Target),
+	      Code = [mk_move(S,D,Target) || {S,D} <- ReqMovs]
+		++ [mk_goto(L, Target)],
+	      PhiBB = hipe_bb:mk_bb(Code),
+	      CFG1 = update_bb(CFG0, PhiLb, PhiBB, Target),
+	      bb_redirect_jmp(L, PhiLb, P, CFG1, Target) % P now jumps to PhiLb
+	  end,
+	{Input0, CFG2};
+      #{} -> % L has no input dict yet: P's view becomes L's input
+	{Input0#{L => NewLInput}, CFG0}
+    end,
+  rewrite_succs(Ls, Target, P, POutput, Avail, Input, CFG).
+
+-spec bb_redirect_jmp(label(), label(), label(), target_cfg(), target())
+		     -> target_cfg().
+bb_redirect_jmp(From, To, Lb, CFG, Target) -> % From: old jump target, To: new one
+  BB0 = bb(CFG, Lb, Target), % Lb is the block whose jump is rewritten
+  Last = redirect_jmp(hipe_bb:last(BB0), From, To, Target), % only the last instr
+  BB = hipe_bb:code_update(BB0, hipe_bb:butlast(BB0) ++ [Last]),
+  update_bb(CFG, Lb, BB, Target).
+
+-spec required_phi_moves(subst_dict(), subst_dict()) -> [{reg(), reg()}].
+required_phi_moves([], []) -> []; % key lists must be identical, else function_clause
+required_phi_moves([P|Is], [P|Os]) -> required_phi_moves(Is, Os); % same pair: no move
+required_phi_moves([{K, In}|Is], [{K, Out}|Os]) ->
+  [{Out, In}|required_phi_moves(Is, Os)]. % {Source, Destination} of the move
+
+%% @doc Returns the entries of Dict whose keys are also elements of Set.
+-spec orddict_with_ordset(ordsets:ordset(K), orddict:orddict(K, V))
+			 -> orddict:orddict(K, V).
+orddict_with_ordset([S|Ss], [{K, _}|_]=Dict) when S < K -> % S is not a key: skip it
+  orddict_with_ordset(Ss, Dict);
+orddict_with_ordset([S|_]=Set, [{K, _}|Ds]) when S > K -> % K is not in Set: drop it
+  orddict_with_ordset(Set, Ds);
+orddict_with_ordset([_S|Ss], [{_K, _}=P|Ds]) -> % _S == _K
+  [P|orddict_with_ordset(Ss, Ds)];
+orddict_with_ordset([], _) -> [];
+orddict_with_ordset(_, []) -> [].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Target module interface functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-define(TGT_IFACE_0(N), N(         {M,C}) -> M:N(         C)).
+-define(TGT_IFACE_1(N), N(A1,      {M,C}) -> M:N(A1,      C)).
+-define(TGT_IFACE_2(N), N(A1,A2,   {M,C}) -> M:N(A1,A2,   C)).
+-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)).
+
+?TGT_IFACE_2(bb).
+?TGT_IFACE_1(def_use).
+?TGT_IFACE_1(defines_all_alloc).
+?TGT_IFACE_1(is_precoloured).
+?TGT_IFACE_1(labels).
+?TGT_IFACE_1(mk_goto).
+?TGT_IFACE_2(mk_move).
+?TGT_IFACE_0(new_label).
+?TGT_IFACE_0(new_reg_nr).
+?TGT_IFACE_3(redirect_jmp).
+?TGT_IFACE_1(reg_nr).
+?TGT_IFACE_1(reverse_postorder).
+?TGT_IFACE_2(subst_temps).
+?TGT_IFACE_3(update_bb).
+?TGT_IFACE_2(update_reg_nr).
+
+liveout(Liveness, L, Target={TgtMod,TgtCtx}) ->
+  ordsets:from_list(reg_names(TgtMod:liveout(Liveness, L, TgtCtx), Target)).
+
+reg_names(Regs, {TgtMod,TgtCtx}) ->
+  [TgtMod:reg_nr(X,TgtCtx) || X <- Regs].
+
+reg_def_use(I, Target) ->
+  {TDef, TUse} = def_use(I, Target),
+  {reg_names(TDef, Target), reg_names(TUse, Target)}.
diff --git a/lib/hipe/regalloc/hipe_sparc_specific.erl b/lib/hipe/regalloc/hipe_sparc_specific.erl
index 31fca81316..78b6379eba 100644
--- a/lib/hipe/regalloc/hipe_sparc_specific.erl
+++ b/lib/hipe/regalloc/hipe_sparc_specific.erl
@@ -24,6 +24,7 @@
 	 ,reg_nr/2
 	 ,def_use/2
 	 ,is_move/2
+	 ,is_spill_move/2
 	 ,is_precoloured/2
 	 ,var_range/2
 	 ,allocatable/1
@@ -46,12 +47,19 @@
 %% callbacks for hipe_regalloc_loop
 -export([check_and_rewrite/3]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights, hipe_range_split
+-export([branch_preds/2]).
+
 check_and_rewrite(CFG, Coloring, no_context) ->
   hipe_sparc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal').
 
@@ -115,6 +123,9 @@ bb(CFG,L,_) ->
 update_bb(CFG,L,BB,_) ->
   hipe_sparc_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Branch,_) ->
+  hipe_sparc_cfg:branch_preds(Branch).
+
 %% SPARC stuff
 
 def_use(Instruction, Ctx) ->
@@ -144,9 +155,24 @@ is_move(Instruction, _) ->
     false -> false
   end.
 
+is_spill_move(Instruction, _) ->
+  hipe_sparc:is_pseudo_spill_move(Instruction).
+
 reg_nr(Reg, _) ->
   hipe_sparc:temp_reg(Reg).
 
+mk_move(Src, Dst, _) ->
+  hipe_sparc:mk_pseudo_move(Src, Dst).
+
+mk_goto(Label, _) ->
+  hipe_sparc:mk_b_label(Label).
+
+redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) ->
+  hipe_sparc_cfg:redirect_jmp(Jmp, ToOld, ToNew).
+
+new_label(_) ->
+  hipe_gensym:get_next_label(sparc).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(sparc).
 
diff --git a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl
index 050d65e1a9..485fdc212a 100644
--- a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl
+++ b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl
@@ -24,6 +24,7 @@
 	 ,reg_nr/2
 	 ,def_use/2
 	 ,is_move/2
+	 ,is_spill_move/2
 	 ,is_precoloured/2
 	 ,var_range/2
 	 ,allocatable/1
@@ -46,12 +47,19 @@
 %% callbacks for hipe_regalloc_loop
 -export([check_and_rewrite/3]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights, hipe_range_split
+-export([branch_preds/2]).
+
 check_and_rewrite(CFG, Coloring, no_context) ->
   hipe_sparc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring).
 
@@ -108,6 +116,9 @@ bb(CFG, L, _) ->
 update_bb(CFG,L,BB,_) ->
   hipe_sparc_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Branch,_) ->
+  hipe_sparc_cfg:branch_preds(Branch).
+
 %% SPARC stuff
 
 def_use(I, Ctx) ->
@@ -125,9 +136,24 @@ defines_all_alloc(I, _) ->
 is_move(I, _) ->
   hipe_sparc:is_pseudo_fmove(I).
  
+is_spill_move(I, _) ->
+  hipe_sparc:is_pseudo_spill_fmove(I).
+
 reg_nr(Reg, _) ->
   hipe_sparc:temp_reg(Reg).
 
+mk_move(Src, Dst, _) ->
+  hipe_sparc:mk_pseudo_fmove(Src, Dst).
+
+mk_goto(Label, _) ->
+  hipe_sparc:mk_b_label(Label).
+
+redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) ->
+  hipe_sparc_cfg:redirect_jmp(Jmp, ToOld, ToNew).
+
+new_label(_) ->
+  hipe_gensym:get_next_label(sparc).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(sparc).
 
diff --git a/lib/hipe/regalloc/hipe_x86_specific.erl b/lib/hipe/regalloc/hipe_x86_specific.erl
index c1c8dbbcd6..dacfb71b00 100644
--- a/lib/hipe/regalloc/hipe_x86_specific.erl
+++ b/lib/hipe/regalloc/hipe_x86_specific.erl
@@ -46,6 +46,7 @@
 	 def_use/2,
 	 is_arg/2,	% used by hipe_ls_regalloc
 	 is_move/2,
+	 is_spill_move/2,
 	 is_fixed/2,	% used by hipe_graph_coloring_regalloc
 	 is_global/2,
 	 is_precoloured/2,
@@ -63,12 +64,19 @@
 %% callbacks for hipe_regalloc_loop
 -export([check_and_rewrite/3]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights
+-export([branch_preds/2]).
+
 check_and_rewrite(CFG, Coloring, _) ->
   ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(CFG, Coloring, 'normal').
 
@@ -156,6 +164,9 @@ bb(CFG,L,_) ->
 update_bb(CFG,L,BB,_) ->
   hipe_x86_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Instr,_) ->
+  hipe_x86_cfg:branch_preds(Instr).
+
 %% X86 stuff
 
 def_use(Instruction,_) ->
@@ -200,9 +211,33 @@ is_move(Instruction,_) ->
     false -> false
   end.
 
+is_spill_move(Instruction,_) ->
+  hipe_x86:is_pseudo_spill_move(Instruction).
+
 reg_nr(Reg,_) ->
   hipe_x86:temp_reg(Reg).
 
+mk_move(Src, Dst, _) ->
+  hipe_x86:mk_move(Src, Dst).
+
+mk_goto(Label, _) ->
+  hipe_x86:mk_jmp_label(Label).
+
+redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) ->
+  Ref = make_ref(), % fresh key for a flag kept in the process dictionary
+  put(Ref, false), % flag: has any label been rewritten yet?
+  I = hipe_x86_subst:insn_lbls( % NOTE(review): if this raises, Ref is never erased
+	fun(Tgt) ->
+	    if Tgt =:= ToOld -> put(Ref, true), ToNew;
+	       is_integer(Tgt) -> Tgt % no catch-all: a non-integer raises if_clause
+	    end
+	end, Jmp),
+  true = erase(Ref), % Assert that something was rewritten
+  I.
+
+new_label(_) ->
+  hipe_gensym:get_next_label(x86).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(x86).
 
diff --git a/lib/hipe/regalloc/hipe_x86_specific_x87.erl b/lib/hipe/regalloc/hipe_x86_specific_x87.erl
index 4b4c83f76d..3fe49e1f00 100644
--- a/lib/hipe/regalloc/hipe_x86_specific_x87.erl
+++ b/lib/hipe/regalloc/hipe_x86_specific_x87.erl
@@ -47,6 +47,7 @@
 	 uses/2,
 	 defines/2,
 	 defines_all_alloc/2,
+	 is_spill_move/2,
 	 is_global/2,
 	 reg_nr/2,
 	 physical_name/2,
@@ -158,6 +159,9 @@ defines(I, _) ->
 
 defines_all_alloc(I, _) -> hipe_amd64_defuse:insn_defs_all(I).
 
+is_spill_move(I, _) ->
+  hipe_x86:is_pseudo_spill_fmove(I).
+
 temp_is_double(Temp) ->
   hipe_x86:temp_type(Temp) =:= 'double'.