88 files changed, 3608 insertions, 489 deletions
diff --git a/lib/hipe/amd64/Makefile b/lib/hipe/amd64/Makefile
index 617f6749ac..d0da8cdff6 100644
--- a/lib/hipe/amd64/Makefile
+++ b/lib/hipe/amd64/Makefile
@@ -128,6 +128,7 @@ $(EBIN)/hipe_amd64_ra_postconditions.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl
 $(EBIN)/hipe_amd64_ra_sse2_postconditions.beam: ../main/hipe.hrl
 $(EBIN)/hipe_amd64_registers.beam: ../rtl/hipe_literals.hrl
 $(EBIN)/hipe_amd64_spill_restore.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../flow/cfg.hrl ../x86/hipe_x86_spill_restore.erl
+$(EBIN)/hipe_amd64_subst.beam: ../x86/hipe_x86_subst.erl
 $(EBIN)/hipe_amd64_x87.beam: ../x86/hipe_x86_x87.erl
 $(EBIN)/hipe_amd64_sse2.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl
 $(EBIN)/hipe_rtl_to_amd64.beam: ../x86/hipe_rtl_to_x86.erl ../rtl/hipe_rtl.hrl
diff --git a/lib/hipe/amd64/hipe_amd64_encode.erl b/lib/hipe/amd64/hipe_amd64_encode.erl
index f8cc0c7d83..bda2824ffc 100644
--- a/lib/hipe/amd64/hipe_amd64_encode.erl
+++ b/lib/hipe/amd64/hipe_amd64_encode.erl
@@ -1316,6 +1316,7 @@ dotest1(OS) ->
     RM64 = {rm64,rm_reg(?EDX)},
     RM32 = {rm32,rm_reg(?EDX)},
     RM16 = {rm16,rm_reg(?EDX)},
+    RM16REX = {rm16,rm_reg(?R13)},
     RM8 = {rm8,rm_reg(?EDX)},
     RM8REX = {rm8,rm_reg(?SIL)},
     Rel32 = {rel32,Word32},
@@ -1479,6 +1480,7 @@ dotest1(OS) ->
     t(OS,'test',{RM8,Imm8}),
     t(OS,'test',{RM8REX,Imm8}),
     t(OS,'test',{RM16,Imm16}),
+    t(OS,'test',{RM16REX,Imm16}),
     t(OS,'test',{RM32,Imm32}),
     t(OS,'test',{RM64,Imm32}),
     t(OS,'test',{RM32,Reg32}),
diff --git a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl
index 8a3ea92156..891c874a15 100644
--- a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl
+++ b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl
@@ -53,6 +53,8 @@ do_insn(I, TempMap, Strategy) ->	% Insn -> {Insn list, DidSpill}
       do_fp_unop(I, TempMap, Strategy);
     #fp_binop{} ->
       do_fp_binop(I, TempMap, Strategy);
+    #pseudo_spill_fmove{} ->
+      do_pseudo_spill_fmove(I, TempMap, Strategy);
     _ ->
       %% All non sse2 ops
       {[I], false}
@@ -95,8 +97,13 @@ do_fmove(I, TempMap, Strategy) ->
   of
     true ->
       Tmp = spill_temp(double, Strategy),
-      {[#fmove{src=Src, dst=Tmp},I#fmove{src=Tmp,dst=Dst}],
-       true};
+      %% pseudo_spill_fmove allows spill slot move coalescing, but must not
+      %% contain memory operands (except for spilled temps)
+      Is = case is_float_temp(Src) andalso is_float_temp(Dst) of
+	     true -> [#pseudo_spill_fmove{src=Src, temp=Tmp, dst=Dst}];
+	     false -> [#fmove{src=Src, dst=Tmp},I#fmove{src=Tmp,dst=Dst}]
+	   end,
+      {Is, true};
     false ->
       {[I], false}
   end.
@@ -104,6 +111,12 @@ do_fmove(I, TempMap, Strategy) ->
 is_float_temp(#x86_temp{type=Type}) -> Type =:= double;
 is_float_temp(#x86_mem{}) -> false.
 
+%%% Fix a pseudo_spill_fmove op.
+do_pseudo_spill_fmove(I = #pseudo_spill_fmove{temp=Temp}, TempMap, _Strategy) ->
+  %% Temp is above the low water mark and must not have been spilled
+  false = is_mem_opnd(Temp, TempMap),
+  {[I], false}. % nothing to do
+
 %%% Check if an operand denotes a memory cell (mem or pseudo).
 
 is_mem_opnd(Opnd, TempMap) ->
diff --git a/lib/hipe/amd64/hipe_amd64_registers.erl b/lib/hipe/amd64/hipe_amd64_registers.erl
index a4cb71a106..a5cecef5a1 100644
--- a/lib/hipe/amd64/hipe_amd64_registers.erl
+++ b/lib/hipe/amd64/hipe_amd64_registers.erl
@@ -207,19 +207,14 @@ allocatable_x87() ->
 
 nr_args() -> ?AMD64_NR_ARG_REGS.
 
-arg(N) ->
-  if N < ?AMD64_NR_ARG_REGS ->
-      case N of
-	0 -> ?ARG0;
-	1 -> ?ARG1;
-	2 -> ?ARG2;
-	3 -> ?ARG3;
-	4 -> ?ARG4;
-	5 -> ?ARG5;
-	_ -> exit({?MODULE, arg, N})
-      end;
-     true ->
-      exit({?MODULE, arg, N})
+arg(N) when N < ?AMD64_NR_ARG_REGS ->
+  case N of
+    0 -> ?ARG0;
+    1 -> ?ARG1;
+    2 -> ?ARG2;
+    3 -> ?ARG3;
+    4 -> ?ARG4;
+    5 -> ?ARG5
   end.
 
 is_arg(R) ->
@@ -240,11 +235,7 @@ args(Arity) when is_integer(Arity), Arity >= 0 ->
 args(I, Rest) when I < 0 -> Rest;
 args(I, Rest) -> args(I-1, [arg(I) | Rest]).
 
-ret(N) ->
-  case N of
-    0 -> ?RAX;
-    _ -> exit({?MODULE, ret, N})
-  end.
+ret(0) -> ?RAX.
 
 %% Note: the fact that (allocatable() UNION allocatable_x87() UNION
 %% allocatable_sse2()) is a subset of call_clobbered() is hard-coded in
diff --git a/lib/hipe/arm/hipe_arm.erl b/lib/hipe/arm/hipe_arm.erl
index e34a00f561..3b090b501a 100644
--- a/lib/hipe/arm/hipe_arm.erl
+++ b/lib/hipe/arm/hipe_arm.erl
@@ -79,6 +79,9 @@
 	 pseudo_move_dst/1,
 	 pseudo_move_src/1,
 
+	 mk_pseudo_spill_move/3,
+	 is_pseudo_spill_move/1,
+
 	 mk_pseudo_switch/3,
 
 	 mk_pseudo_tailcall/4,
@@ -250,6 +253,10 @@ is_pseudo_move(I) -> case I of #pseudo_move{} -> true; _ -> false end.
 pseudo_move_dst(#pseudo_move{dst=Dst}) -> Dst.
 pseudo_move_src(#pseudo_move{src=Src}) -> Src.
 
+mk_pseudo_spill_move(Dst, Temp, Src) ->
+  #pseudo_spill_move{dst=Dst, temp=Temp, src=Src}.
+is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move).
+
 mk_pseudo_switch(JTab, Index, Labels) ->
   #pseudo_switch{jtab=JTab, index=Index, labels=Labels}.
 
diff --git a/lib/hipe/arm/hipe_arm.hrl b/lib/hipe/arm/hipe_arm.hrl
index 67bc07634e..be06b1ebd7 100644
--- a/lib/hipe/arm/hipe_arm.hrl
+++ b/lib/hipe/arm/hipe_arm.hrl
@@ -101,6 +101,7 @@
 -record(pseudo_call_prepare, {nrstkargs}).
 -record(pseudo_li, {dst, imm, label}).	% pre-generated label for use by the assembler
 -record(pseudo_move, {dst, src}).
+-record(pseudo_spill_move, {dst, temp, src}).
 -record(pseudo_switch, {jtab, index, labels}).
 -record(pseudo_tailcall, {funv, arity, stkargs, linkage}).
 -record(pseudo_tailcall_prepare, {}).
diff --git a/lib/hipe/arm/hipe_arm_assemble.erl b/lib/hipe/arm/hipe_arm_assemble.erl
index 713c148742..9aa730afa9 100644
--- a/lib/hipe/arm/hipe_arm_assemble.erl
+++ b/lib/hipe/arm/hipe_arm_assemble.erl
@@ -31,7 +31,7 @@ assemble(CompiledCode, Closures, Exports, Options) ->
 	  || {MFA, Defun} <- CompiledCode],
   %%
   {ConstAlign,ConstSize,ConstMap,RefsFromConsts} =
-    hipe_pack_constants:pack_constants(Code, 4),
+    hipe_pack_constants:pack_constants(Code),
   %%
   {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} =
     encode(translate(Code, ConstMap), Options),
diff --git a/lib/hipe/arm/hipe_arm_cfg.erl b/lib/hipe/arm/hipe_arm_cfg.erl
index ea6da67317..0bc3df30b9 100644
--- a/lib/hipe/arm/hipe_arm_cfg.erl
+++ b/lib/hipe/arm/hipe_arm_cfg.erl
@@ -24,6 +24,7 @@
 -export([params/1, reverse_postorder/1]).
 -export([arity/1]). % for linear scan
 %%-export([redirect_jmp/3]).
+-export([branch_preds/1]).
 
 %%% these tell cfg.inc what to define (ugly as hell)
 -define(BREADTH_ORDER,true).  % for linear scan
@@ -75,6 +76,26 @@ branch_successors(Branch) ->
     #pseudo_tailcall{} -> []
   end.
 
+branch_preds(Branch) ->
+  case Branch of
+    #pseudo_bc{true_label=TrueLab,false_label=FalseLab,pred=Pred} ->
+      [{FalseLab, 1.0-Pred}, {TrueLab, Pred}];
+    #pseudo_call{contlab=ContLab, sdesc=#arm_sdesc{exnlab=[]}} ->
+      %% A function can still cause an exception, even if we won't catch it
+      [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}];
+    #pseudo_call{contlab=ContLab, sdesc=#arm_sdesc{exnlab=ExnLab}} ->
+      CallExnPred = hipe_bb_weights:call_exn_pred(),
+      [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}];
+    #pseudo_switch{labels=Labels} ->
+      Prob = 1.0/length(Labels),
+      [{L, Prob} || L <- Labels];
+    _ ->
+      case branch_successors(Branch) of
+	[] -> [];
+	[Single] -> [{Single, 1.0}]
+      end
+  end.
+
 -ifdef(REMOVE_TRIVIAL_BBS_NEEDED).
 fails_to(_Instr) -> [].
 -endif.
diff --git a/lib/hipe/arm/hipe_arm_defuse.erl b/lib/hipe/arm/hipe_arm_defuse.erl
index 0e62070c6c..652299a514 100644
--- a/lib/hipe/arm/hipe_arm_defuse.erl
+++ b/lib/hipe/arm/hipe_arm_defuse.erl
@@ -40,6 +40,7 @@ insn_def_gpr(I) ->
     #pseudo_call{} -> call_clobbered_gpr();
     #pseudo_li{dst=Dst} -> [Dst];
     #pseudo_move{dst=Dst} -> [Dst];
+    #pseudo_spill_move{dst=Dst, temp=Temp} -> [Dst, Temp];
     #pseudo_tailcall_prepare{} -> tailcall_clobbered_gpr();
     #smull{dstlo=DstLo,dsthi=DstHi,src1=Src1} ->
       %% ARM requires DstLo, DstHi, and Src1 to be distinct.
@@ -83,6 +84,7 @@ insn_use_gpr(I) ->
     #pseudo_call{funv=FunV,sdesc=#arm_sdesc{arity=Arity}} ->
       funv_use(FunV, arity_use_gpr(Arity));
     #pseudo_move{src=Src} -> [Src];
+    #pseudo_spill_move{src=Src} -> [Src];
     #pseudo_switch{jtab=JTabR,index=IndexR} -> addtemp(JTabR, [IndexR]);
     #pseudo_tailcall{funv=FunV,arity=Arity,stkargs=StkArgs} ->
       addargs(StkArgs, addtemps(tailcall_clobbered_gpr(), funv_use(FunV, arity_use_gpr(Arity))));
diff --git a/lib/hipe/arm/hipe_arm_frame.erl b/lib/hipe/arm/hipe_arm_frame.erl
index e323907e31..a1004fb609 100644
--- a/lib/hipe/arm/hipe_arm_frame.erl
+++ b/lib/hipe/arm/hipe_arm_frame.erl
@@ -69,6 +69,8 @@ do_insn(I, LiveOut, Context, FPoff) ->
       do_pseudo_call_prepare(I, FPoff);
     #pseudo_move{} ->
       {do_pseudo_move(I, Context, FPoff), FPoff};
+    #pseudo_spill_move{} ->
+      {do_pseudo_spill_move(I, Context, FPoff), FPoff};
     #pseudo_tailcall{} ->
       {do_pseudo_tailcall(I, Context), context_framesize(Context)};
     _ ->
@@ -100,6 +102,26 @@ pseudo_offset(Temp, FPoff, Context) ->
   FPoff + context_offset(Context, Temp).
 
 %%%
+%%% Moves from one spill slot to another
+%%%
+
+do_pseudo_spill_move(I, Context, FPoff) ->
+  #pseudo_spill_move{dst=Dst, temp=Temp, src=Src} = I,
+  case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of
+    false -> % Register allocator changed its mind, turn back to move
+      do_pseudo_move(hipe_arm:mk_pseudo_move(Dst, Src), Context, FPoff);
+    true ->
+      SrcOffset = pseudo_offset(Src, FPoff, Context),
+      DstOffset = pseudo_offset(Dst, FPoff, Context),
+      case SrcOffset =:= DstOffset of
+	true -> []; % omit move-to-self
+	false ->
+	  mk_load('ldr', Temp, SrcOffset, mk_sp(),
+		  mk_store('str', Temp, DstOffset, mk_sp(), []))
+      end
+  end.
+
+%%%
 %%% Return - deallocate frame and emit 'ret $N' insn.
 %%%
 
diff --git a/lib/hipe/arm/hipe_arm_ra_finalise.erl b/lib/hipe/arm/hipe_arm_ra_finalise.erl
index 9bfe0a9a83..80cd470708 100644
--- a/lib/hipe/arm/hipe_arm_ra_finalise.erl
+++ b/lib/hipe/arm/hipe_arm_ra_finalise.erl
@@ -25,11 +25,17 @@ ra_bb(BB, Map) ->
   hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, [])).
 
 ra_code([I|Insns], Map, Accum) ->
-  ra_code(Insns, Map, [ra_insn(I, Map) | Accum]);
+  ra_code(Insns, Map, ra_insn(I, Map, Accum));
 ra_code([], _Map, Accum) ->
   lists:reverse(Accum).
 
-ra_insn(I, Map) ->
+ra_insn(I, Map, Accum) ->
+  case I of
+    #pseudo_move{} -> ra_pseudo_move(I, Map, Accum);
+    _ -> [ra_insn_1(I, Map) | Accum]
+  end.
+
+ra_insn_1(I, Map) ->
   case I of
     #alu{} -> ra_alu(I, Map);
     #cmp{} -> ra_cmp(I, Map);
@@ -38,7 +44,7 @@ ra_insn(I, Map) ->
     #move{} -> ra_move(I, Map);
     #pseudo_call{} -> ra_pseudo_call(I, Map);
     #pseudo_li{} -> ra_pseudo_li(I, Map);
-    #pseudo_move{} -> ra_pseudo_move(I, Map);
+    #pseudo_spill_move{} -> ra_pseudo_spill_move(I, Map);
     #pseudo_switch{} -> ra_pseudo_switch(I, Map);
     #pseudo_tailcall{} -> ra_pseudo_tailcall(I, Map);
     #smull{} -> ra_smull(I, Map);
@@ -80,10 +86,19 @@ ra_pseudo_li(I=#pseudo_li{dst=Dst}, Map) ->
   NewDst = ra_temp(Dst, Map),
   I#pseudo_li{dst=NewDst}.
 
-ra_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, Map) ->
+ra_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, Map, Accum) ->
+  NewDst = ra_temp(Dst, Map),
+  NewSrc = ra_temp(Src, Map),
+  case NewSrc#arm_temp.reg =:= NewDst#arm_temp.reg of
+    true -> Accum;
+    false -> [I#pseudo_move{dst=NewDst,src=NewSrc} | Accum]
+  end.
+
+ra_pseudo_spill_move(I=#pseudo_spill_move{dst=Dst,temp=Temp,src=Src}, Map) ->
   NewDst = ra_temp(Dst, Map),
+  NewTemp = ra_temp(Temp, Map),
   NewSrc = ra_temp(Src, Map),
-  I#pseudo_move{dst=NewDst,src=NewSrc}.
+  I#pseudo_spill_move{dst=NewDst, temp=NewTemp, src=NewSrc}.
 
 ra_pseudo_switch(I=#pseudo_switch{jtab=JTab,index=Index}, Map) ->
   NewJTab = ra_temp(JTab, Map),
diff --git a/lib/hipe/arm/hipe_arm_ra_postconditions.erl b/lib/hipe/arm/hipe_arm_ra_postconditions.erl
index 8d1ee1cb94..23c305511f 100644
--- a/lib/hipe/arm/hipe_arm_ra_postconditions.erl
+++ b/lib/hipe/arm/hipe_arm_ra_postconditions.erl
@@ -56,6 +56,7 @@ do_insn(I, TempMap, Strategy) ->
     #pseudo_call{} -> do_pseudo_call(I, TempMap, Strategy);
     #pseudo_li{} -> do_pseudo_li(I, TempMap, Strategy);
     #pseudo_move{} -> do_pseudo_move(I, TempMap, Strategy);
+    #pseudo_spill_move{} -> do_pseudo_spill_move(I, TempMap, Strategy);
     #pseudo_switch{} -> do_pseudo_switch(I, TempMap, Strategy);
     #pseudo_tailcall{} -> do_pseudo_tailcall(I, TempMap, Strategy);
     #smull{} -> do_smull(I, TempMap, Strategy);
@@ -108,18 +109,25 @@ do_pseudo_li(I=#pseudo_li{dst=Dst}, TempMap, Strategy) ->
 
 do_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, TempMap, Strategy) ->
   %% Either Dst or Src (but not both) may be a pseudo temp.
-  %% pseudo_move and pseudo_tailcall are special cases: in
-  %% all other instructions, all temps must be non-pseudos
-  %% after register allocation.
-  case temp_is_spilled(Dst, TempMap) of
-    true -> % Src must not be a pseudo
-      {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy),
-      NewI = I#pseudo_move{src=NewSrc},
-      {FixSrc ++ [NewI], DidSpill};
+  %% pseudo_move, pseudo_spill_move, and pseudo_tailcall
+  %% are special cases: in all other instructions, all
+  %% temps must be non-pseudos after register allocation.
+  case temp_is_spilled(Src, TempMap)
+    andalso temp_is_spilled(Dst, TempMap)
+  of
+    true -> % Both spilled: turn into pseudo_spill_move
+      Temp = clone(Src, temp1(Strategy)),
+      NewI = #pseudo_spill_move{dst=Dst, temp=Temp, src=Src},
+      {[NewI], true};
     _ ->
       {[I], false}
   end.
 
+do_pseudo_spill_move(I = #pseudo_spill_move{temp=Temp}, TempMap, _Strategy) ->
+  %% Temp is above the low water mark and must not have been spilled
+  false = temp_is_spilled(Temp, TempMap),
+  {[I], false}. % nothing to do
+
 do_pseudo_switch(I=#pseudo_switch{jtab=JTab,index=Index}, TempMap, Strategy) ->
   {FixJTab,NewJTab,DidSpill1} = fix_src1(JTab, TempMap, Strategy),
   {FixIndex,NewIndex,DidSpill2} = fix_src2(Index, TempMap, Strategy),
diff --git a/lib/hipe/arm/hipe_arm_subst.erl b/lib/hipe/arm/hipe_arm_subst.erl
index 7510c197bd..4ff245f414 100644
--- a/lib/hipe/arm/hipe_arm_subst.erl
+++ b/lib/hipe/arm/hipe_arm_subst.erl
@@ -13,7 +13,7 @@
 %% limitations under the License.
 
 -module(hipe_arm_subst).
--export([insn_temps/2]).
+-export([insn_temps/2, insn_lbls/2]).
 -include("hipe_arm.hrl").
 
 %% These should be moved to hipe_arm and exported
@@ -31,6 +31,7 @@
 -type am3()     :: #am3{}.
 -type arg()     :: temp() | integer().
 -type funv()    :: #arm_mfa{} | #arm_prim{} | temp().
+-type label()   :: non_neg_integer().
 -type insn()    :: tuple(). % for now
 
 -type subst_fun() :: fun((temp()) -> temp()).
@@ -58,6 +59,8 @@ insn_temps(T, I) ->
       #pseudo_call{funv=F} -> I#pseudo_call{funv=funv_temps(T, F)};
       #pseudo_call_prepare{} -> I;
       #pseudo_li{dst=D} -> I#pseudo_li{dst=T(D)};
+      #pseudo_spill_move{dst=D,temp=U,src=S} ->
+	  I#pseudo_spill_move{dst=T(D),temp=T(U),src=T(S)};
       #pseudo_switch{jtab=J=#arm_temp{},index=Ix=#arm_temp{}} ->
 	  I#pseudo_switch{jtab=T(J),index=T(Ix)};
       #pseudo_tailcall{funv=F,stkargs=Stk} ->
@@ -103,3 +106,22 @@ funv_temps(SubstTemp,  T=#arm_temp{}) -> SubstTemp(T).
 -spec arg_temps(subst_fun(), arg()) -> arg().
 arg_temps(_SubstTemp, Imm) when is_integer(Imm) -> Imm;
 arg_temps(SubstTemp,  T=#arm_temp{}) -> SubstTemp(T).
+
+-type lbl_subst_fun() :: fun((label()) -> label()).
+
+%% @doc Maps over the branch targets in an instruction
+-spec insn_lbls(lbl_subst_fun(), insn()) -> insn().
+insn_lbls(SubstLbl, I) ->
+  case I of
+    #b_label{label=Label} ->
+      I#b_label{label=SubstLbl(Label)};
+    #pseudo_bc{true_label=T, false_label=F} ->
+      I#pseudo_bc{true_label=SubstLbl(T), false_label=SubstLbl(F)};
+    #pseudo_call{sdesc=Sdesc, contlab=Contlab} ->
+      I#pseudo_call{sdesc=sdesc_lbls(SubstLbl, Sdesc),
+		    contlab=SubstLbl(Contlab)}
+  end.
+
+sdesc_lbls(_SubstLbl, Sdesc=#arm_sdesc{exnlab=[]}) -> Sdesc;
+sdesc_lbls(SubstLbl, Sdesc=#arm_sdesc{exnlab=Exnlab}) ->
+  Sdesc#arm_sdesc{exnlab=SubstLbl(Exnlab)}.
diff --git a/lib/hipe/cerl/erl_bif_types.erl b/lib/hipe/cerl/erl_bif_types.erl
index 8c96e60229..a3a936322a 100644
--- a/lib/hipe/cerl/erl_bif_types.erl
+++ b/lib/hipe/cerl/erl_bif_types.erl
@@ -1080,9 +1080,6 @@ type(hipe_bifs, ref_get, 1, Xs, Opaques) ->
   strict(hipe_bifs, ref_get, 1, Xs, fun (_) -> t_immediate() end, Opaques);
 type(hipe_bifs, ref_set, 2, Xs, Opaques) ->
   strict(hipe_bifs, ref_set, 2, Xs, fun (_) -> t_nil() end, Opaques);
-type(hipe_bifs, remove_refs_from, 1, Xs, Opaques) ->
-  strict(hipe_bifs, remove_refs_from, 1, Xs,
-	 fun (_) -> t_atom('ok') end, Opaques);
 type(hipe_bifs, set_funinfo_native_address, 3, Xs, Opaques) ->
   strict(hipe_bifs, set_funinfo_native_address, 3, Xs,
 	 fun (_) -> t_nil() end, Opaques);
@@ -2029,17 +2026,14 @@ arith_rem(Min1, Max1, Min2, Max2) ->
   Min1_geq_zero = infinity_geq(Min1, 0),
   Max1_leq_zero = infinity_geq(0, Max1),
   Max_range2 = infinity_max([infinity_abs(Min2), infinity_abs(Max2)]),
-  Max_range2_leq_zero = infinity_geq(0, Max_range2),
-  New_min = 
+  New_min =
     if Min1_geq_zero -> 0;
        Max_range2 =:= 0 -> 0;
-       Max_range2_leq_zero -> infinity_add(Max_range2, 1);
        true -> infinity_add(infinity_inv(Max_range2), 1)
     end,
   New_max = 
     if Max1_leq_zero -> 0;
        Max_range2 =:= 0 -> 0;
-       Max_range2_leq_zero -> infinity_add(infinity_inv(Max_range2), -1);
        true -> infinity_add(Max_range2, -1)
     end,
   {New_min, New_max}.
@@ -2521,8 +2515,6 @@ arg_types(hipe_bifs, ref_get, 1) ->
   [t_hiperef()];
 arg_types(hipe_bifs, ref_set, 2) ->
   [t_hiperef(), t_immediate()];
-arg_types(hipe_bifs, remove_refs_from, 1) ->
-  [t_sup([t_mfa(), t_atom('all')])];
 arg_types(hipe_bifs, set_funinfo_native_address, 3) ->
   arg_types(hipe_bifs, set_native_address, 3);
 arg_types(hipe_bifs, commit_patch_load, 1) ->
diff --git a/lib/hipe/doc/src/notes.xml b/lib/hipe/doc/src/notes.xml
index 314fd55ba3..58ca0b2138 100644
--- a/lib/hipe/doc/src/notes.xml
+++ b/lib/hipe/doc/src/notes.xml
@@ -31,6 +31,26 @@
   </header>
   <p>This document describes the changes made to HiPE.</p>
 
+<section><title>Hipe 3.15.4</title>
+
+    <section><title>Fixed Bugs and Malfunctions</title>
+      <list>
+        <item>
+	    <p> Fix a bug concerning parameterized opaque types. </p>
+          <p>
+	    Own Id: OTP-14130</p>
+        </item>
+        <item>
+          <p>
+	    Fixed xml issues in old release notes</p>
+          <p>
+	    Own Id: OTP-14269</p>
+        </item>
+      </list>
+    </section>
+
+</section>
+
 <section><title>Hipe 3.15.3</title>
 
     <section><title>Fixed Bugs and Malfunctions</title>
@@ -130,12 +150,12 @@
         </item>
         <item>
           <p>
-	    Various fixes and improvements to the HiPE LLVM backend.
+	    Various fixes and improvements to the HiPE LLVM backend.</p>
 	    <list> <item>Add support for LLVM 3.7 and 3.8 in the
 	    HiPE/LLVM x86_64 backend</item> <item>Reinstate support
 	    for the LLVM backend on x86 (works OK for LLVM 3.5 to 3.7
 	    -- LLVM 3.8 has a bug that prevents it from generating
-	    correct native code on x86)</item> </list></p>
+	    correct native code on x86)</item> </list>
           <p>
 	    Own Id: OTP-13626</p>
         </item>
@@ -191,7 +211,7 @@
         <item>
           <p>
 	    Fix various binary construction inconsistencies for hipe
-	    compiled code. <list> <item>Passing bad field sizes to
+	    compiled code.</p> <list> <item>Passing bad field sizes to
 	    binary constructions would throw <c>badarith</c> rather
 	    than <c>badarg</c>. Worse, in guards, when the unit size
 	    of the field was 1, the exception would leak rather than
@@ -211,7 +231,7 @@
 	    missing check for unit size match when inserting a
 	    binary. For example, a faulty expression like
 	    <c>&lt;&lt;&lt;&lt;1:7&gt;&gt;/binary&gt;&gt;</c> would
-	    succeed.</item> </list></p>
+	    succeed.</item> </list>
           <p>
 	    Own Id: OTP-13272</p>
         </item>
diff --git a/lib/hipe/flow/ebb.inc b/lib/hipe/flow/ebb.inc
index 58213e44d5..e4b7fd0efb 100644
--- a/lib/hipe/flow/ebb.inc
+++ b/lib/hipe/flow/ebb.inc
@@ -40,12 +40,14 @@
 %%      |  {ebb_leaf, SuccesorLabel}
 %%--------------------------------------------------------------------
 
-%% XXX: Cheating big time! no recursive types
--type ebb() :: {ebb_node, icode_lbl(), _}
-             | {ebb_leaf, icode_lbl()}.
+-type ebb() :: ebb_node()
+	     | ebb_leaf().
 
 -record(ebb_node, {label :: icode_lbl(), successors :: [ebb()]}).
+-type ebb_node() :: #ebb_node{}.
+
 -record(ebb_leaf, {successor :: icode_lbl()}).
+-type ebb_leaf() :: #ebb_leaf{}.
 
 %%--------------------------------------------------------------------
 %% Returns a list of extended basic blocks.
@@ -193,7 +195,7 @@ add_succ([Lbl|Lbls], Visited, Node, MkFun, EBBs, CFG) ->
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
--spec mk_node(icode_lbl(), [ebb()]) -> #ebb_node{}.
+-spec mk_node(icode_lbl(), [ebb()]) -> ebb_node().
 mk_node(Label, Successors) -> #ebb_node{label=Label, successors=Successors}.
 
 -spec node_label(#ebb_node{}) -> icode_lbl().
@@ -202,11 +204,11 @@ node_label(#ebb_node{label=Label}) -> Label.
 -spec node_successors(#ebb_node{}) -> [ebb()].
 node_successors(#ebb_node{successors=Successors}) -> Successors.
 
--spec mk_leaf(icode_lbl()) -> #ebb_leaf{}.
+-spec mk_leaf(icode_lbl()) -> ebb_leaf().
 mk_leaf(NextEbb) -> #ebb_leaf{successor=NextEbb}.
 %% leaf_next(Leaf) -> Leaf#ebb_leaf.successor.
 
--spec type(#ebb_node{}) -> 'node' ; (#ebb_leaf{}) -> 'leaf'.
+-spec type(ebb_node()) -> 'node' ; (ebb_leaf()) -> 'leaf'.
 type(#ebb_node{}) -> node;
 type(#ebb_leaf{}) -> leaf.
 
diff --git a/lib/hipe/icode/hipe_beam_to_icode.erl b/lib/hipe/icode/hipe_beam_to_icode.erl
index 100bc0b0e2..2abecf7f18 100644
--- a/lib/hipe/icode/hipe_beam_to_icode.erl
+++ b/lib/hipe/icode/hipe_beam_to_icode.erl
@@ -148,7 +148,8 @@ trans_mfa_code(M,F,A, FunBeamCode, ClosureInfo) ->
 		  {Code3,_Env3} = mk_debug_calltrace(MFA, Env1, Code2),
 		  {Code3,_Env3} = {Code2,Env1}),
   %% For stack optimization
-  Leafness = leafness(Code3),
+  IsClosure = get_closure_info(MFA, ClosureInfo) =/= not_a_closure,
+  Leafness = leafness(Code3, IsClosure),
   IsLeaf = is_leaf_code(Leafness),
   Code4 =
     [FunLbl |
@@ -156,7 +157,6 @@ trans_mfa_code(M,F,A, FunBeamCode, ClosureInfo) ->
        false -> Code3;
        true -> [mk_redtest()|Code3]
      end],
-  IsClosure = get_closure_info(MFA, ClosureInfo) =/= not_a_closure,
   Code5 = hipe_icode:mk_icode(MFA, FunArgs, IsClosure, IsLeaf,
 			      remove_dead_code(Code4),
 			      hipe_gensym:var_range(icode),
@@ -173,12 +173,12 @@ trans_mfa_code(M,F,A, FunBeamCode, ClosureInfo) ->
 
 mk_redtest() -> hipe_icode:mk_primop([], redtest, []).
 
-leafness(Is) -> % -> true, selfrec, or false
-  leafness(Is, true).
+leafness(Is, IsClosure) -> % -> true, selfrec, closure, or false
+  leafness(Is, IsClosure, true).
 
-leafness([], Leafness) ->
+leafness([], _IsClosure, Leafness) ->
   Leafness;
-leafness([I|Is], Leafness) ->
+leafness([I|Is], IsClosure, Leafness) ->
   case I of
     #icode_comment{} ->
       %% BEAM self-tailcalls become gotos, but they leave
@@ -191,7 +191,7 @@ leafness([I|Is], Leafness) ->
 	  'self_tail_recursive' -> selfrec;	% call_only to selfrec
 	  _ -> Leafness
 	end,
-      leafness(Is, NewLeafness);
+      leafness(Is, IsClosure, NewLeafness);
     #icode_call{} ->
       case hipe_icode:call_type(I) of
 	'primop' -> 
@@ -199,12 +199,12 @@ leafness([I|Is], Leafness) ->
 	    call_fun -> false;		% Calls closure
 	    enter_fun -> false;		% Calls closure
 	    #apply_N{} -> false;
-	    _ -> leafness(Is, Leafness)	% Other primop calls are ok
+	    _ -> leafness(Is, IsClosure, Leafness) % Other primop calls are ok
 	  end;
 	T when T =:= 'local' orelse T =:= 'remote' ->
 	  {M,F,A} = hipe_icode:call_fun(I),
 	  case erlang:is_builtin(M, F, A) of
-	    true -> leafness(Is, Leafness);
+	    true -> leafness(Is, IsClosure, Leafness);
 	    false -> false
 	  end
       end;
@@ -223,11 +223,12 @@ leafness([I|Is], Leafness) ->
 	T when T =:= 'local' orelse T =:= 'remote' ->
 	  {M,F,A} = hipe_icode:enter_fun(I),
 	  case erlang:is_builtin(M, F, A) of
-	    true -> leafness(Is, Leafness);
+	    true -> leafness(Is, IsClosure, Leafness);
+	    _ when IsClosure -> leafness(Is, IsClosure, closure);
 	    _ -> false
 	  end
       end;
-    _ -> leafness(Is, Leafness)
+    _ -> leafness(Is, IsClosure, Leafness)
   end.
 
 %% XXX: this old stuff is passed around but essentially unused
@@ -235,12 +236,20 @@ is_leaf_code(Leafness) ->
   case Leafness of
     true -> true;
     selfrec -> true;
+    closure -> false;
     false -> false
   end.
 
 needs_redtest(Leafness) ->
   case Leafness of
     true -> false;
+    %% A "leaf" closure may contain tailcalls to non-closures in addition to
+    %% what other leaves may contain. Omitting the redtest is useful to generate
+    %% shorter code for closures generated by (fun F/A), and is safe since
+    %% control flow cannot return to a "leaf" closure again without a reduction
+    %% being consumed. This is true since no function that can call a closure
+    %% will ever have its redtest omitted.
+    closure -> false;
     selfrec -> true;
     false -> true
   end.
@@ -504,6 +513,19 @@ trans_fun([{test,test_arity,{f,Lbl},[Reg,N]}|Instructions], Env) ->
   I = hipe_icode:mk_type([trans_arg(Reg)],{tuple,N}, 
 			 hipe_icode:label_name(True),map_label(Lbl)),
   [I,True | trans_fun(Instructions,Env)];
+%%--- test_is_tagged_tuple  ---
+trans_fun([{test,is_tagged_tuple,{f,Lbl},[Reg,N,Atom]}|Instructions], Env) ->
+  TrueArity = mk_label(new),
+  IArity = hipe_icode:mk_type([trans_arg(Reg)],{tuple,N},
+			       hipe_icode:label_name(TrueArity),map_label(Lbl)),
+  Var = hipe_icode:mk_new_var(),
+  IGet = hipe_icode:mk_primop([Var],
+			      #unsafe_element{index=1},
+			      [trans_arg(Reg)]),
+  TrueAtom = mk_label(new),
+  IEQ = hipe_icode:mk_type([Var], Atom, hipe_icode:label_name(TrueAtom),
+			   map_label(Lbl)),
+  [IArity,TrueArity,IGet,IEQ,TrueAtom | trans_fun(Instructions,Env)];
 %%--- is_map ---
 trans_fun([{test,is_map,{f,Lbl},[Arg]}|Instructions], Env) ->
   {Code,Env1} = trans_type_test(map,Lbl,Arg,Env),
diff --git a/lib/hipe/icode/hipe_icode_range.erl b/lib/hipe/icode/hipe_icode_range.erl
index b884132327..287b1c80fe 100644
--- a/lib/hipe/icode/hipe_icode_range.erl
+++ b/lib/hipe/icode/hipe_icode_range.erl
@@ -392,14 +392,17 @@ widen(#range{range=Old}, #range{range=New}, T = #range{range=Wide}) ->
 -spec analyse_call(#icode_call{}, call_fun()) -> #icode_call{}.
 
 analyse_call(Call, LookupFun) ->
+  Args = hipe_icode:args(Call),
+  Fun = hipe_icode:call_fun(Call),
+  Type = hipe_icode:call_type(Call),
+  %% This call has side-effects (it might call LookupFun which sends messages to
+  %% hipe_icode_coordinator to update the argument ranges of Fun), and must thus
+  %% not be moved into the case statement.
+  DstRanges = analyse_call_or_enter_fun(Fun, Args, Type, LookupFun),
   case hipe_icode:call_dstlist(Call) of
     [] ->
       Call;
     Dsts ->
-      Args = hipe_icode:args(Call),
-      Fun = hipe_icode:call_fun(Call),
-      Type = hipe_icode:call_type(Call),
-      DstRanges = analyse_call_or_enter_fun(Fun, Args, Type, LookupFun),
       NewDefs = [update_info(Var, R) || {Var,R} <- lists:zip(Dsts, DstRanges)],
       hipe_icode:subst_defines(lists:zip(Dsts, NewDefs), Call)
   end.
@@ -1306,16 +1309,15 @@ range_rem(Range1, Range2) ->
   Min1_geq_zero = inf_geq(Min1, 0),
   Max1_leq_zero = inf_geq(0, Max1),
   Max_range2 = inf_max([inf_abs(Min2), inf_abs(Max2)]),
-  Max_range2_leq_zero = inf_geq(0, Max_range2),
   New_min = 
     if Min1_geq_zero ->	0;
-       Max_range2_leq_zero -> Max_range2;
-       true -> inf_inv(Max_range2)
+       Max_range2 =:= 0 -> 0;
+       true -> inf_add(inf_inv(Max_range2), 1)
     end,
   New_max = 
     if Max1_leq_zero -> 0;
-       Max_range2_leq_zero -> inf_inv(Max_range2);
-       true -> Max_range2
+       Max_range2 =:= 0 -> 0;
+       true -> inf_add(Max_range2, -1)
     end,
   range_init({New_min, New_max}, false).
 
diff --git a/lib/hipe/icode/hipe_icode_type.erl b/lib/hipe/icode/hipe_icode_type.erl
index 815d1e57a8..aafaeb5a0a 100644
--- a/lib/hipe/icode/hipe_icode_type.erl
+++ b/lib/hipe/icode/hipe_icode_type.erl
@@ -1410,9 +1410,10 @@ transform_element2(I) ->
   NewIndex =
     case test_type(integer, IndexType) of
       true ->
-	case t_number_vals(IndexType) of
-	  unknown -> unknown;
-	  [_|_] = Vals -> {number, Vals}
+	case {number_min(IndexType), number_max(IndexType)} of
+	  {Lb0, Ub0} when is_integer(Lb0), is_integer(Ub0) ->
+	    {number, Lb0, Ub0};
+	  {_, _} -> unknown
 	end;
       _ -> unknown
     end,
@@ -1427,19 +1428,19 @@ transform_element2(I) ->
       _ -> unknown
     end,
   case {NewIndex, MinSize} of
-    {{number, [_|_] = Ns}, {tuple, A}} when is_integer(A) ->
-      case lists:all(fun(X) -> 0 < X andalso X =< A end, Ns) of
+    {{number, Lb, Ub}, {tuple, A}} when is_integer(A) ->
+      case 0 < Lb andalso Ub =< A of
 	true ->
-	  case Ns of
-	    [Idx] ->
+	  case {Lb, Ub} of
+	    {Idx, Idx} ->
 	      [_, Tuple] = hipe_icode:args(I),
 	      update_call_or_enter(I, #unsafe_element{index = Idx}, [Tuple]);
-	    [_|_] ->
+	    {_, _} ->
 	      NewFun = {element, [MinSize, valid]},
 	      update_call_or_enter(I, NewFun)
 	  end;
 	false ->
-	  case lists:all(fun(X) -> hipe_tagscheme:is_fixnum(X) end, Ns) of
+	  case lists:all(fun(X) -> hipe_tagscheme:is_fixnum(X) end, [Lb, Ub]) of
 	    true ->
 	      NewFun = {element, [MinSize, fixnums]},
 	      update_call_or_enter(I, NewFun);
@@ -1454,7 +1455,7 @@ transform_element2(I) ->
 	  NewFun = {element, [MinSize, fixnums]},
 	  update_call_or_enter(I, NewFun);
 	false ->
-	  NewFun = {element, [MinSize, NewIndex]},	  
+	  NewFun = {element, [MinSize, NewIndex]},
 	  update_call_or_enter(I, NewFun)
       end
   end.
diff --git a/lib/hipe/llvm/Makefile b/lib/hipe/llvm/Makefile
index 88016a7d8b..e8d9a0e8bb 100644
--- a/lib/hipe/llvm/Makefile
+++ b/lib/hipe/llvm/Makefile
@@ -73,8 +73,7 @@ include ../native.mk
 ERL_COMPILE_FLAGS += -Werror +inline +warn_export_vars #+warn_missing_spec
 
 # if in 32 bit backend define BIT32 symbol
-ARCH = $(shell echo $(TARGET) | sed 's/^\(x86_64\)-.*/64bit/')
-ifneq ($(ARCH), 64bit)
+ifneq ($(BITS64),yes)
 ERL_COMPILE_FLAGS += -DBIT32
 endif
 
diff --git a/lib/hipe/llvm/hipe_llvm_main.erl b/lib/hipe/llvm/hipe_llvm_main.erl
index 0957dd4df2..4eec0c752b 100644
--- a/lib/hipe/llvm/hipe_llvm_main.erl
+++ b/lib/hipe/llvm/hipe_llvm_main.erl
@@ -108,8 +108,10 @@ llvm_llc(Dir, Filename, Ver, Options) ->
   OptLevel = trans_optlev_flag(llc, Options),
   VerFlags = llc_ver_flags(Ver),
   Align    = find_stack_alignment(),
+  Target   = llc_target_opt(),
   LlcFlags = [OptLevel, "-code-model=medium", "-stack-alignment=" ++ Align
              , "-tailcallopt", "-filetype=asm" %FIXME
+             , Target
              | VerFlags],
   Command  = "llc " ++ fix_opts(LlcFlags) ++ " " ++ Source,
   %% io:format("LLC: ~s~n", [Command]),
@@ -123,7 +125,8 @@ llvm_llc(Dir, Filename, Ver, Options) ->
 compile(Dir, Fun_Name, Compiler) ->
   Source  = Dir ++ Fun_Name ++ ".s",
   Dest    = Dir ++ Fun_Name ++ ".o",
-  Command = Compiler ++ " -c " ++ Source ++ " -o " ++ Dest,
+  Target  = compiler_target_opt(),
+  Command = Compiler ++ " " ++ Target ++ " -c " ++ Source ++ " -o " ++ Dest,
   %% io:format("~s: ~s~n", [Compiler, Command]),
   case os:cmd(Command) of
     "" -> ok;
@@ -137,6 +140,18 @@ find_stack_alignment() ->
     _ -> exit({?MODULE, find_stack_alignment, "Unimplemented architecture"})
   end.
 
+llc_target_opt() -> % llc -march flag matching get(hipe_target_arch)
+  case get(hipe_target_arch) of
+    x86 -> "-march=x86";
+    amd64 -> "-march=x86-64"
+  end. % any other target fails with case_clause
+
+compiler_target_opt() -> % -m32/-m64 flag matching get(hipe_target_arch)
+  case get(hipe_target_arch) of
+    x86 -> "-m32";
+    amd64 -> "-m64"
+  end. % any other target fails with case_clause
+
 %% @doc Join options.
 fix_opts(Opts) ->
   string:join(Opts, " ").
diff --git a/lib/hipe/llvm/hipe_llvm_merge.erl b/lib/hipe/llvm/hipe_llvm_merge.erl
index 6e891ac3b0..58d862fbb2 100644
--- a/lib/hipe/llvm/hipe_llvm_merge.erl
+++ b/lib/hipe/llvm/hipe_llvm_merge.erl
@@ -13,7 +13,7 @@ finalize(CompiledCode, Closures, Exports) ->
   Code = [{MFA, [], ConstTab}
 	  || {MFA, _, _ , ConstTab, _, _} <- CompiledCode1],
   {ConstAlign, ConstSize, ConstMap, RefsFromConsts} =
-    hipe_pack_constants:pack_constants(Code, ?ARCH_REGISTERS:alignment()),
+    hipe_pack_constants:pack_constants(Code),
   %% Compute total code size separately as a sanity check for alignment
   CodeSize = compute_code_size(CompiledCode1, 0),
   %% io:format("Code Size (pre-computed): ~w~n", [CodeSize]),
diff --git a/lib/hipe/llvm/hipe_rtl_to_llvm.erl b/lib/hipe/llvm/hipe_rtl_to_llvm.erl
index 208d86841f..79e1bfd381 100644
--- a/lib/hipe/llvm/hipe_rtl_to_llvm.erl
+++ b/lib/hipe/llvm/hipe_rtl_to_llvm.erl
@@ -1364,7 +1364,7 @@ create_function_definition(Fun, Params, Code, LocalVars) ->
   EntryBlock =
     lists:flatten([EntryLabel, ExceptionSync, I2, LocalVars, StoredParams, I3]),
   Final_Code = EntryBlock ++ Code,
-  FunctionOptions = [nounwind, noredzone, list_to_atom("gc \"erlang\"")],
+  FunctionOptions = [nounwind, noredzone, 'gc "erlang"'],
   WordTy = hipe_llvm:mk_int(?BITS_IN_WORD),
   FunRetTy = hipe_llvm:mk_struct(lists:duplicate(?NR_PINNED_REGS + 1, WordTy)),
   hipe_llvm:mk_fun_def([], [], "cc 11", [], FunRetTy, FunctionName, Args,
diff --git a/lib/hipe/main/hipe.app.src b/lib/hipe/main/hipe.app.src
index af2c02006d..de0b255c01 100644
--- a/lib/hipe/main/hipe.app.src
+++ b/lib/hipe/main/hipe.app.src
@@ -76,6 +76,7 @@
 	     hipe_arm_specific,
 	     hipe_arm_subst,
 	     hipe_bb,
+	     hipe_bb_weights,
 	     hipe_beam_to_icode,
 	     hipe_coalescing_regalloc,
 	     hipe_consttab,
@@ -83,6 +84,7 @@
 	     hipe_digraph,
 	     hipe_dominators,
 	     hipe_dot,
+	     hipe_dsets,
 	     hipe_gen_cfg,
 	     hipe_gensym,
 	     hipe_graph_coloring_regalloc,
@@ -146,9 +148,11 @@
 	     hipe_ppc_specific_fp,
 	     hipe_ppc_subst,
 	     hipe_profile,
+	     hipe_range_split,
 	     hipe_reg_worklists,
 	     hipe_regalloc_loop,
 	     hipe_regalloc_prepass,
+	     hipe_restore_reuse,
 	     hipe_rtl,
 	     hipe_rtl_arch,
 	     hipe_rtl_arith_32,
diff --git a/lib/hipe/main/hipe.erl b/lib/hipe/main/hipe.erl
index fff397b060..19b4e8bfe2 100644
--- a/lib/hipe/main/hipe.erl
+++ b/lib/hipe/main/hipe.erl
@@ -1230,6 +1230,18 @@ option_text(regalloc) ->
   "    optimistic - another variant of a coalescing allocator";
 option_text(remove_comments) ->
   "Strip comments from intermediate code";
+option_text(ra_range_split) ->
+  "Split live ranges of temporaries live over call instructions\n"
+  "before performing register allocation.\n"
+  "Heuristically tries to move stack accesses to the cold path of function.\n"
+  "This range splitter is more sophisticated than 'ra_restore_reuse', but has\n"
+  "a significantly larger impact on compile time.\n"
+  "Should only be used with move coalescing register allocators.";
+option_text(ra_restore_reuse) ->
+  "Split live ranges of temporaries such that straight-line\n"
+  "code will not need to contain multiple restores from the same stack\n"
+  "location.\n"
+  "Should only be used with move coalescing register allocators.";
 option_text(rtl_ssa) ->
   "Perform SSA conversion on the RTL level -- default starting at O2";
 option_text(rtl_ssa_const_prop) ->
@@ -1371,6 +1383,12 @@ opt_keys() ->
      pp_rtl_linear,
      ra_partitioned,
      ra_prespill,
+     ra_range_split,
+     ra_restore_reuse,
+     range_split_min_gain,
+     range_split_mode1_fudge,
+     range_split_weight_power,
+     range_split_weights,
      regalloc,
      remove_comments,
      rtl_ssa,
@@ -1409,7 +1427,8 @@ o1_opts(TargetArch) ->
 	    icode_ssa_const_prop, icode_ssa_copy_prop, icode_inline_bifs,
 	    rtl_ssa, rtl_ssa_const_prop, rtl_ssapre,
 	    spillmin_color, use_indexing, remove_comments,
-	    binary_opt, {regalloc,coalescing} | o0_opts(TargetArch)],
+	    binary_opt, {regalloc,coalescing}, ra_restore_reuse
+	    | o0_opts(TargetArch)],
   case TargetArch of
     ultrasparc ->
       Common;
@@ -1429,7 +1448,8 @@ o1_opts(TargetArch) ->
 
 o2_opts(TargetArch) ->
   Common = [icode_type, icode_call_elim, % icode_ssa_struct_reuse,
-	    rtl_lcm | (o1_opts(TargetArch) -- [rtl_ssapre])],
+	    ra_range_split, range_split_weights, % XXX: Having defaults here is ugly
+	    rtl_lcm | (o1_opts(TargetArch) -- [rtl_ssapre, ra_restore_reuse])],
   case TargetArch of
     T when T =:= amd64 orelse T =:= ppc64 -> % 64-bit targets
       [icode_range | Common];
@@ -1477,6 +1497,9 @@ opt_negations() ->
    {no_pp_rtl_ssapre, pp_rtl_ssapre},
    {no_ra_partitioned, ra_partitioned},
    {no_ra_prespill, ra_prespill},
+   {no_ra_range_split, ra_range_split},
+   {no_ra_restore_reuse, ra_restore_reuse},
+   {no_range_split_weights, range_split_weights},
    {no_remove_comments, remove_comments},
    {no_rtl_ssa, rtl_ssa},
    {no_rtl_ssa_const_prop, rtl_ssa_const_prop},
diff --git a/lib/hipe/misc/hipe_consttab.erl b/lib/hipe/misc/hipe_consttab.erl
index 64e3d3ccaa..741bdb2094 100644
--- a/lib/hipe/misc/hipe_consttab.erl
+++ b/lib/hipe/misc/hipe_consttab.erl
@@ -63,9 +63,7 @@
 %%    A hipe_consttab is a tuple {Data, ReferedLabels, NextConstLabel}
 %% @type hipe_constlbl().
 %%   An abstract datatype for referring to data.
-%% @type element_type() = byte | word | ctab_array()
-%% @type ctab_array() = {ctab_array, Type::element_type(),
-%%                                   NoElements::pos_integer()}
+%% @type element_type() = byte | word
 %% @type block() = [integer() | label_ref()]
 %% @type label_ref() = {label, Label::code_label()}
 %% @type code_label() = hipe_sparc:label_name() | hipe_x86:label_name()
@@ -110,8 +108,7 @@
 -type label_ref()    :: {'label', code_label()}.
 -type block()	     :: [hipe_constlbl() | label_ref()].
 
--type ctab_array()   :: {'ctab_array', 'byte' | 'word', pos_integer()}.
--type element_type() :: 'byte' | 'word' | ctab_array().
+-type element_type() :: 'byte' | 'word'.
 
 -type sort_order()   :: term(). % XXX: FIXME
 
@@ -187,7 +184,7 @@ insert_block({ConstTab, RefToLabels, NextLabel}, ElementType, InitList) ->
   ReferredLabels = get_labels(InitList, []),
   NewRefTo = ReferredLabels ++ RefToLabels,
   {NewTa, Id} = insert_const({ConstTab, NewRefTo, NextLabel}, 
-			     block, word_size(), false,
+			     block, size_of(ElementType), false,
 			     {ElementType,InitList}),
   {insert_backrefs(NewTa, Id, ReferredLabels), Id}.
 
@@ -256,13 +253,9 @@ get_labels([], Acc) ->
   
 %% @spec size_of(element_type()) -> pos_integer()
 %% @doc Returns the size in bytes of an element_type.
-%%  The is_atom/1 guard in the clause handling arrays
-%%  constraints the argument to 'byte' | 'word'
 -spec size_of(element_type()) -> pos_integer().
 size_of(byte) -> 1;
-size_of(word) -> word_size();
-size_of({ctab_array,S,N}) when is_atom(S), is_integer(N), N > 0 ->
-    N * size_of(S).
+size_of(word) -> word_size().
 
 %% @spec decompose({element_type(), block()}) -> [byte()]
 %% @doc Turns a block into a list of bytes.
diff --git a/lib/hipe/misc/hipe_pack_constants.erl b/lib/hipe/misc/hipe_pack_constants.erl
index 9dd18bce0f..6736d1f503 100644
--- a/lib/hipe/misc/hipe_pack_constants.erl
+++ b/lib/hipe/misc/hipe_pack_constants.erl
@@ -13,7 +13,7 @@
 %% limitations under the License.
 
 -module(hipe_pack_constants).
--export([pack_constants/2, slim_refs/1, slim_constmap/1,
+-export([pack_constants/1, slim_refs/1, slim_constmap/1,
         find_const/2, mk_data_relocs/2, slim_sorted_exportmap/3]).
 
 -include("hipe_consttab.hrl").
@@ -37,8 +37,8 @@
 
 -record(pcm_entry, {mfa       :: mfa(),
 		    label     :: hipe_constlbl(),
-                   const_num :: const_num(),
-                   start     :: addr(),
+                    const_num :: const_num(),
+                    start     :: addr(),
 		    type      :: 0 | 1 | 2,
 		    raw_data  :: raw_data()}).
 -type pcm_entry() :: #pcm_entry{}.
@@ -53,11 +53,11 @@
 
 %%-----------------------------------------------------------------------------
 
--spec pack_constants([{mfa(),[_],hipe_consttab()}], ct_alignment()) ->
+-spec pack_constants([{mfa(),[_],hipe_consttab()}]) ->
        {ct_alignment(), non_neg_integer(), packed_const_map(), mfa_refs_map()}.
 
-pack_constants(Data, Align) ->
-  pack_constants(Data, 0, Align, 0, [], []).
+pack_constants(Data) ->
+  pack_constants(Data, 0, 1, 0, [], []).	% 1 = byte alignment
 
 pack_constants([{MFA,_,ConstTab}|Rest], Size, Align, ConstNo, Acc, Refs) ->
   Labels = hipe_consttab:labels(ConstTab),
diff --git a/lib/hipe/opt/Makefile b/lib/hipe/opt/Makefile
index 684d6f45b4..5a729d04ae 100644
--- a/lib/hipe/opt/Makefile
+++ b/lib/hipe/opt/Makefile
@@ -43,7 +43,8 @@ RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN)
 # ----------------------------------------------------
 # Target Specs
 # ----------------------------------------------------
-MODULES = hipe_spillmin hipe_spillmin_color hipe_spillmin_scan
+MODULES = hipe_spillmin hipe_spillmin_color hipe_spillmin_scan \
+	hipe_bb_weights
 
 HRL_FILES=
 ERL_FILES= $(MODULES:%=%.erl)
diff --git a/lib/hipe/opt/hipe_bb_weights.erl b/lib/hipe/opt/hipe_bb_weights.erl
new file mode 100644
index 0000000000..8ef113b94c
--- /dev/null
+++ b/lib/hipe/opt/hipe_bb_weights.erl
@@ -0,0 +1,449 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%@doc
+%%	                BASIC BLOCK WEIGHTING
+%%
+%% Computes basic block weights by using branch probabilities as weights in a
+%% linear equation system, that is then solved using Gauss-Jordan Elimination.
+%%
+%% The equation system representation is intentionally sparse, since most blocks
+%% have at most two successors.
+-module(hipe_bb_weights).
+-export([compute/3, compute_fast/3, weight/2, call_exn_pred/0]).
+-export_type([bb_weights/0]).
+
+-compile(inline).
+
+%%-define(DO_ASSERT,1).
+%%-define(DEBUG,1).
+-include("../main/hipe.hrl").
+
+%% If the equation system is large, it might take too long to solve it exactly.
+%% Thus, if there are more than ?HEUR_MAX_SOLVE labels, we use the iterative
+%% approximation.
+-define(HEUR_MAX_SOLVE, 10000).
+
+-opaque bb_weights() :: #{label() => float()}.
+
+-type cfg() :: any().
+-type target_module() :: module().
+-type target_context() :: any().
+-type target() :: {target_module(), target_context()}.
+
+-type label()            :: integer().
+-type var()              :: label().
+-type assignment()       :: {var(), float()}.
+-type eq_assoc()         :: [{var(), key()}].
+-type solution()         :: [assignment()].
+
+%% Constant. Predicted probability of a call resulting in an exception.
+-spec call_exn_pred() -> float().
+call_exn_pred() -> 0.01.
+
+-spec compute(cfg(), target_module(), target_context()) -> bb_weights().
+compute(CFG, TgtMod, TgtCtx) -> % exact solve; approximates when that fails
+  Target = {TgtMod, TgtCtx},
+  Labels = labels(CFG, Target),
+  if length(Labels) > ?HEUR_MAX_SOLVE ->
+      ?debug_msg("~w: Too many labels (~w), approximating.~n",
+		 [?MODULE, length(Labels)]),
+      compute_fast(CFG, TgtMod, TgtCtx);
+     true ->
+      {EqSys, EqAssoc} = build_eq_system(CFG, Labels, Target),
+      case solve(EqSys, EqAssoc) of
+	{ok, Solution} -> maps:from_list(Solution);
+	error -> compute_fast(CFG, TgtMod, TgtCtx) % singular system, no pivot
+      end
+  end.
+
+-spec build_eq_system(cfg(), [label()], target()) -> {eq_system(), eq_assoc()}.
+build_eq_system(CFG, Labels, Target) ->
+  %% build_eq_system/5 adds one row per label (own coef -1.0, const 0.0).
+  {EQS0, Assoc} = build_eq_system(Labels, CFG, Target, [], eqs_new()),
+  %% Anchor the system: the start label additionally gets the constant -1.0
+  %% (negative because the start label's own coefficient is -1.0).
+  StartLb = hipe_gen_cfg:start_label(CFG),
+  {StartLb, StartKey} = lists:keyfind(StartLb, 1, Assoc),
+  StartRow = row_set_const(-1.0, eqs_get(StartKey, EQS0)),
+  {eqs_put(StartKey, StartRow, EQS0), Assoc}.
+
+build_eq_system([], _CFG, _Target, Map, EQS) -> {EQS, lists:reverse(Map)};
+build_eq_system([L|Ls], CFG, Target, Map, EQS0) -> % one equation per label
+  PredProb = pred_prob(L, CFG, Target), % [{Pred, Prob}] of edges into L
+  {Key, EQS} = eqs_insert(row_new([{L, -1.0}|PredProb], 0.0), EQS0),
+  build_eq_system(Ls, CFG, Target, [{L, Key}|Map], EQS). % Map: {Label, Key}
+
+pred_prob(L, CFG, Target) -> % [{Pred, Prob}] for every predecessor of L
+  [begin
+     BB = bb(CFG, Pred, Target),
+     Ps = branch_preds(hipe_bb:last(BB), Target), % from Pred's last insn
+     ?ASSERT(length(lists:ukeysort(1, Ps))
+	     =:= length(hipe_gen_cfg:succ(CFG, Pred))),
+     case lists:keyfind(L, 1, Ps) of
+       {L, Prob} when is_float(Prob) -> {Pred, Prob} % else case_clause
+     end
+   end || Pred <- hipe_gen_cfg:pred(CFG, L)].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-spec triangelise(eq_system(), eq_assoc()) -> {eq_system(), eq_assoc()}.
+triangelise(EQS, VKs) -> % throws 'error' if triangelise_1/2 finds no pivot
+  triangelise_1(mk_triix(EQS, VKs), []).
+
+triangelise_1(TIX0, Acc) -> % Acc: {Var,Key} pivots, most recent first
+  case triix_is_empty(TIX0) of
+    true -> {triix_eqs(TIX0), lists:reverse(Acc)};
+    false ->
+      {V,Key,TIX1} = triix_pop_smallest(TIX0), % pivot on a smallest row
+      Row0 = triix_get(Key, TIX1),
+      case row_get(V, Row0) of
+	Coef when Coef > -0.0001, Coef < 0.0001 ->
+	  throw(error); % pivot is (almost) zero; solve/2 catches this
+	_ ->
+	  Row = row_normalise(V, Row0),
+	  TIX2 = triix_put(Key, Row, TIX1),
+	  TIX = eliminate_triix(V, Key, Row, TIX2),
+	  triangelise_1(TIX, [{V,Key}|Acc])
+      end
+  end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Triangelisation maintains its own index, outside of eqs. This index is
+%% essentially a BST (used as a heap) of all equations by size, with {Key,Var}
+%% as the values and only containing a subset of all the keys in the whole
+%% equation system. The key operation is triix_pop_smallest/1, which pops a
+%% {Key,Var} from the heap corresponding to one of the smallest equations. This
+%% is critical in order to prevent the equations from growing during
+%% triangelisation, which would make the algorithm O(n^2) in the common case.
+-type tri_eq_system() :: {eq_system(),
+			  gb_trees:tree(non_neg_integer(),
+					gb_trees:tree(key(), var()))}.
+
+triix_eqs({EQS, _}) -> EQS. % the wrapped equation system
+triix_get(Key, {EQS, _}) -> eqs_get(Key, EQS).
+triix_is_empty({_, Tree}) -> gb_trees:is_empty(Tree). % nothing left to pop
+triix_lookup(V, {EQS, _}) -> eqs_lookup(V, EQS).
+
+mk_triix(EQS, VKs) ->
+  %% Index every {Var, Key} pair by the current size of its equation.
+  AddEq = fun({V, Key}, Tree) ->
+	      sitree_insert(row_size(eqs_get(Key, EQS)), Key, V, Tree)
+	  end,
+  {EQS, lists:foldl(AddEq, gb_trees:empty(), VKs)}.
+
+sitree_insert(Size, Key, V, SiTree) ->
+  %% Fetch the bucket of equations with this size, creating it on demand.
+  Bucket0 = case gb_trees:lookup(Size, SiTree) of
+	      {value, Existing} -> Existing;
+	      none -> gb_trees:empty()
+	    end,
+  %% gb_trees:insert/3 crashes if Key is already present in the bucket.
+  gb_trees:enter(Size, gb_trees:insert(Key, V, Bucket0), SiTree).
+
+sitree_update_subtree(Size, SubTree, SiTree) ->
+  case gb_trees:is_empty(SubTree) of
+    true -> gb_trees:delete(Size, SiTree); % never keep an empty bucket
+    false -> gb_trees:update(Size, SubTree, SiTree)
+  end.
+
+triix_put(Key, Row, {EQS, Tree0}) -> % replace row Key, keep size index in sync
+  OldSize = row_size(eqs_get(Key, EQS)),
+  case row_size(Row) of
+    OldSize -> {eqs_put(Key, Row, EQS), Tree0}; % same size: index untouched
+    Size ->
+      Tree =
+	case gb_trees:lookup(OldSize, Tree0) of
+	  none -> Tree0;
+	  {value, SubTree0} ->
+	    case gb_trees:lookup(Key, SubTree0) of
+	      none -> Tree0; % Key not indexed, e.g. already popped
+	      {value, V} ->
+		SubTree = gb_trees:delete(Key, SubTree0),
+		Tree1 = sitree_update_subtree(OldSize, SubTree, Tree0),
+		sitree_insert(Size, Key, V, Tree1) % move Key to its new bucket
+	    end
+	end,
+      {eqs_put(Key, Row, EQS), Tree}
+  end.
+
+triix_pop_smallest({EQS, Tree}) -> % crashes on an empty index
+  {Size, SubTree0} = gb_trees:smallest(Tree), % bucket of the smallest rows
+  {Key, V, SubTree} = gb_trees:take_smallest(SubTree0),
+  {V, Key, {EQS, sitree_update_subtree(Size, SubTree, Tree)}}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+row_normalise(Var, Row) ->
+  %% Scale the whole row so that Var's coefficient becomes 1.0.
+  %% row_set_coef ensures the coef is exactly 1.0 (no rounding errors)
+  row_set_coef(Var, 1.0, row_scale(Row, 1.0/row_get(Var, Row))).
+
+%% Precondition: Row must be normalised; i.e. Var's coefficient must be 1.0
+%% (modulo rounding errors)
+-spec eliminate(var(), key(), row(), eq_system()) -> eq_system().
+eliminate(Var, Key, Row, TIX0) ->
+  eliminate_abstr(Var, Key, Row, TIX0,
+		  fun eqs_get/2, fun eqs_lookup/2, fun eqs_put/3).
+
+-spec eliminate_triix(var(), key(), row(), tri_eq_system()) -> tri_eq_system().
+eliminate_triix(Var, Key, Row, TIX0) ->
+  eliminate_abstr(Var, Key, Row, TIX0,
+		  fun triix_get/2, fun triix_lookup/2, fun triix_put/3).
+
+%% The same function implemented for two data types, eqs and triix.
+-compile({inline, eliminate_abstr/7}).
+-spec eliminate_abstr(var(), key(), row(), ADT, fun((key(), ADT) -> row()),
+		      fun((var(), ADT) -> [key()]),
+		      fun((key(), row(), ADT) -> ADT)) -> ADT.
+eliminate_abstr(Var, Key, Row, ADT0, GetFun, LookupFun, PutFun) ->
+  ?ASSERT(1.0 =:= row_get(Var, Row)),
+  ADT =
+    lists:foldl(fun(RK, ADT1) when RK =:= Key -> ADT1; % skip the pivot row
+		   (RK, ADT1) ->
+		    R = GetFun(RK, ADT1),
+		    PutFun(RK, row_addmul(R, Row, -row_get(Var, R)), ADT1)
+		end, ADT0, LookupFun(Var, ADT0)),
+  [Key] = LookupFun(Var, ADT), % assertion: only the pivot row still has Var
+  ADT.
+
+-spec solve(eq_system(), eq_assoc()) -> error | {ok, solution()}.
+solve(EQS0, EqAssoc0) ->
+  try triangelise(EQS0, EqAssoc0)
+  of {EQS1, EqAssoc} -> % EqAssoc: {Var,Key} pairs in pivot order
+      {ok, solve_1(EqAssoc, maps:from_list(EqAssoc), EQS1, [])}
+  catch error -> error % thrown by triangelise_1/2 on a near-zero pivot
+  end.
+
+solve_1([], _VarEqs, _EQS, Acc) -> Acc; % Acc: [{Var, Value}]
+solve_1([{V,K}|Ps], VarEqs, EQS0, Acc0) ->
+  Row0 = eqs_get(K, EQS0),
+  VarsToKill = [Var || {Var, _} <- row_coefs(Row0), Var =/= V],
+  Row1 = kill_vars(VarsToKill, VarEqs, EQS0, Row0),
+  [{V,_}] = row_coefs(Row1), % assertion: only V is left in the row
+  Row = row_normalise(V, Row1),
+  [{V,1.0}] = row_coefs(Row), % assertion: the row now reads V = Const
+  EQS = eliminate(V, K, Row, EQS0),
+  [K] = eqs_lookup(V, EQS), % assertion: no other row mentions V
+  solve_1(Ps, VarEqs, eqs_remove(K, EQS), [{V, row_const(Row)}|Acc0]).
+
+kill_vars([], _VarEqs, _EQS, Row) -> Row;
+kill_vars([V|Vs], VarEqs, EQS, Row0) ->
+  VRow0 = eqs_get(maps:get(V, VarEqs), EQS), % the equation paired with V
+  VRow = row_normalise(V, VRow0),
+  ?ASSERT(1.0 =:= row_get(V, VRow)),
+  Row = row_addmul(Row0, VRow, -row_get(V, Row0)), % cancels V's coefficient
+  ?ASSERT(0.0 =:= row_get(V, Row)), % V has been killed
+  kill_vars(Vs, VarEqs, EQS, Row).
+
+-spec weight(label(), bb_weights()) -> float().
+weight(Lbl, Weights) -> % fails with badkey for a label without a weight
+  maps:get(Lbl, Weights).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Row datatype
+%% Invariant: No 0.0 coefficients!
+-spec row_empty() -> row().
+row_empty() -> {orddict:new(), 0.0}.
+
+-spec row_new([{var(), float()}], float()) -> row().
+row_new(Coefs, Const) when is_float(Const) ->
+  row_ensure_invar({row_squash_multiples(lists:keysort(1, Coefs)), Const}).
+
+row_squash_multiples([{K, C1},{K, C2}|Ps]) -> % same var twice: add coefs
+  row_squash_multiples([{K,C1+C2}|Ps]);
+row_squash_multiples([P|Ps]) -> [P|row_squash_multiples(Ps)];
+row_squash_multiples([]) -> []. % only adjacent duplicates are merged
+
+row_ensure_invar({Coef, Const}) -> % drop 0.0 coefs; non-float coefs crash
+  {orddict:filter(fun(_, 0.0) -> false; (_, F) when is_float(F) -> true end,
+		  Coef), Const}.
+
+row_const({_, Const}) -> Const.
+row_coefs({Coefs, _}) -> orddict:to_list(Coefs).
+row_size({Coefs, _}) -> orddict:size(Coefs).
+
+row_get(Var, {Coefs, _}) -> % coefficient of Var in the row
+  case lists:keyfind(Var, 1, Coefs) of
+    false -> 0.0; % a var that is not stored has coefficient 0.0
+    {_, Coef} -> Coef
+  end.
+
+row_set_coef(Var, 0.0, {Coefs, Const}) -> % keep rows free of 0.0 coefs
+  {orddict:erase(Var, Coefs), Const};
+row_set_coef(Var, Coef, {Coefs, Const}) ->
+  {orddict:store(Var, Coef, Coefs), Const}.
+
+row_set_const(Const, {Coefs, _}) -> {Coefs, Const}.
+
+%% Lhs + Rhs*Factor
+-spec row_addmul(row(), row(), float()) -> row().
+row_addmul({LhsCoefs, LhsConst}, {RhsCoefs, RhsConst}, Factor)
+  when is_float(Factor) ->
+  Coefs = row_addmul_coefs(LhsCoefs, RhsCoefs, Factor),
+  Const = LhsConst + RhsConst * Factor,
+  {Coefs, Const}.
+
+row_addmul_coefs(Ls, [], Factor) when is_float(Factor) -> Ls;
+row_addmul_coefs([], Rs, Factor) when is_float(Factor) ->
+  row_scale_coefs(Rs, Factor); % only Rhs terms remain
+row_addmul_coefs([L={LV, _}|Ls], Rs=[{RV,_}|_], Factor)
+  when LV < RV, is_float(Factor) -> % LV occurs only in Lhs: keep as is
+  [L|row_addmul_coefs(Ls, Rs, Factor)];
+row_addmul_coefs(Ls=[{LV, _}|_], [{RV, RC}|Rs], Factor)
+  when LV > RV, is_float(RC), is_float(Factor) -> % RV occurs only in Rhs
+  [{RV, RC*Factor}|row_addmul_coefs(Ls, Rs, Factor)];
+row_addmul_coefs([{V, LC}|Ls], [{V, RC}|Rs], Factor)
+  when is_float(LC), is_float(RC), is_float(Factor) -> % V occurs in both
+  case LC + RC * Factor of
+    0.0 ->      row_addmul_coefs(Ls, Rs, Factor); % cancelled: drop the term
+    C -> [{V,C}|row_addmul_coefs(Ls, Rs, Factor)]
+  end.
+
+row_scale(_, 0.0) -> row_empty(); % all terms and the constant become 0.0
+row_scale({RowCoefs, RowConst}, Factor) when is_float(Factor) ->
+  {row_scale_coefs(RowCoefs, Factor), RowConst * Factor}.
+
+row_scale_coefs([{V,C}|Cs], Factor) when is_float(Factor), is_float(C) ->
+  [{V,C*Factor}|row_scale_coefs(Cs, Factor)];
+row_scale_coefs([], Factor) when is_float(Factor) ->
+  [].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Equation system ADT
+%%
+%% Stores a linear equation system, allowing for efficient updates and efficient
+%% queries for all equations mentioning a variable.
+%%
+%% It is sort of like a "database" table of {Primary, Terms, Const} indexed both
+%% on Primary as well as the vars (map keys) in Terms.
+-type row()       :: {Terms :: orddict:orddict(var(), float()),
+		      Const :: float()}.
+-type key()       :: non_neg_integer().
+-type rev_index() :: #{var() => ordsets:ordset(key())}.
+-record(eq_system, {
+	  rows = #{}              :: #{key() => row()},
+	  revidx = revidx_empty() :: rev_index(),
+	  next_key = 0            :: key()
+	 }).
+-type eq_system() :: #eq_system{}.
+
+eqs_new() -> #eq_system{}.
+
+-spec eqs_insert(row(), eq_system()) -> {key(), eq_system()}.
+eqs_insert(Row, EQS0=#eq_system{next_key=Key}) ->
+  %% Hand out the next free key for Row and bump the counter past it.
+  EQS = EQS0#eq_system{next_key = Key + 1},
+  {Key, eqs_insert(Key, Row, EQS)}.
+
+eqs_insert(Key, Row, EQS=#eq_system{rows=Rows0, revidx=RevIdx}) ->
+  Rows = Rows0#{Key => Row}, % store the row, then index its variables
+  EQS#eq_system{rows=Rows, revidx=revidx_add(Key, Row, RevIdx)}.
+
+eqs_put(Key, Row, EQS0) -> % replace an existing row; Key must be present
+  eqs_insert(Key, Row, eqs_remove(Key, EQS0)).
+
+eqs_remove(Key, EQS=#eq_system{rows=Rows0, revidx=RevIdx}) ->
+  OldRow = maps:get(Key, Rows0), % fails with badkey if Key is absent
+  Rows = maps:remove(Key, Rows0),
+  EQS#eq_system{rows=Rows, revidx=revidx_remove(Key, OldRow, RevIdx)}.
+
+-spec eqs_get(key(), eq_system()) -> row().
+eqs_get(Key, #eq_system{rows=Rows}) -> maps:get(Key, Rows).
+
+%% Keys of all equations containing a nonzero coefficient for Var
+-spec eqs_lookup(var(), eq_system()) -> ordsets:ordset(key()).
+eqs_lookup(Var, #eq_system{revidx=RevIdx}) -> maps:get(Var, RevIdx).
+
+%% eqs_rows(#eq_system{rows=Rows}) -> maps:to_list(Rows).
+
+%% eqs_print(EQS) ->
+%%   lists:foreach(fun({_, Row}) ->
+%% 		    row_print(Row)
+%% 		end, lists:sort(eqs_rows(EQS))).
+
+%% row_print(Row) ->
+%%   CoefStrs = [io_lib:format("~wl~w", [Coef, Var])
+%% 	      || {Var, Coef} <- row_coefs(Row)],
+%%   CoefStr = lists:join(" + ", CoefStrs),
+%%   io:format("~w = ~s~n", [row_const(Row), CoefStr]).
+
+revidx_empty() -> #{}.
+
+-spec revidx_add(key(), row(), rev_index()) -> rev_index().
+revidx_add(Key, Row, RevIdx0) -> % register Key under every var in Row
+  orddict:fold(fun(Var, _Coef, RevIdx1) ->
+		?ASSERT(_Coef /= 0.0),
+		RevIdx1#{Var => ordsets:add_element(
+				  Key, maps:get(Var, RevIdx1, ordsets:new()))}
+	    end, RevIdx0, row_coefs(Row)).
+
+-spec revidx_remove(key(), row(), rev_index()) -> rev_index().
+revidx_remove(Key, {Coefs, _}, RevIdx0) -> % unregister Key for each var
+  orddict:fold(fun(Var, _Coef, RevIdx1) ->
+		case RevIdx1 of % case_clause if Var was never indexed
+		  #{Var := Keys0} ->
+		    case ordsets:del_element(Key, Keys0) of
+		      [] -> maps:remove(Var, RevIdx1); % last row using Var
+		      Keys -> RevIdx1#{Var := Keys}
+		    end
+		end
+	    end, RevIdx0, Coefs).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-define(FAST_ITERATIONS, 5).
+
+%% @doc Computes a rough approximation of BB weights. The approximation is
+%% particularly poor (converges slowly) for recursive functions and loops.
+-spec compute_fast(cfg(), target_module(), target_context()) -> bb_weights().
+compute_fast(CFG, TgtMod, TgtCtx) ->
+  Target = {TgtMod, TgtCtx},
+  StartLb = hipe_gen_cfg:start_label(CFG), % its weight stays pinned at 1.0
+  RPO = reverse_postorder(CFG, Target), % update order within each sweep
+  PredProbs = [{L, pred_prob(L, CFG, Target)} || L <- RPO, L =/= StartLb],
+  Probs0 = (maps:from_list([{L, 0.0} || L <- RPO]))#{StartLb := 1.0},
+  fast_iterate(?FAST_ITERATIONS, PredProbs, Probs0). % fixed sweep count
+
+fast_iterate(0, _PredProbs, Weights) -> Weights;
+fast_iterate(Remaining, PredProbs, Weights0) ->
+  Weights = fast_one(PredProbs, Weights0), % one full sweep over the blocks
+  fast_iterate(Remaining - 1, PredProbs, Weights).
+
+fast_one(PredProbs, Probs0) ->
+  %% One sweep: recompute each listed block's weight from its predecessors.
+  %% Weights updated earlier in the sweep are visible to later blocks.
+  lists:foldl(fun({L, Pred}, Probs) ->
+		  Probs#{L => fast_sum(Pred, Probs, 0.0)}
+	      end, Probs0, PredProbs).
+
+fast_sum([{P,EWt}|Pred], Probs, Acc) when is_float(EWt), is_float(Acc) ->
+  case Probs of % case_clause if predecessor P has no weight entry
+    #{P := PWt} when is_float(PWt) ->
+      fast_sum(Pred, Probs, Acc + PWt * EWt) % pred weight * edge probability
+  end;
+fast_sum([], _Probs, Acc) when is_float(Acc) ->
+  Acc.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Target module interface functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-define(TGT_IFACE_0(N), N(         {M,C}) -> M:N(         C)).
+-define(TGT_IFACE_1(N), N(A1,      {M,C}) -> M:N(A1,      C)).
+-define(TGT_IFACE_2(N), N(A1,A2,   {M,C}) -> M:N(A1,A2,   C)).
+-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)).
+
+?TGT_IFACE_2(bb).
+?TGT_IFACE_1(branch_preds).
+?TGT_IFACE_1(labels).
+?TGT_IFACE_1(reverse_postorder).
diff --git a/lib/hipe/opt/hipe_spillmin_color.erl b/lib/hipe/opt/hipe_spillmin_color.erl
index 41f1972df7..f87d9a5b61 100644
--- a/lib/hipe/opt/hipe_spillmin_color.erl
+++ b/lib/hipe/opt/hipe_spillmin_color.erl
@@ -166,9 +166,13 @@ remap_temp_map0(Cols, [_Y|Ys], SpillIndex) ->
 %%
 
 build_ig(CFG, Live, Target, TempMap) ->
-  try build_ig0(CFG, Live, Target, TempMap)
-  catch error:Rsn -> exit({regalloc, build_ig, Rsn})
-  end.
+  TempMapping = map_spilled_temporaries(TempMap),
+  TempMappingTable = setup_ets(TempMapping),
+  NumSpilled = length(TempMapping),
+  IG = build_ig_bbs(labels(CFG, Target), CFG, Live, empty_ig(NumSpilled),
+		    Target, TempMap, TempMappingTable),
+  ets:delete(TempMappingTable),
+  {normalize_ig(IG), NumSpilled}.
 
 %% Creates an ETS table consisting of the keys given in List, with the values
 %% being an integer which is the position of the key in List.
@@ -183,15 +187,6 @@ setup_ets0([X|Xs], Table, N) ->
   ets:insert(Table, {X, N}),
   setup_ets0(Xs, Table, N+1).
 
-build_ig0(CFG, Live, Target, TempMap) ->
-  TempMapping = map_spilled_temporaries(TempMap),
-  TempMappingTable = setup_ets(TempMapping),
-  NumSpilled = length(TempMapping),
-  IG = build_ig_bbs(labels(CFG, Target), CFG, Live, empty_ig(NumSpilled),
-		    Target, TempMap, TempMappingTable),
-  ets:delete(TempMappingTable),
-  {normalize_ig(IG), NumSpilled}.
-
 build_ig_bbs([], _CFG, _Live, IG, _Target, _TempMap, _TempMapping) ->
   IG;
 build_ig_bbs([L|Ls], CFG, Live, IG, Target, TempMap, TempMapping) ->
@@ -212,16 +207,26 @@ build_ig_bb([X|Xs], LiveOut, IG, Target, TempMap, TempMapping) ->
     build_ig_bb(Xs, LiveOut, IG, Target, TempMap, TempMapping),
   build_ig_instr(X, Live, NewIG, Target, TempMap, TempMapping).
 
-build_ig_instr(X, Live, IG, Target, TempMap, TempMapping) ->
+build_ig_instr(X, Live0, IG0, Target, TempMap, TempMapping) ->
   {Def, Use} = def_use(X, Target, TempMap),
-  ?report3("Live ~w\n~w : Def: ~w Use ~w\n",[Live, X, Def,Use]),
+  ?report3("Live ~w\n~w : Def: ~w Use ~w\n",[Live0, X, Def,Use]),
   DefListMapped = list_map(Def, TempMapping, []),
   UseListMapped = list_map(Use, TempMapping, []),
   DefSetMapped = ordsets:from_list(DefListMapped),
   UseSetMapped = ordsets:from_list(UseListMapped),
-  NewIG = interference_arcs(DefListMapped, ordsets:to_list(Live), IG),
-  NewLive = ordsets:union(UseSetMapped, ordsets:subtract(Live, DefSetMapped)),
-  {NewLive, NewIG}.
+  {Live1, IG1} =
+    analyze_move(X, Live0, IG0, Target, DefSetMapped, UseSetMapped),
+  IG = interference_arcs(DefListMapped, ordsets:to_list(Live1), IG1),
+  Live = ordsets:union(UseSetMapped, ordsets:subtract(Live1, DefSetMapped)),
+  {Live, IG}.
+
+analyze_move(X, Live0, IG0, Target, DefSetMapped, UseSetMapped) ->
+  case {is_spill_move(X, Target), DefSetMapped, UseSetMapped} of
+    {true, [Dst], [Src]} -> % spill move with one mapped def and one use
+      {ordsets:del_element(Src, Live0), add_move(Src, Dst, IG0)};
+    {_, _, _} -> % anything else: liveness and graph are left unchanged
+      {Live0, IG0}
+  end.
 
 %% Given a list of Keys and an ets-table returns a list of the elements 
 %% in Mapping corresponding to the Keys and appends Acc to this list.
@@ -271,15 +276,6 @@ i_arcs(X, [Y|Ys], IG) ->
 %%     throw an exception (the caller should retry with more stack slots)
 
 color(IG, StackSlots, NumNodes, Target) ->
-  try
-    color_0(IG, StackSlots, NumNodes, Target)
-  catch
-    error:Rsn ->
-      ?error_msg("Coloring failed with ~p~n", [Rsn]),
-      ?EXIT(Rsn)
-  end.
-
-color_0(IG, StackSlots, NumNodes, Target) -> 
   ?report("simplification of IG~n", []),
   K = ordsets:size(StackSlots),
   Nodes = list_ig(IG),
@@ -382,7 +378,8 @@ select_colors([{X,colorable}|Xs], IG, Cols, PhysRegs) ->
 
 select_color(X, IG, Cols, PhysRegs) ->
   UsedColors = get_colors(neighbors(X, IG), Cols),
-  Reg = select_unused_color(UsedColors, PhysRegs),
+  Preferences = get_colors(move_connected(X, IG), Cols),
+  Reg = select_unused_color(UsedColors, Preferences, PhysRegs),
   {Reg, set_color(X, Reg, Cols)}.
 
 %%%%%%%%%%%%%%%%%%%%
@@ -396,10 +393,14 @@ get_colors([X|Xs], Cols) ->
       [R|get_colors(Xs, Cols)]
   end.
 
-select_unused_color(UsedColors, PhysRegs) ->
+select_unused_color(UsedColors, Preferences, PhysRegs) ->
   Summary = ordsets:from_list(UsedColors),
-  AvailRegs = ordsets:to_list(ordsets:subtract(PhysRegs, Summary)),
-  hd(AvailRegs).
+  case ordsets:subtract(ordsets:from_list(Preferences), Summary) of
+    [PreferredColor|_] -> PreferredColor;
+    _ ->
+      AvailRegs = ordsets:to_list(ordsets:subtract(PhysRegs, Summary)),
+      hd(AvailRegs)
+  end.
 
 push_colored(X, Stk) ->
   [{X, colorable} | Stk].
@@ -456,7 +457,11 @@ init_stackslots(NumSlots, Acc) ->
 %%
 %% Note: later on, we may wish to add 'move-related' support.
 
--record(ig_info, {neighbors = [] :: [_], degree = 0 :: non_neg_integer()}).
+-record(ig_info, {
+	  neighbors = []      :: [_], % filled by add_arc/3
+	  degree = 0          :: non_neg_integer(),
+	  move_connected = [] :: [_] % filled by add_move_arc/3
+	 }).
 
 empty_ig(NumNodes) ->
   hipe_vectors:new(NumNodes, #ig_info{}).
@@ -467,16 +472,29 @@ degree(Info) ->
 neighbors(Info) ->
   Info#ig_info.neighbors.
 
+move_connected(Info) ->
+  Info#ig_info.move_connected.
+
 add_edge(X, X, IG) -> IG;
 add_edge(X, Y, IG) ->
   add_arc(X, Y, add_arc(Y, X, IG)).
 
+add_move(X, X, IG) -> IG; % a node is never move-connected to itself
+add_move(X, Y, IG) -> % record the relation in both directions
+  add_move_arc(X, Y, add_move_arc(Y, X, IG)).
+
 add_arc(X, Y, IG) ->
   Info = hipe_vectors:get(IG, X),
   Old = neighbors(Info),
   New = Info#ig_info{neighbors = [Y|Old]},
   hipe_vectors:set(IG,X,New).
 
+add_move_arc(X, Y, IG) ->
+  %% Prepend Y to X's move partners; duplicates are removed later by
+  %% normalize_ig/1 (ordsets:from_list).
+  Info = #ig_info{move_connected = Partners} = hipe_vectors:get(IG, X),
+  hipe_vectors:set(IG, X, Info#ig_info{move_connected = [Y|Partners]}).
+
 normalize_ig(IG) ->
   Size = hipe_vectors:size(IG),
   normalize_ig(Size-1, IG).
@@ -486,7 +504,8 @@ normalize_ig(-1, IG) ->
 normalize_ig(I, IG) ->
   Info = hipe_vectors:get(IG, I),
   N = ordsets:from_list(neighbors(Info)),
-  NewInfo = Info#ig_info{neighbors = N, degree = length(N)},
+  M = ordsets:subtract(ordsets:from_list(move_connected(Info)), N),
+  NewInfo = Info#ig_info{neighbors = N, degree = length(N), move_connected = M},
   NewIG = hipe_vectors:set(IG, I, NewInfo),
   normalize_ig(I-1, NewIG).
 
@@ -494,6 +513,10 @@ neighbors(X, IG) ->
   Info = hipe_vectors:get(IG, X),
   Info#ig_info.neighbors.
 
+move_connected(X, IG) ->
+  Info = hipe_vectors:get(IG, X),
+  Info#ig_info.move_connected.
+
 decrement_degree(X, IG) ->
   Info = hipe_vectors:get(IG, X),
   Degree = degree(Info),
@@ -555,3 +578,6 @@ def_use(X, Target={TgtMod,TgtCtx}, TempMap) ->
 
 reg_names(Regs, {TgtMod,TgtCtx}) ->
   [TgtMod:reg_nr(X,TgtCtx) || X <- Regs].
+
+is_spill_move(Instr, {TgtMod,TgtCtx}) ->
+  TgtMod:is_spill_move(Instr, TgtCtx).
diff --git a/lib/hipe/ppc/hipe_ppc.erl b/lib/hipe/ppc/hipe_ppc.erl
index df9f193fa3..63ecd0a0b8 100644
--- a/lib/hipe/ppc/hipe_ppc.erl
+++ b/lib/hipe/ppc/hipe_ppc.erl
@@ -98,6 +98,9 @@
 	 pseudo_move_dst/1,
 	 pseudo_move_src/1,
 
+	 mk_pseudo_spill_move/3,
+	 is_pseudo_spill_move/1,
+
 	 mk_pseudo_tailcall/4,
 	 pseudo_tailcall_func/1,
 	 pseudo_tailcall_stkargs/1,
@@ -131,6 +134,9 @@
 	 pseudo_fmove_dst/1,
 	 pseudo_fmove_src/1,
 
+	 mk_pseudo_spill_fmove/3,
+	 is_pseudo_spill_fmove/1,
+
 	 mk_defun/8,
 	 defun_mfa/1,
 	 defun_formals/1,
@@ -412,6 +418,10 @@ is_pseudo_move(I) -> case I of #pseudo_move{} -> true; _ -> false end.
 pseudo_move_dst(#pseudo_move{dst=Dst}) -> Dst.
 pseudo_move_src(#pseudo_move{src=Src}) -> Src.
 
+mk_pseudo_spill_move(Dst, Temp, Src) ->
+  #pseudo_spill_move{dst=Dst, temp=Temp, src=Src}.
+is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move).
+
 mk_pseudo_tailcall(FunC, Arity, StkArgs, Linkage) ->
   #pseudo_tailcall{func=FunC, arity=Arity, stkargs=StkArgs, linkage=Linkage}.
 pseudo_tailcall_func(#pseudo_tailcall{func=FunC}) -> FunC.
@@ -495,6 +505,10 @@ is_pseudo_fmove(I) -> case I of #pseudo_fmove{} -> true; _ -> false end.
 pseudo_fmove_dst(#pseudo_fmove{dst=Dst}) -> Dst.
 pseudo_fmove_src(#pseudo_fmove{src=Src}) -> Src.
 
+mk_pseudo_spill_fmove(Dst, Temp, Src) ->
+  #pseudo_spill_fmove{dst=Dst, temp=Temp, src=Src}.
+is_pseudo_spill_fmove(I) -> is_record(I, pseudo_spill_fmove).
+
 mk_defun(MFA, Formals, IsClosure, IsLeaf, Code, Data, VarRange, LabelRange) ->
   #defun{mfa=MFA, formals=Formals, code=Code, data=Data,
 	 isclosure=IsClosure, isleaf=IsLeaf,
diff --git a/lib/hipe/ppc/hipe_ppc.hrl b/lib/hipe/ppc/hipe_ppc.hrl
index a96692c52e..3eef8be487 100644
--- a/lib/hipe/ppc/hipe_ppc.hrl
+++ b/lib/hipe/ppc/hipe_ppc.hrl
@@ -87,6 +87,7 @@
 -record(pseudo_call_prepare, {nrstkargs}).
 -record(pseudo_li, {dst, imm}).
 -record(pseudo_move, {dst, src}).
+-record(pseudo_spill_move, {dst, temp, src}).
 -record(pseudo_tailcall, {func, arity, stkargs, linkage}).
 -record(pseudo_tailcall_prepare, {}).
 -record(store, {stop, src, disp, base}).	% non-indexed, non-update form
@@ -99,6 +100,7 @@
 -record(fp_binary, {fp_binop, dst, src1, src2}).
 -record(fp_unary, {fp_unop, dst, src}).
 -record(pseudo_fmove, {dst, src}).
+-record(pseudo_spill_fmove, {dst, temp, src}).
 
 %%% Function definitions.
 
diff --git a/lib/hipe/ppc/hipe_ppc_assemble.erl b/lib/hipe/ppc/hipe_ppc_assemble.erl
index 66817837df..b0f57e5582 100644
--- a/lib/hipe/ppc/hipe_ppc_assemble.erl
+++ b/lib/hipe/ppc/hipe_ppc_assemble.erl
@@ -32,7 +32,7 @@ assemble(CompiledCode, Closures, Exports, Options) ->
 	  || {MFA, Defun} <- CompiledCode],
   %%
   {ConstAlign,ConstSize,ConstMap,RefsFromConsts} =
-    hipe_pack_constants:pack_constants(Code, hipe_rtl_arch:word_size()),
+    hipe_pack_constants:pack_constants(Code),
   %%
   {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} =
     encode(translate(Code, ConstMap), Options),
diff --git a/lib/hipe/ppc/hipe_ppc_cfg.erl b/lib/hipe/ppc/hipe_ppc_cfg.erl
index f17c0ac503..d44d38f38d 100644
--- a/lib/hipe/ppc/hipe_ppc_cfg.erl
+++ b/lib/hipe/ppc/hipe_ppc_cfg.erl
@@ -21,8 +21,8 @@
          bb/2, bb_add/3]).
 -export([postorder/1]).
 -export([linearise/1, params/1, reverse_postorder/1]).
--export([arity/1]).
-%%%-export([redirect_jmp/3, arity/1]).
+-export([redirect_jmp/3, arity/1]).
+-export([branch_preds/1]).
 
 %%% these tell cfg.inc what to define (ugly as hell)
 -define(BREADTH_ORDER,true).
@@ -75,11 +75,30 @@ branch_successors(Branch) ->
     #pseudo_tailcall{} -> []
   end.
 
+branch_preds(Branch) ->
+  case Branch of
+    #bctr{labels=Labels} ->
+      Prob = 1.0/length(Labels),
+      [{L, Prob} || L <- Labels];
+    #pseudo_bc{true_label=TrueLab,false_label=FalseLab,pred=Pred} ->
+      [{FalseLab, 1.0-Pred}, {TrueLab, Pred}];
+    #pseudo_call{contlab=ContLab, sdesc=#ppc_sdesc{exnlab=[]}} ->
+      %% A function can still cause an exception, even if we won't catch it
+      [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}];
+    #pseudo_call{contlab=ContLab, sdesc=#ppc_sdesc{exnlab=ExnLab}} ->
+      CallExnPred = hipe_bb_weights:call_exn_pred(),
+      [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}];
+    _ ->
+      case branch_successors(Branch) of
+	[] -> [];
+	[Single] -> [{Single, 1.0}]
+      end
+  end.
+
 -ifdef(REMOVE_TRIVIAL_BBS_NEEDED).
 fails_to(_Instr) -> [].
 -endif.
 
--ifdef(notdef).
 redirect_jmp(I, Old, New) ->
   case I of
     #b_label{label=Label} ->
@@ -93,10 +112,16 @@ redirect_jmp(I, Old, New) ->
       if Old =:= FalseLab -> I1#pseudo_bc{false_label=New};
 	 true -> I1
       end;
-    %% handle pseudo_call too?
-    _ -> I
+    #pseudo_call{sdesc=SDesc0, contlab=ContLab0} ->
+      SDesc = case SDesc0 of
+		#ppc_sdesc{exnlab=Old} -> SDesc0#ppc_sdesc{exnlab=New};
+		#ppc_sdesc{exnlab=_}   -> SDesc0
+	      end,
+      ContLab = if Old =:= ContLab0 -> New;
+		   true -> ContLab0
+		end,
+      I#pseudo_call{sdesc=SDesc, contlab=ContLab}
   end.
--endif.
 
 mk_goto(Label) ->
   hipe_ppc:mk_b_label(Label).
diff --git a/lib/hipe/ppc/hipe_ppc_defuse.erl b/lib/hipe/ppc/hipe_ppc_defuse.erl
index 9a99611493..d8a864f7d5 100644
--- a/lib/hipe/ppc/hipe_ppc_defuse.erl
+++ b/lib/hipe/ppc/hipe_ppc_defuse.erl
@@ -41,6 +41,7 @@ insn_def_gpr(I) ->
     #pseudo_call{} -> call_clobbered_gpr();
     #pseudo_li{dst=Dst} -> [Dst];
     #pseudo_move{dst=Dst} -> [Dst];
+    #pseudo_spill_move{dst=Dst,temp=Temp} -> [Dst, Temp];
     #pseudo_tailcall_prepare{} -> tailcall_clobbered_gpr();
     #unary{dst=Dst} -> [Dst];
     _ -> []
@@ -71,6 +72,7 @@ insn_use_gpr(I) ->
     #mtspr{src=Src} -> [Src];
     #pseudo_call{sdesc=#ppc_sdesc{arity=Arity}} -> arity_use_gpr(Arity);
     #pseudo_move{src=Src} -> [Src];
+    #pseudo_spill_move{src=Src} -> [Src];
     #pseudo_tailcall{arity=Arity,stkargs=StkArgs} ->
       addsrcs(StkArgs, addtemps(tailcall_clobbered_gpr(), arity_use_gpr(Arity)));
     #store{src=Src,base=Base} -> addtemp(Src, [Base]);
@@ -110,6 +112,7 @@ insn_def_fpr(I) ->
     #fp_binary{dst=Dst} -> [Dst];
     #fp_unary{dst=Dst} -> [Dst];
     #pseudo_fmove{dst=Dst} -> [Dst];
+    #pseudo_spill_fmove{dst=Dst,temp=Temp} -> [Dst, Temp];
     _ -> []
   end.
 
@@ -126,6 +129,7 @@ insn_use_fpr(I) ->
     #fp_binary{src1=Src1,src2=Src2} -> addtemp(Src1, [Src2]);
     #fp_unary{src=Src} -> [Src];
     #pseudo_fmove{src=Src} -> [Src];
+    #pseudo_spill_fmove{src=Src} -> [Src];
     _ -> []
   end.
 
diff --git a/lib/hipe/ppc/hipe_ppc_frame.erl b/lib/hipe/ppc/hipe_ppc_frame.erl
index a91cb18cc2..b88b75a5bd 100644
--- a/lib/hipe/ppc/hipe_ppc_frame.erl
+++ b/lib/hipe/ppc/hipe_ppc_frame.erl
@@ -66,10 +66,14 @@ do_insn(I, LiveOut, Context, FPoff) ->
       do_pseudo_call_prepare(I, FPoff);
     #pseudo_move{} ->
       {do_pseudo_move(I, Context, FPoff), FPoff};
+    #pseudo_spill_move{} ->
+      {do_pseudo_spill_move(I, Context, FPoff), FPoff};
     #pseudo_tailcall{} ->
       {do_pseudo_tailcall(I, Context), context_framesize(Context)};
     #pseudo_fmove{} ->
       {do_pseudo_fmove(I, Context, FPoff), FPoff};
+    #pseudo_spill_fmove{} ->
+      {do_pseudo_spill_fmove(I, Context, FPoff), FPoff};
     _ ->
       {[I], FPoff}
   end.
@@ -98,6 +102,22 @@ do_pseudo_move(I, Context, FPoff) ->
       end
   end.
 
+do_pseudo_spill_move(I, Context, FPoff) ->
+  #pseudo_spill_move{dst=Dst,temp=Temp,src=Src} = I,
+  case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of
+    false -> % Register allocator changed its mind, turn back to move
+      do_pseudo_move(hipe_ppc:mk_pseudo_move(Dst, Src), Context, FPoff);
+    true ->
+      SrcOffset = pseudo_offset(Src, FPoff, Context),
+      DstOffset = pseudo_offset(Dst, FPoff, Context),
+      case SrcOffset =:= DstOffset of
+	true -> []; % omit move-to-self
+	false ->
+	  mk_load(hipe_ppc:ldop_word(), Temp, SrcOffset, mk_sp(),
+		  mk_store(hipe_ppc:stop_word(), Temp, DstOffset, mk_sp(), []))
+      end
+  end.
+
 do_pseudo_fmove(I, Context, FPoff) ->
   Dst = hipe_ppc:pseudo_fmove_dst(I),
   Src = hipe_ppc:pseudo_fmove_src(I),
@@ -115,6 +135,22 @@ do_pseudo_fmove(I, Context, FPoff) ->
       end
   end.
 
+do_pseudo_spill_fmove(I, Context, FPoff) ->
+  #pseudo_spill_fmove{dst=Dst,temp=Temp,src=Src} = I,
+  case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of
+    false -> % Register allocator changed its mind, turn back to move
+      do_pseudo_fmove(hipe_ppc:mk_pseudo_fmove(Dst, Src), Context, FPoff);
+    true ->
+      SrcOffset = pseudo_offset(Src, FPoff, Context),
+      DstOffset = pseudo_offset(Dst, FPoff, Context),
+      case SrcOffset =:= DstOffset of
+	true -> []; % omit move-to-self
+	false ->
+	  hipe_ppc:mk_fload(Temp, SrcOffset, mk_sp(), 0)
+	    ++ hipe_ppc:mk_fstore(Temp, DstOffset, mk_sp(), 0)
+      end
+  end.
+
 pseudo_offset(Temp, FPoff, Context) ->
   FPoff + context_offset(Context, Temp).
 
diff --git a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl
index 74ef7475eb..bca504d754 100644
--- a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl
+++ b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl
@@ -41,6 +41,7 @@ ra_insn(I, Map, FPMap) ->
     #mtspr{} -> ra_mtspr(I, Map);
     #pseudo_li{} -> ra_pseudo_li(I, Map);
     #pseudo_move{} -> ra_pseudo_move(I, Map);
+    #pseudo_spill_move{} -> ra_pseudo_spill_move(I, Map);
     #pseudo_tailcall{} -> ra_pseudo_tailcall(I, Map);
     #store{} -> ra_store(I, Map);
     #storex{} -> ra_storex(I, Map);
@@ -52,6 +53,7 @@ ra_insn(I, Map, FPMap) ->
     #fp_binary{} -> ra_fp_binary(I, FPMap);
     #fp_unary{} -> ra_fp_unary(I, FPMap);
     #pseudo_fmove{} -> ra_pseudo_fmove(I, FPMap);
+    #pseudo_spill_fmove{} -> ra_pseudo_spill_fmove(I, FPMap);
     _ -> I
   end.
 
@@ -98,6 +100,12 @@ ra_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, Map) ->
   NewSrc = ra_temp(Src, Map),
   I#pseudo_move{dst=NewDst,src=NewSrc}.
 
+ra_pseudo_spill_move(I=#pseudo_spill_move{dst=Dst,temp=Temp,src=Src}, Map) ->
+  NewDst = ra_temp(Dst, Map),
+  NewTemp = ra_temp(Temp, Map),
+  NewSrc = ra_temp(Src, Map),
+  I#pseudo_spill_move{dst=NewDst,temp=NewTemp,src=NewSrc}.
+
 ra_pseudo_tailcall(I=#pseudo_tailcall{stkargs=StkArgs}, Map) ->
   NewStkArgs = ra_args(StkArgs, Map),
   I#pseudo_tailcall{stkargs=NewStkArgs}.
@@ -156,6 +164,13 @@ ra_pseudo_fmove(I=#pseudo_fmove{dst=Dst,src=Src}, FPMap) ->
   NewSrc = ra_temp_fp(Src, FPMap),
   I#pseudo_fmove{dst=NewDst,src=NewSrc}.
 
+ra_pseudo_spill_fmove(I=#pseudo_spill_fmove{dst=Dst,temp=Temp,src=Src},
+		      FPMap) ->
+  NewDst = ra_temp_fp(Dst, FPMap),
+  NewTemp = ra_temp_fp(Temp, FPMap),
+  NewSrc = ra_temp_fp(Src, FPMap),
+  I#pseudo_spill_fmove{dst=NewDst,temp=NewTemp,src=NewSrc}.
+
 ra_args([Arg|Args], Map) ->
   [ra_temp_or_imm(Arg, Map) | ra_args(Args, Map)];
 ra_args([], _) ->
diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl
index 95aa294fe5..0a97129666 100644
--- a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl
+++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl
@@ -57,6 +57,7 @@ do_insn(I, TempMap, Strategy) ->
     #mtspr{} -> do_mtspr(I, TempMap, Strategy);
     #pseudo_li{} -> do_pseudo_li(I, TempMap, Strategy);
     #pseudo_move{} -> do_pseudo_move(I, TempMap, Strategy);
+    #pseudo_spill_move{} -> do_pseudo_spill_move(I, TempMap, Strategy);
     #store{} -> do_store(I, TempMap, Strategy);
     #storex{} -> do_storex(I, TempMap, Strategy);
     #unary{} -> do_unary(I, TempMap, Strategy);
@@ -117,18 +118,25 @@ do_pseudo_li(I=#pseudo_li{dst=Dst}, TempMap, Strategy) ->
 
 do_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, TempMap, Strategy) ->
   %% Either Dst or Src (but not both) may be a pseudo temp.
-  %% pseudo_move and pseudo_tailcall are special cases: in
-  %% all other instructions, all temps must be non-pseudos
-  %% after register allocation.
-  case temp_is_spilled(Dst, TempMap) of
-    true -> % Src must not be a pseudo
-      {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy),
-      NewI = I#pseudo_move{src=NewSrc},
-      {FixSrc ++ [NewI], DidSpill};
+  %% pseudo_move, pseudo_spill_move, and pseudo_tailcall are
+  %% special cases: in all other instructions, all temps
+  %% must be non-pseudos after register allocation.
+  case temp_is_spilled(Src, TempMap)
+    andalso temp_is_spilled(Dst, TempMap)
+  of
+    true -> % Turn into pseudo_spill_move
+      Temp = clone(Src, temp1(Strategy)),
+      NewI = #pseudo_spill_move{dst=Dst,temp=Temp,src=Src},
+      {[NewI], true};
     _ ->
       {[I], false}
   end.
 
+do_pseudo_spill_move(I=#pseudo_spill_move{temp=Temp}, TempMap, _Strategy) ->
+  %% Temp is above the low water mark and must not have been spilled
+  false = temp_is_spilled(Temp, TempMap),
+  {[I], false}.
+
 do_store(I=#store{src=Src,base=Base}, TempMap, Strategy) ->
   {FixSrc,NewSrc,DidSpill1} = fix_src1(Src, TempMap, Strategy),
   {FixBase,NewBase,DidSpill2} = fix_src2(Base, TempMap, Strategy),
diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl
index 5ec5f29577..7342053620 100644
--- a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl
+++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl
@@ -42,6 +42,7 @@ do_insn(I, TempMap) ->
     #fp_binary{} -> do_fp_binary(I, TempMap);
     #fp_unary{} -> do_fp_unary(I, TempMap);
     #pseudo_fmove{} -> do_pseudo_fmove(I, TempMap);
+    #pseudo_spill_fmove{} -> do_pseudo_spill_fmove(I, TempMap);
     _ -> {[I], false}
   end.
 
@@ -81,15 +82,22 @@ do_fp_unary(I=#fp_unary{dst=Dst,src=Src}, TempMap) ->
   {FixSrc ++ [NewI | FixDst], DidSpill1 or DidSpill2}.
 
 do_pseudo_fmove(I=#pseudo_fmove{dst=Dst,src=Src}, TempMap) ->
-  case temp_is_spilled(Dst, TempMap) of
-    true ->
-      {FixSrc,NewSrc,DidSpill} = fix_src(Src, TempMap),
-      NewI = I#pseudo_fmove{src=NewSrc},
-      {FixSrc ++ [NewI], DidSpill};
+  case temp_is_spilled(Src, TempMap)
+    andalso temp_is_spilled(Dst, TempMap)
+  of
+    true -> % Turn into pseudo_spill_fmove
+      Temp = clone(Src),
+      NewI = #pseudo_spill_fmove{dst=Dst,temp=Temp,src=Src},
+      {[NewI], true};
     _ ->
       {[I], false}
   end.
 
+do_pseudo_spill_fmove(I=#pseudo_spill_fmove{temp=Temp}, TempMap) ->
+  %% Temp is above the low water mark and must not have been spilled
+  false = temp_is_spilled(Temp, TempMap),
+  {[I], false}.
+
 %%% Fix Dst and Src operands.
 
 fix_src(Src, TempMap) ->
diff --git a/lib/hipe/ppc/hipe_ppc_subst.erl b/lib/hipe/ppc/hipe_ppc_subst.erl
index 1cd18b5c01..e282b22774 100644
--- a/lib/hipe/ppc/hipe_ppc_subst.erl
+++ b/lib/hipe/ppc/hipe_ppc_subst.erl
@@ -48,6 +48,8 @@ insn_temps(T, I) ->
       #pseudo_call_prepare{} -> I;
       #pseudo_li{dst=D} -> I#pseudo_li{dst=T(D)};
       #pseudo_move{dst=D,src=S} -> I#pseudo_move{dst=T(D),src=T(S)};
+      #pseudo_spill_move{dst=D,temp=U,src=S} ->
+	  I#pseudo_spill_move{dst=T(D),temp=T(U),src=T(S)};
       #pseudo_tailcall{func=F,stkargs=Stk} when not is_record(F, ppc_temp) ->
 	  I#pseudo_tailcall{stkargs=lists:map(A,Stk)};
       #pseudo_tailcall_prepare{} -> I;
@@ -62,7 +64,9 @@ insn_temps(T, I) ->
       #fp_binary{dst=D,src1=L,src2=R} ->
 	  I#fp_binary{dst=T(D),src1=T(L),src2=T(R)};
       #fp_unary{dst=D,src=S} -> I#fp_unary{dst=T(D),src=T(S)};
-      #pseudo_fmove{dst=D,src=S} -> I#pseudo_fmove{dst=T(D),src=T(S)}
+      #pseudo_fmove{dst=D,src=S} -> I#pseudo_fmove{dst=T(D),src=T(S)};
+      #pseudo_spill_fmove{dst=D,temp=U,src=S} ->
+	  I#pseudo_spill_fmove{dst=T(D),temp=T(U),src=T(S)}
   end.
 
 -spec oper_temps(subst_fun(), oper()) -> oper().
diff --git a/lib/hipe/regalloc/Makefile b/lib/hipe/regalloc/Makefile
index 209f230a9b..81a92e5d35 100644
--- a/lib/hipe/regalloc/Makefile
+++ b/lib/hipe/regalloc/Makefile
@@ -50,8 +50,10 @@ MODULES = hipe_ig hipe_ig_moves hipe_moves \
 	  hipe_optimistic_regalloc \
 	  hipe_coalescing_regalloc \
 	  hipe_graph_coloring_regalloc \
+	  hipe_range_split \
 	  hipe_regalloc_loop \
 	  hipe_regalloc_prepass \
+	  hipe_restore_reuse \
 	  hipe_ls_regalloc \
 	  hipe_ppc_specific hipe_ppc_specific_fp \
 	  hipe_sparc_specific hipe_sparc_specific_fp \
diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl
index 9c94539bc6..d592ba391c 100644
--- a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl
+++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl
@@ -30,6 +30,7 @@
 	 def_use/2,
 	 is_arg/2,	%% used by hipe_ls_regalloc
 	 is_move/2,
+	 is_spill_move/2,
 	 is_fixed/2,	%% used by hipe_graph_coloring_regalloc
 	 is_global/2,
 	 is_precoloured/2,
@@ -50,12 +51,19 @@
 -export([check_and_rewrite/3,
 	 check_and_rewrite/4]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights
+-export([branch_preds/2]).
+
 %%----------------------------------------------------------------------------
 
 -include("../flow/cfg.hrl").
@@ -126,8 +134,8 @@ temp0(_) ->
 all_precoloured(Ctx) ->
   allocatable(Ctx).
 
-is_precoloured(Reg, Ctx) ->
-  lists:member(Reg,all_precoloured(Ctx)).
+is_precoloured(Reg, _) ->
+  hipe_amd64_registers:is_precoloured_sse2(Reg).
 
 physical_name(Reg, _) ->
   Reg.
@@ -152,6 +160,9 @@ bb(CFG, L, _) ->
 update_bb(CFG,L,BB,_) ->
   hipe_x86_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Instr,_) ->
+  hipe_x86_cfg:branch_preds(Instr).
+
 %% AMD64 stuff
 
 def_use(Instruction, _) ->
@@ -184,10 +195,34 @@ is_move(Instruction, _) ->
 	andalso hipe_x86:is_temp(Dst) andalso hipe_x86:temp_is_allocatable(Dst);
     false -> false
   end.
+
+is_spill_move(Instruction,_) ->
+  hipe_x86:is_pseudo_spill_fmove(Instruction).
  
 reg_nr(Reg, _) ->
   hipe_x86:temp_reg(Reg).
 
+mk_move(Src, Dst, _) ->
+  hipe_x86:mk_fmove(Src, Dst).
+
+mk_goto(Label, _) ->
+  hipe_x86:mk_jmp_label(Label).
+
+redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) ->
+  Ref = make_ref(),
+  put(Ref, false),
+  I = hipe_x86_subst:insn_lbls(
+	fun(Tgt) ->
+	    if Tgt =:= ToOld -> put(Ref, true), ToNew;
+	       is_integer(Tgt) -> Tgt
+	    end
+	end, Jmp),
+  true = erase(Ref), % Assert that something was rewritten
+  I.
+
+new_label(_) ->
+  hipe_gensym:get_next_label(x86).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(x86).
 
diff --git a/lib/hipe/regalloc/hipe_arm_specific.erl b/lib/hipe/regalloc/hipe_arm_specific.erl
index cef22e5af9..7ebc6aa336 100644
--- a/lib/hipe/regalloc/hipe_arm_specific.erl
+++ b/lib/hipe/regalloc/hipe_arm_specific.erl
@@ -24,6 +24,7 @@
 	 ,reg_nr/2
 	 ,def_use/2
 	 ,is_move/2
+	 ,is_spill_move/2
 	 ,is_precoloured/2
 	 ,var_range/2
 	 ,allocatable/1
@@ -46,12 +47,19 @@
 %% callbacks for hipe_regalloc_loop
 -export([check_and_rewrite/3]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights, hipe_range_split
+-export([branch_preds/2]).
+
 check_and_rewrite(CFG, Coloring, no_context) ->
   hipe_arm_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal').
 
@@ -115,6 +123,9 @@ bb(CFG,L,_) ->
 update_bb(CFG,L,BB,_) ->
   hipe_arm_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Branch,_) ->
+  hipe_arm_cfg:branch_preds(Branch).
+
 %% ARM stuff
 
 def_use(Instruction, Ctx) ->
@@ -144,9 +155,33 @@ is_move(Instruction, _) ->
     false -> false
   end.
 
+is_spill_move(Instruction, _) ->
+  hipe_arm:is_pseudo_spill_move(Instruction).
+
 reg_nr(Reg, _) ->
   hipe_arm:temp_reg(Reg).
 
+mk_move(Src, Dst, _) ->
+  hipe_arm:mk_pseudo_move(Dst, Src).
+
+mk_goto(Label, _) ->
+  hipe_arm:mk_b_label(Label).
+
+redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) ->
+  Ref = make_ref(),
+  put(Ref, false),
+  I = hipe_arm_subst:insn_lbls(
+	fun(Tgt) ->
+	    if Tgt =:= ToOld -> put(Ref, true), ToNew;
+	       is_integer(Tgt) -> Tgt
+	    end
+	end, Jmp),
+  true = erase(Ref), % Assert that something was rewritten
+  I.
+
+new_label(_) ->
+  hipe_gensym:get_next_label(arm).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(arm).
 
diff --git a/lib/hipe/regalloc/hipe_coalescing_regalloc.erl b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl
index e8ccbec9f1..b8f0a1974c 100644
--- a/lib/hipe/regalloc/hipe_coalescing_regalloc.erl
+++ b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl
@@ -914,7 +914,7 @@ findCheapest([Node|Nodes], IG, Cost, Cheapest, SpillLimit) ->
 %% limit are extremely expensive.
 
 getCost(Node, IG, SpillLimit) ->
-  case Node > SpillLimit of
+  case Node >= SpillLimit of
     true -> inf;
     false -> hipe_ig:node_spill_cost(Node, IG)
   end.
diff --git a/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl
index 07aa812f4a..f82d3a2cbc 100644
--- a/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl
+++ b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl
@@ -209,8 +209,8 @@ color(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target,
 
   %% Any nodes above the spillimit must be colored first...
   MustNotSpill = 
-    if NumNodes > SpillLimit+1 ->
-	sort_on_degree(lists:seq(SpillLimit+1,NumNodes-1) -- Low,IG);
+    if NumNodes > SpillLimit ->
+	sort_on_degree(lists:seq(SpillLimit,NumNodes-1) -- Low,IG);
        true -> []
     end,
       
@@ -401,7 +401,7 @@ spill_costs([{N,Info}|Ns], IG, Vis, Spill, SpillLimit, Target) ->
 	    true ->
 	      spill_costs(Ns, IG, Vis, Spill, SpillLimit, Target);
 	    false ->
-	      if N > SpillLimit ->
+	      if N >= SpillLimit ->
 		  spill_costs(Ns, IG, Vis, Spill, SpillLimit, Target);
 		 true ->
 		  [{spill_cost_of(N,Spill)/Deg,N} | 
diff --git a/lib/hipe/regalloc/hipe_optimistic_regalloc.erl b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl
index b96920cbcf..a019c46b90 100644
--- a/lib/hipe/regalloc/hipe_optimistic_regalloc.erl
+++ b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl
@@ -1933,7 +1933,7 @@ findCheapest([Node|Nodes], IG, Cost, Cheapest, SpillLimit) ->
 %% limit are extremely expensive.
 
 getCost(Node, IG, SpillLimit) ->
-  case Node > SpillLimit of
+  case Node >= SpillLimit of
     true -> inf;
     false ->
       SpillCost = hipe_ig:node_spill_cost(Node, IG),
diff --git a/lib/hipe/regalloc/hipe_ppc_specific.erl b/lib/hipe/regalloc/hipe_ppc_specific.erl
index a6450b4d96..81bb551bd2 100644
--- a/lib/hipe/regalloc/hipe_ppc_specific.erl
+++ b/lib/hipe/regalloc/hipe_ppc_specific.erl
@@ -24,6 +24,7 @@
 	 ,reg_nr/2
 	 ,def_use/2
 	 ,is_move/2
+	 ,is_spill_move/2
 	 ,is_precoloured/2
 	 ,var_range/2
 	 ,allocatable/1
@@ -46,12 +47,19 @@
 %% callbacks for hipe_regalloc_loop
 -export([check_and_rewrite/3]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights
+-export([branch_preds/2]).
+
 check_and_rewrite(CFG, Coloring, _) ->
   hipe_ppc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal').
 
@@ -115,6 +123,9 @@ bb(CFG,L,_) ->
 update_bb(CFG,L,BB,_) ->
   hipe_ppc_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Instr,_) ->
+  hipe_ppc_cfg:branch_preds(Instr).
+
 %% PowerPC stuff
 
 def_use(Instruction, Ctx) ->
@@ -144,9 +155,24 @@ is_move(Instruction, _) ->
     false -> false
   end.
 
+is_spill_move(Instruction, _) ->
+  hipe_ppc:is_pseudo_spill_move(Instruction).
+
 reg_nr(Reg, _) ->
   hipe_ppc:temp_reg(Reg).
 
+mk_move(Src, Dst, _) ->
+  hipe_ppc:mk_pseudo_move(Dst, Src).
+
+mk_goto(Label, _) ->
+  hipe_ppc:mk_b_label(Label).
+
+redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) ->
+  hipe_ppc_cfg:redirect_jmp(Jmp, ToOld, ToNew).
+
+new_label(_) ->
+  hipe_gensym:get_next_label(ppc).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(ppc).
 
diff --git a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl
index 23cb6c0318..dcfdf6592c 100644
--- a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl
+++ b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl
@@ -24,6 +24,7 @@
 	 ,reg_nr/2
 	 ,def_use/2
 	 ,is_move/2
+	 ,is_spill_move/2
 	 ,is_precoloured/2
 	 ,var_range/2
 	 ,allocatable/1
@@ -46,12 +47,19 @@
 %% callbacks for hipe_regalloc_loop
 -export([check_and_rewrite/3]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights
+-export([branch_preds/2]).
+
 check_and_rewrite(CFG, Coloring, _) ->
   hipe_ppc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring).
 
@@ -108,6 +116,9 @@ bb(CFG, L, _) ->
 update_bb(CFG,L,BB,_) ->
   hipe_ppc_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Instr,_) ->
+  hipe_ppc_cfg:branch_preds(Instr).
+
 %% PowerPC stuff
 
 def_use(I, Ctx) ->
@@ -125,9 +136,24 @@ defines_all_alloc(I, _) ->
 is_move(I, _) ->
   hipe_ppc:is_pseudo_fmove(I).
  
+is_spill_move(I, _) ->
+  hipe_ppc:is_pseudo_spill_fmove(I).
+
 reg_nr(Reg, _) ->
   hipe_ppc:temp_reg(Reg).
 
+mk_move(Src, Dst, _) ->
+  hipe_ppc:mk_pseudo_fmove(Dst, Src).
+
+mk_goto(Label, _) ->
+  hipe_ppc:mk_b_label(Label).
+
+redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) ->
+  hipe_ppc_cfg:redirect_jmp(Jmp, ToOld, ToNew).
+
+new_label(_) ->
+  hipe_gensym:get_next_label(ppc).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(ppc).
 
diff --git a/lib/hipe/regalloc/hipe_range_split.erl b/lib/hipe/regalloc/hipe_range_split.erl
new file mode 100644
index 0000000000..39b086d9f7
--- /dev/null
+++ b/lib/hipe/regalloc/hipe_range_split.erl
@@ -0,0 +1,1187 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%@doc
+%%	       TEMPORARY LIVE RANGE SPLITTING PASS
+%%
+%% Live range splitting is useful to allow a register allocator to allocate a
+%% temporary to register for a part of its lifetime, even if it cannot be for
+%% the entirety. This improves register allocation quality, at the cost of
+%% making the allocation problem more time and memory intensive to solve.
+%%
+%% Optimal allocation can be achieved if all temporaries are split at every
+%% program point (between all instructions), but this makes register allocation
+%% infeasibly slow in practice. Instead, this module uses heuristics to choose
+%% which temporaries should have their live ranges split, and at which points.
+%%
+%% The range splitter only considers temps which are live during a call
+%% instruction, since they're known to be spilled. The control-flow graph is
+%% partitioned at call instructions and splitting decisions are made separately
+%% for each partition. The register copy of a temp (if any) gets a separate name
+%% in each partition.
+%%
+%% There are three different ways the range splitter may choose to split a
+%% temporary in a program partition:
+%%
+%%  * Mode1: Spill the temp before calls, and restore it after them
+%%  * Mode2: Spill the temp after definitions, restore it after calls
+%%  * Mode3: Spill the temp after definitions, restore it before uses
+%%
+%% To pick which of these should be used for each temp×partition pair, the range
+%% splitter uses a cost function. The cost is simply the sum of the cost of all
+%% expected stack accesses, and the cost for an individual stack access is based
+%% on the probability weight of the basic block that it resides in. This biases
+%% the range splitter so that it attempts moving stack accesses from a function's
+%% hot path to the cold path.
+%%
+%% The heuristic has a couple of tuning knobs, adjusting its preference for
+%% different spilling modes, aggressiveness, and how much influence the basic
+%% block probability weights have.
+%%
+%% Edge case not handled: Call instructions directly defining a pseudo. In that
+%% case, if that pseudo has been selected for mode2 spills, no spill is inserted
+%% after the call.
+-module(hipe_range_split).
+
+-export([split/5]).
+
+-compile(inline).
+
+%% -define(DO_ASSERT, 1).
+%% -define(DEBUG, 1).
+-include("../main/hipe.hrl").
+
+%% Heuristic tuning constants
+-define(DEFAULT_MIN_GAIN, 1.1).    % option: range_split_min_gain
+-define(DEFAULT_MODE1_FUDGE, 1.1). % option: range_split_mode1_fudge
+-define(DEFAULT_WEIGHT_POWER, 2).  % option: range_split_weight_power
+-define(WEIGHT_CONST_FUN(Power), math:log(Power)/math:log(100)).
+-define(WEIGHT_FUN(Wt, Const), math:pow(Wt, Const)).
+-define(HEUR_MAX_TEMPS, 20000).
+
+-type target_cfg()       :: any().
+-type target_instr()     :: any().
+-type target_temp()      :: any().
+-type liveness()         :: any().
+-type target_module()    :: module().
+-type target_context()   :: any().
+-type target()           :: {target_module(), target_context()}.
+-type liveset()          :: ordsets:ordset(temp()).
+-type temp()             :: non_neg_integer().
+-type label()            :: non_neg_integer().
+
+%% @doc Pass entry point; defers to hipe_restore_reuse when there are too many temps.
+-spec split(target_cfg(), liveness(), target_module(), target_context(),
+            comp_options()) -> target_cfg().
+split(TCFG0, Liveness, TargetMod, TargetContext, Options) ->
+  Target = {TargetMod, TargetContext},
+  case number_of_temporaries(TCFG0, Target) of
+    NoTemps when NoTemps > ?HEUR_MAX_TEMPS ->
+      ?debug_msg("~w: Too many temps (~w), falling back on restore_reuse.~n",
+                 [?MODULE, NoTemps]),
+      hipe_restore_reuse:split(TCFG0, Liveness, TargetMod, TargetContext);
+    _FewEnough ->
+      Weights = compute_weights(TCFG0, TargetMod, TargetContext, Options),
+      {ICFG0, Temps} = convert(TCFG0, Target),
+      Avail = avail_analyse(TCFG0, Liveness, Target),
+      Defs = def_analyse(ICFG0, TCFG0),
+      RDefs = rdef_analyse(ICFG0),
+      PLive = plive_analyse(ICFG0),
+      {ICFG, DUCounts, Costs, PartSets} =
+        scan(ICFG0, Liveness, PLive, Weights, Defs, RDefs, Avail, Target),
+      {PartMap, _} = hipe_dsets:to_map(PartSets),
+      Renames = decide(DUCounts, Costs, Target, Options),
+      rewrite(ICFG, TCFG0, Target, Liveness, PLive, Defs, Avail, PartMap,
+              Renames, Temps)
+  end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Internal program representation
+%%
+%% Second pass: Convert cfg to internal representation
+
+-record(cfg, {
+	  rpo_labels :: [label()],            % block labels in reverse postorder
+	  bbs        :: #{label() => bb()}    % the block stored under each label
+	 }).
+-type cfg() :: #cfg{}.
+
+cfg_bb(L, #cfg{bbs=BBS}) -> maps:get(L, BBS).  % fails if L is not a key of bbs
+
+cfg_postorder(#cfg{rpo_labels=RPO}) -> lists:reverse(RPO).  % reversed RPO list
+
+-record(bb, {
+	  code     :: [code_elem()],  % instructions, later also spill/restore markers
+	  %% If the last instruction of code defines all allocatable registers
+	  has_call :: boolean(),
+	  succ     :: [label()]       % successors, taken from hipe_gen_cfg:succ/2
+	 }).
+-type bb() :: #bb{}.
+-type code_elem() :: instr() | mode2_spills() | mode3_restores().
+
+bb_code(#bb{code=Code}) -> Code.                % field accessor
+bb_has_call(#bb{has_call=HasCall}) -> HasCall.  % field accessor
+bb_succ(#bb{succ=Succ}) -> Succ.                % field accessor
+
+%% The code of the block without its final element, which must exist.
+bb_butlast(#bb{code=Elems}) -> bb_butlast_1(Elems).
+
+bb_butlast_1([_Final]) -> [];
+bb_butlast_1([Elem|Rest]) -> [Elem|bb_butlast_1(Rest)].
+
+bb_last(#bb{code=Elems}) -> lists:last(Elems). % the final code element
+
+-record(instr, {
+	  i   :: target_instr(),            % the wrapped target instruction
+	  def :: ordsets:ordset(temp()),    % temps defined by i
+	  use :: ordsets:ordset(temp())     % temps used by i
+	 }).
+-type instr() :: #instr{}.
+
+-record(mode2_spills, {  % marker: temps that mode2 would spill at this point
+	  temps :: ordsets:ordset(temp())
+	}).
+-type mode2_spills() :: #mode2_spills{}.
+
+-record(mode3_restores, {  % marker: temps that mode3 would restore at this point
+	  temps :: ordsets:ordset(temp())
+	}).
+-type mode3_restores() :: #mode3_restores{}.
+
+%% Builds the internal cfg, plus a map from temp number to target temp.
+-spec convert(target_cfg(), target()) -> {cfg(), temps()}.
+convert(TCFG, Target) ->
+  Order = reverse_postorder(TCFG, Target),
+  {LabelledBBs, TempMap} = convert_bbs(Order, TCFG, Target, #{}, []),
+  ICFG = #cfg{rpo_labels = Order, bbs = maps:from_list(LabelledBBs)},
+  {ICFG, TempMap}.
+
+%% Converts the blocks in the given order, threading the temp map through.
+%% The {Label, Block} pairs come out in reverse of the visiting order.
+convert_bbs([], _TCFG, _Target, TempMap, Done) ->
+  {Done, TempMap};
+convert_bbs([Label|Labels], TCFG, Target, TempMap0, Done) ->
+  TargetCode = hipe_bb:code(bb(TCFG, Label, Target)),
+  {Code, #instr{i = LastTI}, TempMap} =
+    convert_code(TargetCode, Target, TempMap0, []),
+  Block = #bb{code = Code, has_call = defines_all_alloc(LastTI, Target),
+              succ = hipe_gen_cfg:succ(TCFG, Label)},
+  convert_bbs(Labels, TCFG, Target, TempMap, [{Label, Block}|Done]).
+
+%% Wraps each target instruction in an #instr{} with its def and use sets.
+convert_code([], _Target, TempMap, [Final|_]=Rev) ->
+  {lists:reverse(Rev), Final, TempMap};
+convert_code([TI|Rest], Target, TempMap0, Rev) ->
+  {TDefs, TUses} = def_use(TI, Target),
+  ToSet = fun(Ts) -> ordsets:from_list(reg_names(Ts, Target)) end,
+  Wrapped = #instr{i = TI, def = ToSet(TDefs), use = ToSet(TUses)},
+  TempMap = add_temps(TUses, Target, add_temps(TDefs, Target, TempMap0)),
+  convert_code(Rest, Target, TempMap, [Wrapped|Rev]).
+
+-type temps() :: #{temp() => target_temp()}.
+%% Stores every target temp in the map under its register number.
+add_temps(TTemps, Target, Temps) ->
+  lists:foldl(fun(T, M) -> M#{reg_nr(T, Target) => T} end, Temps, TTemps).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Fourth pass: P({DEF}) lattice fwd dataflow (for eliding stores at SPILL
+%% splits)
+-type defsi() :: #{label() => defseti() | {call, defseti(), defseti()}}.
+-type defs()  :: #{label() => defsetf()}.
+
+-spec def_analyse(cfg(), target_cfg()) -> defs().
+def_analyse(#cfg{rpo_labels = Order} = CFG, TCFG) ->
+  %% Seed with the block-local definitions, then iterate in RPO
+  def_dataf(Order, TCFG, def_init(CFG)).
+
+-spec def_init(cfg()) -> defsi().
+def_init(#cfg{bbs = BBs}) ->
+  maps:map(fun(_Label, BB) -> def_init_bb(BB) end, BBs).
+
+%% Initial value of one block: the temps it defines. A block ending in a call
+%% keeps the defs before the call and those of the call itself apart.
+def_init_bb(#bb{has_call = false} = BB) ->
+  def_init_scan(bb_code(BB), defseti_new());
+def_init_bb(#bb{has_call = true} = BB) ->
+  {call, def_init_scan(bb_butlast(BB), defseti_new()),
+   defseti_from_ordset((bb_last(BB))#instr.def)}.
+
+%% Adds the defs of every instruction in the list to Defset.
+def_init_scan(Instrs, Defset) ->
+  lists:foldl(fun(#instr{def = Def}, Acc) -> defseti_add_ordset(Def, Acc) end,
+              Defset, Instrs).
+
+%% Iterates def_dataf_once/4 until a pass changes nothing, then finalises
+%% the result.
+-spec def_dataf([label()], target_cfg(), defsi()) -> defs().
+def_dataf(Labels, TCFG, Defs0) ->
+  {Defs, NChanged} = def_dataf_once(Labels, TCFG, Defs0, 0),
+  if NChanged =:= 0 -> def_finalise(Defs);
+     true           -> def_dataf(Labels, TCFG, Defs)
+  end.
+
+-spec def_finalise(defsi()) -> defs().
+def_finalise(Defs) ->  % keeps only call blocks, as their finalised "butlast" set
+  maps:from_list([{K, defseti_finalise(BL)}
+		  || {K, {call, BL, _}} <- maps:to_list(Defs)]).
+
+%% One forward pass: the set of each block is extended with the out-sets of
+%% its predecessors. For a call block only the "butlast" part is extended.
+%% Returns the new map and the number of blocks whose value changed.
+-spec def_dataf_once([label()], target_cfg(), defsi(), non_neg_integer())
+                    -> {defsi(), non_neg_integer()}.
+def_dataf_once([], _TCFG, Defs, Changed) -> {Defs, Changed};
+def_dataf_once([L|Ls], TCFG, Defs0, Changed0) ->
+  Preds = hipe_gen_cfg:pred(TCFG, L),
+  WithPreds =
+    fun(Set0) ->
+        lists:foldl(fun(P, Set) -> defseti_union(defout(P, Defs0), Set) end,
+                    Set0, Preds)
+    end,
+  Old = maps:get(L, Defs0),
+  New = case Old of
+          {call, Butlast, Defout} -> {call, WithPreds(Butlast), Defout};
+          _ -> WithPreds(Old)
+        end,
+  Changed = if New =:= Old -> Changed0; true -> Changed0 + 1 end,
+  def_dataf_once(Ls, TCFG, Defs0#{L := New}, Changed).
+
+%% The set flowing out of L; for a call block, the defs of the call alone.
+-spec defout(label(), defsi()) -> defseti().
+defout(L, Defs) -> defout_val(maps:get(L, Defs)).
+
+defout_val({call, _DefButLast, Defout}) -> Defout;
+defout_val(Defout) -> Defout.
+
+-spec defbutlast(label(), defs()) -> defsetf().
+defbutlast(L, Defs) -> maps:get(L, Defs).  % the finalised map only has call blocks
+
+-spec defseti_new() -> defseti().  % defseti(): the form used while iterating
+-spec defseti_union(defseti(), defseti()) -> defseti().
+-spec defseti_add_ordset(ordsets:ordset(temp()), defseti()) -> defseti().
+-spec defseti_from_ordset(ordsets:ordset(temp())) -> defseti().
+-spec defseti_finalise(defseti()) -> defsetf().  % defsetf(): the form for lookups
+-spec defsetf_member(temp(), defsetf()) -> boolean().
+-spec defsetf_intersect_ordset(ordsets:ordset(temp()), defsetf())
+                              -> ordsets:ordset(temp()).
+
+-type defseti() :: bitord().  % set form used while the dataflow iterates
+defseti_new() -> bitord_new().
+defseti_union(A, B) -> bitord_union(A, B).
+defseti_add_ordset(OS, D) -> defseti_union(defseti_from_ordset(OS), D).
+defseti_from_ordset(OS) -> bitord_from_ordset(OS).
+defseti_finalise(D) -> bitarr_from_bitord(D).  % converts to the lookup form
+
+-type defsetf() :: bitarr().  % set form used for membership tests afterwards
+defsetf_member(E, D) -> bitarr_get(E, D).
+
+%% Intersection of an ordset with a finalised def set. The elements keep the
+%% order they had in the input.
+defsetf_intersect_ordset(OS, D) ->
+  %% bitarr_get/2 always yields a boolean, so it can serve as the filter
+  %% directly
+  [E || E <- OS, bitarr_get(E, D)].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Fifth pass: P({DEF}) lattice reverse dataflow (for eliding stores at defines
+%% in mode2)
+-type rdefsi() :: #{label() =>
+		     {call, rdefseti(), [label()]}
+		   | {nocall, rdefseti(), rdefseti(), [label()]}}.
+-type rdefs() :: #{label() => {final, rdefsetf(), [label()]}}.
+
+-spec rdef_analyse(cfg()) -> rdefs().
+rdef_analyse(#cfg{rpo_labels = Order} = CFG) ->
+  %% Only the call-free blocks are iterated over; see rdef_postorder/3
+  NoCallPO = rdef_postorder(Order, CFG, []),
+  rdef_dataf(NoCallPO, rdef_init(CFG)).
+
+%% Reverses the labels onto Acc, minus 'call' labels as they don't change
+-spec rdef_postorder([label()], cfg(), [label()]) -> [label()].
+rdef_postorder(RPO, CFG, Acc) ->
+  lists:foldl(fun(L, PO) ->
+                  case bb_has_call(cfg_bb(L, CFG)) of
+                    true -> PO; false -> [L|PO]
+                  end
+              end, Acc, RPO).
+
+-spec rdef_init(cfg()) -> rdefsi().
+rdef_init(#cfg{bbs = BBs}) ->
+  maps:map(fun(_Label, BB) -> rdef_init_bb(BB) end, BBs).
+
+%% Initial value of one block. A call block gets a fixed in-set, namely the
+%% defs of the code before the call. Any other block keeps its own defs as
+%% Gen and starts with the in-set at top.
+rdef_init_bb(#bb{has_call = true, succ = Succs} = BB) ->
+  {call, rdef_init_scan(bb_butlast(BB), rdefseti_empty()), Succs};
+rdef_init_bb(#bb{has_call = false, succ = Succs} = BB) ->
+  Gen = rdef_init_scan(bb_code(BB), rdefseti_empty()),
+  {nocall, Gen, rdefseti_top(), Succs}.
+
+%% Adds the defs of every instruction in the list to Defset.
+-spec rdef_init_scan([instr()], rdefseti()) -> rdefseti().
+rdef_init_scan(Instrs, Defset) ->
+  lists:foldl(fun(#instr{def = Def}, Acc) -> rdefseti_add_ordset(Def, Acc) end,
+              Defset, Instrs).
+
+%% Iterates rdef_dataf_once/3 until a pass changes nothing, then finalises
+%% the result.
+-spec rdef_dataf([label()], rdefsi()) -> rdefs().
+rdef_dataf(Labels, Defs0) ->
+  {Defs, NChanged} = rdef_dataf_once(Labels, Defs0, 0),
+  if NChanged =:= 0 -> rdef_finalise(Defs);
+     true           -> rdef_dataf(Labels, Defs)
+  end.
+
+%% Turns each entry into {final, Out, Succs}, where Out is the finalised
+%% intersection of the in-sets of the block's successors.
+-spec rdef_finalise(rdefsi()) -> rdefs().
+rdef_finalise(Defs) ->
+  Out = fun(L) -> rdefout_intersect(L, Defs, rdefseti_top()) end,
+  maps:map(fun(L, V) -> {final, rdefset_finalise(Out(L)), rsuccs_val(V)} end,
+           Defs).
+
+%% One pass over the given labels, all of which must belong to call-free
+%% blocks. Returns the new map and how many in-sets changed in this pass.
+-spec rdef_dataf_once([label()], rdefsi(), non_neg_integer())
+                     -> {rdefsi(), non_neg_integer()}.
+rdef_dataf_once([], Defs, Changed) -> {Defs, Changed};
+rdef_dataf_once([L|Ls], Defs0, Changed0) ->
+  #{L := {nocall, Gen, OldIn, Succs}} = Defs0,
+  NewIn = rdefseti_union(Gen, rdefout_intersect(L, Defs0, OldIn)),
+  Changed = if NewIn =:= OldIn -> Changed0;
+               true -> Changed0 + 1
+            end,
+  rdef_dataf_once(Ls, Defs0#{L := {nocall, Gen, NewIn, Succs}}, Changed).
+
+-spec rdefin(label(), rdefsi()) -> rdefseti().
+rdefin(L, Defs) -> rdefin_val(maps:get(L, Defs)).  % in-set of block L
+rdefin_val({nocall, _Gen, Defin, _Succs}) -> Defin;
+rdefin_val({call, Defin, _Succs}) -> Defin.
+
+-spec rsuccs(label(), rdefsi()) -> [label()].
+rsuccs(L, Defs) -> rsuccs_val(maps:get(L, Defs)).  % successors stored for L
+rsuccs_val({nocall, _Gen, _Defin, Succs}) -> Succs;
+rsuccs_val({call, _Defin, Succs}) -> Succs.
+
+-spec rdefout(label(), rdefs()) -> rdefsetf().
+rdefout(L, Defs) ->  % works on the finalised map, unlike rdefin/2 and rsuccs/2
+  #{L := {final, Defout, _Succs}} = Defs,
+  Defout.
+
+%% Intersects Init with the in-set of every successor of L.
+-spec rdefout_intersect(label(), rdefsi(), rdefseti()) -> rdefseti().
+rdefout_intersect(L, Defs, Init) ->
+  SuccIns = [rdefin(S, Defs) || S <- rsuccs(L, Defs)],
+  lists:foldl(fun rdefseti_intersect/2, Init, SuccIns).
+
+-type rdefseti() :: bitord() | top.  % 'top' stands for the set of all temps
+rdefseti_top() -> top.
+rdefseti_empty() -> bitord_new().
+-spec rdefseti_from_ordset(ordsets:ordset(temp())) -> rdefseti().
+rdefseti_from_ordset(OS) -> bitord_from_ordset(OS).
+
+-spec rdefseti_add_ordset(ordsets:ordset(temp()), rdefseti()) -> rdefseti().
+rdefseti_add_ordset(_, top) -> top; % Should never happen in rdef_dataf
+rdefseti_add_ordset(OS, D) -> rdefseti_union(rdefseti_from_ordset(OS), D).
+
+-spec rdefseti_union(rdefseti(), rdefseti()) -> rdefseti().
+rdefseti_union(top, _) -> top;  % top absorbs everything under union
+rdefseti_union(_, top) -> top;
+rdefseti_union(A, B) -> bitord_union(A, B).
+
+-spec rdefseti_intersect(rdefseti(), rdefseti()) -> rdefseti().
+rdefseti_intersect(top, D) -> D;  % top is the identity of intersection
+rdefseti_intersect(D, top) -> D;
+rdefseti_intersect(A, B) -> bitord_intersect(A, B).
+
+-type rdefsetf() :: {arr, bitarr()} | top.  % finalised form, used for lookups
+-spec rdefset_finalise(rdefseti()) -> rdefsetf().
+rdefset_finalise(top) -> top;
+rdefset_finalise(Ord) -> {arr, bitarr_from_bitord(Ord)}.
+
+%% rdefsetf_top() -> top.
+rdefsetf_empty() -> {arr, bitarr_new()}.  % finalised set with no members
+
+-spec rdefsetf_add_ordset(ordsets:ordset(temp()), rdefsetf()) -> rdefsetf().
+rdefsetf_add_ordset(_, top) -> top;  % nothing can be added to top
+rdefsetf_add_ordset(OS, {arr, Arr}) ->
+  {arr, lists:foldl(fun bitarr_set/2, Arr, OS)}.
+
+%% Steps a finalised rdef set backwards over a single instruction, which is
+%% expected not to be one that defines all allocatable registers.
+-spec rdef_step(instr(), rdefsetf()) -> rdefsetf().
+rdef_step(#instr{def=Defined}, Defset) -> rdefsetf_add_ordset(Defined, Defset).
+
+%% The elements of OS that are not in the rdef set; against top, none remain.
+-spec ordset_subtract_rdefsetf(ordsets:ordset(temp()), rdefsetf())
+                              -> ordsets:ordset(temp()).
+ordset_subtract_rdefsetf(_, top) -> [];
+ordset_subtract_rdefsetf(OS, {arr, Arr}) ->
+  [E || E <- OS, not bitarr_get(E, Arr)].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Integer sets represented as bit sets
+%%
+%% Two representations; bitord() and bitarr()
+-define(LIMB_IX_BITS,    11).
+-define(LIMB_BITS,       (1 bsl ?LIMB_IX_BITS)).
+-define(LIMB_IX(Index),  (Index bsr ?LIMB_IX_BITS)).
+-define(BIT_IX(Index),   (Index band (?LIMB_BITS - 1))).
+-define(BIT_MASK(Index), (1 bsl ?BIT_IX(Index))).
+
+%% bitord(): fast at union/2 and can be compared for equality with '=:='
+-type bitord() :: orddict:orddict(non_neg_integer(), 0..((1 bsl ?LIMB_BITS)-1)).
+
+-spec bitord_new() -> bitord().
+bitord_new() -> [].  % the empty set has no limbs at all
+
+-spec bitord_union(bitord(), bitord()) -> bitord().
+bitord_union(Lhs, Rhs) ->  % limbs present in both operands are OR-ed together
+  orddict:merge(fun(_, L, R) -> L bor R end, Lhs, Rhs).
+
+%% Limb-wise AND; only limbs present in both operands appear in the result.
+-spec bitord_intersect(bitord(), bitord()) -> bitord().
+bitord_intersect([], _) -> [];
+bitord_intersect(_, []) -> [];
+bitord_intersect([{LK, L}|Ls] = Lhs, [{RK, R}|Rs] = Rhs) ->
+  if LK < RK   -> bitord_intersect(Ls, Rhs);
+     LK > RK   -> bitord_intersect(Lhs, Rs);
+     LK =:= RK -> [{LK, L band R} | bitord_intersect(Ls, Rs)]
+  end.
+
+-spec bitord_from_ordset(ordsets:ordset(non_neg_integer())) -> bitord().
+bitord_from_ordset([]) -> [];
+bitord_from_ordset([B|Bs]) ->  % the first limb starts out with just bit B
+  bitord_from_ordset_1(Bs, ?LIMB_IX(B), ?BIT_MASK(B)).
+
+bitord_from_ordset_1([B|Bs], Key, Val) when Key =:= ?LIMB_IX(B) ->
+  bitord_from_ordset_1(Bs, Key, Val bor ?BIT_MASK(B)); % same limb: add the bit
+bitord_from_ordset_1([B|Bs], Key, Val) ->  % B belongs to another limb
+  [{Key,Val} | bitord_from_ordset_1(Bs, ?LIMB_IX(B), ?BIT_MASK(B))];
+bitord_from_ordset_1([], Key, Val) -> [{Key, Val}].  % emit the last limb
+
+%% bitarr(): fast (enough) at get/2
+-type bitarr() :: array:array(0..((1 bsl ?LIMB_BITS)-1)).
+
+%% The empty set. Limbs that have not been stored default to 0.
+-spec bitarr_new() -> bitarr().
+bitarr_new() -> array:new({default, 0}).
+
+-spec bitarr_get(non_neg_integer(), bitarr()) -> boolean().
+bitarr_get(Index, Array) -> % true iff bit Index is set
+  (array:get(?LIMB_IX(Index), Array) band ?BIT_MASK(Index)) =/= 0.
+
+-spec bitarr_set(non_neg_integer(), bitarr()) -> bitarr().
+bitarr_set(Index, Array) -> % Array with bit Index set
+  LimbIx = ?LIMB_IX(Index),
+  Limb = array:get(LimbIx, Array) bor ?BIT_MASK(Index),
+  array:set(LimbIx, Limb, Array).
+
+-spec bitarr_from_bitord(bitord()) -> bitarr().
+bitarr_from_bitord(Ord) ->  % limbs absent from Ord get the default value 0
+  array:from_orddict(Ord, 0).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Sixth pass: Partition-local liveness analysis
+%%
+%% As temps are not spilled when exiting a partition in mode2, only
+%% partition-local uses need to be considered when deciding which temps need
+%% restoring at partition entry.
+
+-type plive() :: #{label() =>
+		     {call, liveset(), [label()]}
+		   | {nocall, {liveset(), liveset()}, liveset(), [label()]}}.
+
+-spec plive_analyse(cfg()) -> plive().
+plive_analyse(CFG) ->
+  %% The blocks are visited in postorder
+  Initial = plive_init(CFG),
+  plive_dataf(cfg_postorder(CFG), Initial).
+
+-spec plive_init(cfg()) -> plive().
+plive_init(#cfg{bbs = BBs}) ->
+  maps:map(fun(_Label, BB) -> plive_init_bb(BB) end, BBs).
+
+%% Initial value of one block. The live-in of a call block is fixed to the
+%% Gen set of its code, whereas any other block keeps its {Gen, Kill} pair
+%% and starts with an empty live-in.
+plive_init_bb(#bb{has_call = true, succ = Succs} = BB) ->
+  {Gen, _Kill} = plive_init_scan(bb_code(BB)),
+  {call, Gen, Succs};
+plive_init_bb(#bb{has_call = false, succ = Succs} = BB) ->
+  GenKill = plive_init_scan(bb_code(BB)),
+  {nocall, GenKill, liveset_empty(), Succs}.
+
+%% Gen and Kill sets of a code sequence: Gen holds the temps used before
+%% being defined in it, Kill the temps defined before being used in it.
+-spec plive_init_scan([instr()]) -> {liveset(), liveset()}.
+plive_init_scan(Instrs) ->
+  lists:foldr(
+    fun(#instr{def = Def, use = Use}, {Gen, Kill}) ->
+        {liveset_union(liveset_subtract(Gen, Def), Use),
+         liveset_subtract(liveset_union(Kill, Def), Use)}
+    end, {liveset_empty(), liveset_empty()}, Instrs).
+
+%% Iterates plive_dataf_once/3 until a pass leaves every entry unchanged.
+-spec plive_dataf([label()], plive()) -> plive().
+plive_dataf(Labels, PLive0) ->
+  {PLive, NChanged} = plive_dataf_once(Labels, PLive0, 0),
+  if NChanged =:= 0 -> PLive;
+     true           -> plive_dataf(Labels, PLive)
+  end.
+
+%% One backward pass. Entries of call blocks are left as they are; for any
+%% other block the live-in is recomputed as Gen + (live-out - Kill). Returns
+%% the new map and the number of entries that changed.
+-spec plive_dataf_once([label()], plive(), non_neg_integer()) ->
+                          {plive(), non_neg_integer()}.
+plive_dataf_once([], PLive, Changed) -> {PLive, Changed};
+plive_dataf_once([L|Ls], PLive0, Changed0) ->
+  Old = maps:get(L, PLive0),
+  New = plive_transfer(Old, L, PLive0),
+  Changed = if New =:= Old -> Changed0; true -> Changed0 + 1 end,
+  plive_dataf_once(Ls, PLive0#{L := New}, Changed).
+
+%% Transfer function of a single block.
+plive_transfer({call, _Livein, _Succs} = Fixed, _L, _PLive) -> Fixed;
+plive_transfer({nocall, {Gen, Kill} = GenKill, _Old, Succs}, L, PLive) ->
+  Liveout = pliveout(L, PLive),
+  Livein = liveset_union(Gen, liveset_subtract(Liveout, Kill)),
+  {nocall, GenKill, Livein, Succs}.
+
+-spec pliveout(label(), plive()) -> liveset().
+pliveout(L, PLive) ->  % union of the live-in sets of L's successors
+  liveset_union([plivein(S, PLive) || S <- psuccs(L, PLive)]).
+
+-spec psuccs(label(), plive()) -> [label()].
+psuccs(L, PLive) -> psuccs_val(maps:get(L, PLive)).  % successors stored for L
+psuccs_val({call, _Livein, Succs}) -> Succs;
+psuccs_val({nocall, _GenKill, _Livein, Succs}) -> Succs.
+
+-spec plivein(label(), plive()) -> liveset().
+plivein(L, PLive) -> plivein_val(maps:get(L, PLive)).  % live-in stored for L
+plivein_val({call, Livein, _Succs}) -> Livein;
+plivein_val({nocall, _GenKill, Livein, _Succs}) ->  Livein.
+
+liveset_empty() -> ordsets:new().  % livesets are plain ordsets
+liveset_subtract(A, B) -> ordsets:subtract(A, B).
+liveset_union(A, B) -> ordsets:union(A, B).
+liveset_union(LivesetList) -> ordsets:union(LivesetList).  % union of a list of sets
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Third pass: Compute dataflow analyses required for placing mode3
+%% spills/restores.
+%% Reuse analysis implementation in hipe_restore_reuse.
+%% XXX: hipe_restore_reuse has its own "rdef"; we would like to reuse that one
+%% too.
+-type avail() :: hipe_restore_reuse:avail().
+
+-spec avail_analyse(target_cfg(), liveness(), target()) -> avail().
+avail_analyse(CFG, Liveness, Target) ->  % runs on the target cfg, not on cfg()
+  hipe_restore_reuse:analyse(CFG, Liveness, Target).
+
+-spec mode3_split_in_block(label(), avail()) -> ordsets:ordset(temp()).
+mode3_split_in_block(L, Avail) ->  % wrapper; the result seeds scan_bb_fwd/4
+  hipe_restore_reuse:split_in_block(L, Avail).
+
+-spec mode3_block_renameset(label(), avail()) -> ordsets:ordset(temp()).
+mode3_block_renameset(L, Avail) ->  % wrapper around renamed_in_block/2
+  hipe_restore_reuse:renamed_in_block(L, Avail).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Seventh pass
+%%
+%% Compute program space partitioning, collect information required by the
+%% heuristic.
+-type part_key() :: label().
+-type part_dsets() :: hipe_dsets:dsets(part_key()).
+-type part_dsets_map() :: #{part_key() => part_key()}.
+-type ducounts() :: #{part_key() => ducount()}.
+
+-spec scan(cfg(), liveness(), plive(), weights(), defs(), rdefs(), avail(),
+	   target()) -> {cfg(), ducounts(), costs(), part_dsets()}.
+scan(CFG0, Liveness, PLive, Weights, Defs, RDefs, Avail, Target) ->
+  #cfg{rpo_labels = Labels, bbs = BBs0} = CFG0,
+  CFG = CFG0#cfg{bbs=#{}}, % drop the old bbs from CFG; new ones are put in below
+  DSets0 = hipe_dsets:new(Labels),  % the partitions are built over the labels
+  Costs0 = costs_new(),
+  {BBs, DUCounts0, Costs1, DSets1} =
+    scan_bbs(maps:to_list(BBs0), Liveness, PLive, Weights, Defs, RDefs, Avail,
+	     Target, #{}, Costs0, DSets0, []),
+  {RLList, DSets2} = hipe_dsets:to_rllist(DSets1),
+  {Costs, DSets} = costs_map_roots(DSets2, Costs1),
+  DUCounts = collect_ducounts(RLList, DUCounts0, #{}),  % per label -> per R
+  {CFG#cfg{bbs=maps:from_list(BBs)}, DUCounts, Costs, DSets}.
+
+-spec collect_ducounts([{label(), [label()]}], ducounts(), ducounts())
+                      -> ducounts().
+collect_ducounts(RLList, PerLabel, Acc0) -> % one merged count per {R, Ls}
+  Merge = fun(L, Sum) ->
+              ducount_merge(maps:get(L, PerLabel, ducount_new()), Sum)
+          end,
+  lists:foldl(fun({R, Ls}, Acc) ->
+                  Acc#{R => lists:foldl(Merge, ducount_new(), Ls)}
+              end, Acc0, RLList).
+
+%% Scans all blocks: builds partitions, records mode costs, marks up the code.
+-spec scan_bbs([{label(), bb()}], liveness(), plive(), weights(), defs(),
+               rdefs(), avail(), target(), ducounts(), costs(), part_dsets(),
+               [{label(), bb()}])
+              -> {[{label(), bb()}], ducounts(), costs(), part_dsets()}.
+scan_bbs([], _Liveness, _PLive, _Weights, _Defs, _RDefs, _Avail, _Target,
+         DUCounts, Costs, DSets, Acc) ->
+  {Acc, DUCounts, Costs, DSets};
+scan_bbs([{L,BB}|BBs], Liveness, PLive, Weights, Defs, RDefs, Avail, Target,
+         DUCounts0, Costs0, DSets0, Acc) ->
+  Wt = weight(L, Weights),
+  BlockLiveout = liveout(Liveness, L, Target),
+  {DSets, CostsA, Body, Tail, RDefout, Liveout} =
+    case bb_has_call(BB) of
+      false ->
+        Joined = lists:foldl(fun(S, DS) -> hipe_dsets:union(L, S, DS) end,
+                             DSets0, bb_succ(BB)),
+        {Joined, Costs0, bb_code(BB), [], rdefout(L, RDefs), BlockLiveout};
+      true ->
+        Call = #instr{def=CallDef} = bb_last(BB),
+        LiveBefore = ordsets:subtract(BlockLiveout, CallDef),
+        %% We can omit the spill of a temp that has not been defined since the
+        %% last time it was spilled
+        SpillSet = defsetf_intersect_ordset(LiveBefore, defbutlast(L, Defs)),
+        CostsX = costs_insert(exit, L, Wt, SpillSet, Costs0),
+        EntryCosts = fun({S, BranchWt}, CostsIn) ->
+                         SWt = weight_scaled(L, BranchWt, Weights),
+                         SLivein = livein(Liveness, S, Target),
+                         CostsE1 = costs_insert(entry1, S, SWt, SLivein, CostsIn),
+                         costs_insert(entry2, S, SWt, plivein(S, PLive), CostsE1)
+                     end,
+        CostsE = lists:foldl(EntryCosts, CostsX, branch_preds(Call#instr.i, Target)),
+        {DSets0, CostsE, bb_butlast(BB), [Call], rdefsetf_empty(), LiveBefore}
+    end,
+  Mode3Splits = mode3_split_in_block(L, Avail),
+  {RevBody, Restored} = scan_bb_fwd(Body, Mode3Splits, [], []),
+  {Code, DUCount, Mode2Spills} =
+    scan_bb(RevBody, Wt, RDefout, Liveout, ducount_new(), [], Tail),
+  M2SpillSet = ordsets:from_list(Mode2Spills),
+  Mode3Renames = mode3_block_renameset(L, Avail),
+  CostsB = costs_insert(spill, L, Wt, M2SpillSet, CostsA),
+  CostsC = costs_insert(restore, L, Wt, ordsets:intersection(M2SpillSet, Mode3Renames), CostsB),
+  CostsD = costs_insert(restore, L, Wt, ordsets:from_list(Restored), CostsC),
+  Costs = add_unsplit_mode3_costs(DUCount, Mode3Renames, L, CostsD),
+  scan_bbs(BBs, Liveness, PLive, Weights, Defs, RDefs, Avail, Target,
+           DUCounts0#{L => DUCount}, Costs, DSets, [{L,BB#bb{code=Code}}|Acc]).
+
+-spec add_unsplit_mode3_costs(ducount(), ordsets:ordset(temp()), label(), costs())
+                             -> costs().
+add_unsplit_mode3_costs(DUCount, Mode3Renames, L, Costs) ->
+  Counted = orddict:from_list(ducount_to_list(DUCount)),
+  Unsplit = orddict_without_ordset(Mode3Renames, Counted),
+  add_unsplit_mode3_costs_1(Unsplit, L, Costs).
+
+-spec add_unsplit_mode3_costs_1([{temp(),float()}], label(), costs())
+                               -> costs().
+add_unsplit_mode3_costs_1(Unsplit, L, Costs0) -> % one 'restore' cost per pair
+  lists:foldl(fun({T, C}, Costs) -> costs_insert(restore, L, C, [T], Costs) end,
+              Costs0, Unsplit).
+
+%% @doc Returns a new orddict without keys in Set and their associated values.
+%% Both arguments are walked in step, relying on their common ordering.
+-spec orddict_without_ordset(ordsets:ordset(K), orddict:orddict(K, V))
+                            -> orddict:orddict(K, V).
+orddict_without_ordset([], Dict) -> Dict;
+orddict_without_ordset(_, []) -> [];
+orddict_without_ordset([S|Ss] = Set, [{K,_} = Pair|Pairs] = Dict) ->
+  if S < K -> orddict_without_ordset(Ss, Dict);
+     S > K -> [Pair|orddict_without_ordset(Set, Pairs)];
+     true  -> orddict_without_ordset(Ss, Pairs) % S == K
+  end.
+
+%% Scans the code forward, inserting a mode3 restore in front of the first
+%% use of each split temp. Returns the code reversed and the restored temps.
+-spec scan_bb_fwd([instr()], ordsets:ordset(temp()), ordsets:ordset(temp()),
+                  [code_elem()])
+                 -> {[code_elem()], ordsets:ordset(temp())}.
+scan_bb_fwd([], [], Restored, RevCode) -> {RevCode, Restored};
+scan_bb_fwd([I|Is], Pending0, Restored, RevCode) ->
+  #instr{def=Def, use=Use} = I,
+  {Needed, Unused} =
+    lists:partition(fun(T) -> lists:member(T, Use) end, Pending0),
+  Pending = [T || T <- Unused, not lists:member(T, Def)],
+  Emitted = case Needed of
+              [] -> [I | RevCode];
+              _  -> [I, #mode3_restores{temps=Needed} | RevCode]
+            end,
+  scan_bb_fwd(Is, Pending, Needed ++ Restored, Emitted).
+
+%% Scans the code backwards, collecting def/use counts and mode2 spills. The
+%% input is the reversed block body; the result is in program order again,
+%% with a spill marker following each instruction that needs one.
+-spec scan_bb([code_elem()], float(), rdefsetf(), liveset(), ducount(),
+              [temp()], [code_elem()])
+             -> {[code_elem()], ducount(), [temp()]}.
+scan_bb([], _Wt, _RDefout, _Liveout, DUCount, Spills, Code) ->
+  {Code, DUCount, Spills};
+scan_bb([R=#mode3_restores{}|Is], Wt, RDefout, Liveout, DUCount, Spills, Code) ->
+  scan_bb(Is, Wt, RDefout, Liveout, DUCount, Spills, [R|Code]);
+scan_bb([I|Is], Wt, RDefout, Liveout, DUCount0, Spills0, Code0) ->
+  #instr{def=Def,use=Use} = I,
+  %% The temps that would be spilled after I in mode 2
+  LiveDefs = ordsets:intersection(Def, Liveout),
+  NewSpills = ordset_subtract_rdefsetf(LiveDefs, RDefout),
+  ?ASSERT(NewSpills =:= (NewSpills -- Spills0)),
+  Spills = NewSpills ++ Spills0,
+  Code1 = case NewSpills of
+            [] -> Code0;
+            _ -> [#mode2_spills{temps=NewSpills}|Code0]
+          end,
+  DUCount = ducount_add(Use, Wt, ducount_add(Def, Wt, DUCount0)),
+  scan_bb(Is, Wt, rdef_step(I, RDefout), liveness_step(I, Liveout), DUCount,
+          Spills, [I|Code1]).
+
+-spec liveness_step(instr(), liveset()) -> liveset().
+liveness_step(#instr{def=Def, use=Use}, Liveout) ->  % Use + (Liveout - Def)
+  ordsets:union(Use, ordsets:subtract(Liveout, Def)).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% First pass: compute basic-block weighting
+
+-type weights() :: no_bb_weights
+		 | {hipe_bb_weights:bb_weights(), float()}.
+
+-spec weight(label(), weights()) -> float().
+weight(L, Weights) -> weight_scaled(L, 1.0, Weights).  % weight with scale 1.0
+
+-spec compute_weights(target_cfg(), target_module(), target_context(),
+		      comp_options()) -> weights().
+compute_weights(CFG, TargetMod, TargetContext, Options) ->
+  case proplists:get_bool(range_split_weights, Options) of
+    false -> no_bb_weights;  % weighting is off unless the option is set
+    true ->
+      {hipe_bb_weights:compute(CFG, TargetMod, TargetContext),
+       ?WEIGHT_CONST_FUN(proplists:get_value(range_split_weight_power,
+					     Options, ?DEFAULT_WEIGHT_POWER))}
+  end.
+
+-spec weight_scaled(label(), float(), weights()) -> float().
+weight_scaled(_L, _Scale, no_bb_weights) -> 1.0;  % every block weighs the same
+weight_scaled(L, Scale, {Weights, Const}) ->
+  Wt0 = hipe_bb_weights:weight(L, Weights) * Scale,
+  Wt = erlang:min(erlang:max(Wt0, 0.0000000000000000001), 10000.0),  % clamp
+  ?WEIGHT_FUN(Wt, Const).  % i.e. Wt raised to the power Const
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Heuristic splitting decision.
+%%
+%% Decide which temps to split, in which parts, and pick new names for them.
+-type spill_mode() :: mode1 % Spill temps at partition exits
+		    | mode2 % Spill temps at definitions
+		    | mode3.% Spill temps at definitions, restore temps at uses
+-type ren() :: #{temp() => {spill_mode(), temp()}}.
+-type renames() :: #{label() => ren()}.
+
+-record(heur_par, {
+	  mode1_fudge :: float(),
+	  min_gain    :: float()
+	  }).
+-type heur_par() :: #heur_par{}.
+
+-spec decide(ducounts(), costs(), target(), comp_options()) -> renames().
+decide(DUCounts, Costs, Target, Options) ->
+  %% Both tuning knobs can be overridden through the compiler options
+  Opt = fun(Key, Default) -> proplists:get_value(Key, Options, Default) end,
+  Par = #heur_par{
+           mode1_fudge = Opt(range_split_mode1_fudge, ?DEFAULT_MODE1_FUDGE),
+           min_gain    = Opt(range_split_min_gain, ?DEFAULT_MIN_GAIN)},
+  decide_parts(maps:to_list(DUCounts), Costs, Target, Par, #{}).
+
+-spec decide_parts([{part_key(), ducount()}], costs(), target(),
+                   heur_par(), renames())
+                  -> renames().
+decide_parts([], _Costs, _Target, _Par, Done) -> Done; % all partitions decided
+decide_parts([{Key, Counts}|Rest], Costs, Target, Par, Done) ->
+  Gains = ducount_to_list(Counts),
+  Ren = decide_temps(Gains, Key, Costs, Target, Par, #{}),
+  decide_parts(Rest, Costs, Target, Par, Done#{Key => Ren}).
+
+%% Decides, for every temp counted in partition Part, whether to split it
+%% and in which mode. A temp is split only if it is not precoloured, has a
+%% non-zero mode1 cost, and min_gain times the cost of some mode is below
+%% its SpillGain. A split temp gets a register number from new_reg_nr/1.
+-spec decide_temps([{temp(), float()}], part_key(), costs(), target(),
+                   heur_par(), ren())
+                  -> ren().
+decide_temps([], _Part, _Costs, _Target, _Par, Acc) -> Acc;
+decide_temps([{Temp, SpillGain}|Ts], Part, Costs, Target, Par, Acc0) ->
+  #heur_par{mode1_fudge = Fudge, min_gain = MinGain} = Par,
+  Cost = fun(Kind) -> costs_query(Temp, Kind, Part, Costs) end,
+  SpillCost1 = Cost(entry1) + Cost(exit),
+  SpillCost2 = Cost(entry2) + Cost(spill),
+  SpillCost3 = Cost(restore),
+  Gainful = fun(C) -> MinGain*C < SpillGain end,
+  %% SpillCost1 =:= 0.0 usually means the temp is local to the partition;
+  %% hence no need to split it
+  Split = (SpillCost1 =/= 0.0)
+    andalso (not is_precoloured(Temp, Target))
+    andalso (Gainful(SpillCost1) orelse Gainful(SpillCost2)
+             orelse Gainful(SpillCost3)),
+  Acc = case Split of
+          false -> Acc0;
+          true ->
+            %% mode1 must win with the fudge factor applied; mode2 vs mode3 ties go to mode3
+            Mode = if Fudge*SpillCost1 < SpillCost2,
+                      Fudge*SpillCost1 < SpillCost3 -> mode1;
+                      SpillCost2 < SpillCost3 -> mode2;
+                      true -> mode3
+                   end,
+            Acc0#{Temp => {Mode, new_reg_nr(Target)}}
+        end,
+  decide_temps(Ts, Part, Costs, Target, Par, Acc).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Eighth pass: Rewrite program performing range splitting.
+
+-spec rewrite(cfg(), target_cfg(), target(), liveness(), plive(), defs(),
+	      avail(), part_dsets_map(), renames(), temps())
+	     -> target_cfg().
+rewrite(#cfg{bbs=BBs}, TCFG, Target, Liveness, PLive, Defs, Avail, DSets,
+	Renames, Temps) ->
+  rewrite_bbs(maps:to_list(BBs), Target, Liveness, PLive, Defs, Avail, DSets,
+	      Renames, Temps, TCFG).
+
+-spec rewrite_bbs([{label(), bb()}], target(), liveness(), plive(), defs(),
+		  avail(), part_dsets_map(), renames(), temps(), target_cfg())
+		 -> target_cfg().
+rewrite_bbs([], _Target, _Liveness, _PLive, _Defs, _Avail, _DSets, _Renames,
+	    _Temps, TCFG) ->
+  TCFG;
+rewrite_bbs([{L,BB}|BBs], Target, Liveness, PLive, Defs, Avail, DSets, Renames,
+	    Temps, TCFG0) ->
+  Code0Rev = lists:reverse(bb_code(BB)),
+  EntryRen = maps:get(maps:get(L,DSets), Renames),
+  M3Ren = mode3_block_renameset(L, Avail),
+  SubstFun = rewrite_subst_fun(Target, EntryRen, M3Ren),
+  Fun = fun(I) -> subst_temps(SubstFun, I, Target) end,
+  {Code, TCFG} =
+    case bb_has_call(BB) of
+      false ->
+	Code1 = rewrite_instrs(Code0Rev, Fun, EntryRen, M3Ren, Temps, Target,
+			       []),
+	{Code1, TCFG0};
+      true ->
+	CallI0 = hd(Code0Rev),
+	Succ = bb_succ(BB),
+	{CallTI, TCFG1} = inject_restores(Succ, Target, Liveness, PLive, DSets,
+					 Renames, Temps, CallI0#instr.i, TCFG0),
+	Liveout1 = liveness_step(CallI0, liveout(Liveness, L, Target)),
+	Defout = defbutlast(L, Defs),
+	SpillMap = mk_spillmap(EntryRen, Liveout1, Defout, Temps, Target),
+	Code1 = rewrite_instrs(tl(Code0Rev), Fun, EntryRen, M3Ren, Temps,
+			       Target, []),
+	Code2 = lift_spills(lists:reverse(Code1), Target, SpillMap, [CallTI]),
+	{Code2, TCFG1}
+    end,
+  TBB = hipe_bb:code_update(bb(TCFG, L, Target), Code),
+  rewrite_bbs(BBs, Target, Liveness, PLive, Defs, Avail, DSets, Renames, Temps,
+	      update_bb(TCFG, L, TBB, Target)).
+
+-spec rewrite_instrs([code_elem()], rewrite_fun(), ren(),
+                     ordsets:ordset(temp()), temps(), target(),
+                     [target_instr()])
+                    -> [target_instr()].
+rewrite_instrs(Elems, Fun, Ren, M3Ren, Temps, Target, Acc0) ->
+  lists:foldl(
+    fun(Elem, Acc) ->
+        case Elem of
+          #instr{i=TI} -> [Fun(TI)|Acc];
+          #mode2_spills{temps=Spills} ->
+            add_mode2_spills(Spills, Target, Ren, M3Ren, Temps, Acc);
+          #mode3_restores{temps=Restores} ->
+            add_mode3_restores(Restores, Target, Ren, Temps, Acc)
+        end
+    end, Acc0, Elems).
+
+-spec add_mode2_spills(ordsets:ordset(temp()), target(), ren(),
+                       ordsets:ordset(temp()), temps(), [target_instr()])
+                      -> [target_instr()].
+add_mode2_spills([], _Target, _Ren, _M3Ren, _Temps, Acc) -> Acc;
+add_mode2_spills([R|Rs], Target, Ren, M3Ren, Temps, Acc) ->
+  %% mode2 temps always get a spill; mode3 temps only when R is in M3Ren
+  Spill = case Ren of
+            #{R := {mode2, _}} -> true;
+            #{R := {mode3, _}} -> lists:member(R, M3Ren);
+            #{} -> false
+          end,
+  case Spill of
+    false -> add_mode2_spills(Rs, Target, Ren, M3Ren, Temps, Acc);
+    true ->
+      #{R := {_, NewName}} = Ren,
+      #{R := T} = Temps,
+      SpillInstr = mk_move(update_reg_nr(NewName, T, Target), T, Target),
+      add_mode2_spills(Rs, Target, Ren, M3Ren, Temps, [SpillInstr|Acc])
+  end.
+
+-spec add_mode3_restores(ordsets:ordset(temp()), target(), ren(), temps(),
+                         [target_instr()])
+                        -> [target_instr()].
+add_mode3_restores([], _Target, _Ren, _Temps, Acc) -> Acc;
+add_mode3_restores([R|Rs], Target, Ren, Temps, Acc0) ->
+  Acc =
+    case Ren of
+      #{R := {mode3, NewName}} ->
+        #{R := T} = Temps,
+        [mk_move(T, update_reg_nr(NewName, T, Target), Target)|Acc0];
+      #{} -> Acc0
+    end,
+  add_mode3_restores(Rs, Target, Ren, Temps, Acc).
+
+-type rewrite_fun() :: fun((target_instr()) -> target_instr()).
+-type subst_fun() :: fun((target_temp()) -> target_temp()).
+-spec rewrite_subst_fun(target(), ren(), ordsets:ordset(temp())) -> subst_fun().
+rewrite_subst_fun(Target, Ren, M3Ren) ->
+  fun(Temp) -> % returns Temp itself or Temp renumbered to its new name
+      Reg = reg_nr(Temp, Target),
+      case Ren of
+	#{Reg := {Mode, NewName}} ->
+	  case Mode =/= mode3 orelse lists:member(Reg, M3Ren) of
+	    false -> Temp; % mode3 temp that is not in M3Ren: left as is
+	    true -> update_reg_nr(NewName, Temp, Target)
+	  end;
+	#{} -> Temp % no rename recorded for Reg
+      end
+  end.
+
+-type spillmap() :: [{temp(), target_instr()}].
+-spec mk_spillmap(ren(), liveset(), defsetf(), temps(), target())
+		 -> spillmap().
+mk_spillmap(Ren, Livein, Defout, Temps, Target) ->
+  [begin
+     Temp = maps:get(Reg, Temps),
+     {NewName, mk_move(update_reg_nr(NewName, Temp, Target), Temp, Target)}
+   end || {Reg, {mode1, NewName}} <- maps:to_list(Ren),
+	  lists:member(Reg, Livein), defsetf_member(Reg, Defout)].
+
+-spec mk_restores(ren(), liveset(), liveset(), temps(), target())
+		 -> [target_instr()].
+mk_restores(Ren, Livein, PLivein, Temps, Target) ->
+  [begin
+     Temp = maps:get(Reg, Temps),
+     mk_move(Temp, update_reg_nr(NewName, Temp, Target), Target)
+   end || {Reg, {Mode, NewName}} <- maps:to_list(Ren),
+	  (       (Mode =:= mode1 andalso lists:member(Reg, Livein ))
+	   orelse (Mode =:= mode2 andalso lists:member(Reg, PLivein)))].
+
+-spec inject_restores([label()], target(), liveness(), plive(),
+		      part_dsets_map(), renames(), temps(), target_instr(),
+		      target_cfg())
+		     -> {target_instr(), target_cfg()}.
+inject_restores([], _Target, _Liveness, _PLive, _DSets, _Renames, _Temps, CFTI,
+		TCFG) ->
+  {CFTI, TCFG};
+inject_restores([L|Ls], Target, Liveness, PLive, DSets, Renames, Temps, CFTI0,
+		TCFG0) ->
+  Ren = maps:get(maps:get(L,DSets), Renames), % renames filed under L's DSets key
+  Livein = livein(Liveness, L, Target),
+  PLivein = plivein(L, PLive),
+  {CFTI, TCFG} =
+    case mk_restores(Ren, Livein, PLivein, Temps, Target) of
+      [] -> {CFTI0, TCFG0}; % optimisation: no restores, so no extra block
+      Restores ->
+	RestBBLbl = new_label(Target),
+	Code = Restores ++ [mk_goto(L, Target)], % restores, then jump on to L
+	CFTI1 = redirect_jmp(CFTI0, L, RestBBLbl, Target), % L -> RestBBLbl
+	TCFG1 = update_bb(TCFG0, RestBBLbl, hipe_bb:mk_bb(Code), Target),
+	{CFTI1, TCFG1}
+    end,
+  inject_restores(Ls, Target, Liveness, PLive, DSets, Renames, Temps, CFTI,
+		  TCFG).
+
+%% Heuristic. Move spills up until we meet the edge of the BB or a definition of
+%% that temp.
+-spec lift_spills([target_instr()], target(), spillmap(), [target_instr()])
+                 -> [target_instr()].
+lift_spills([], _Target, Pending, Acc) ->
+  [SpillI || {_, SpillI} <- Pending] ++ Acc;
+lift_spills([I|Is], Target, Pending0, Acc0) ->
+  Defined = reg_defines(I, Target),
+  DefinedHere = fun({Reg,_}) -> lists:member(Reg, Defined) end,
+  {Emit, Pending} = lists:partition(DefinedHere, Pending0),
+  Acc = [I | [SpillI || {_, SpillI} <- Emit] ++ Acc0],
+  lift_spills(Is, Target, Pending, Acc).
+
+reg_defines(I, Target) ->
+  reg_names(defines(I,Target), Target).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Costs ADT
+%%
+%% Keeps track of cumulative cost of spilling temps in particular partitions
+%% using particular spill modes.
+-type cost_map() :: #{[part_key()|temp()] => float()}.
+-type cost_key() :: entry1 | entry2 | exit | spill | restore.
+-record(costs, {entry1  = #{} :: cost_map()
+	       ,entry2  = #{} :: cost_map()
+	       ,exit    = #{} :: cost_map()
+	       ,spill   = #{} :: cost_map()
+	       ,restore = #{} :: cost_map()
+	       }).
+-type costs() :: #costs{}.
+
+-spec costs_new() -> costs().
+costs_new() -> #costs{}.
+
+-spec costs_insert(cost_key(), part_key(), float(), liveset(), costs())
+		  -> costs().
+costs_insert(entry1, A, Weight, Liveset, Costs=#costs{entry1=Entry1}) ->
+  Costs#costs{entry1=costs_insert_1(A, Weight, Liveset, Entry1)};
+costs_insert(entry2, A, Weight, Liveset, Costs=#costs{entry2=Entry2}) ->
+  Costs#costs{entry2=costs_insert_1(A, Weight, Liveset, Entry2)};
+costs_insert(exit, A, Weight, Liveset, Costs=#costs{exit=Exit}) ->
+  Costs#costs{exit=costs_insert_1(A, Weight, Liveset, Exit)};
+costs_insert(spill, A, Weight, Liveset, Costs=#costs{spill=Spill}) ->
+  Costs#costs{spill=costs_insert_1(A, Weight, Liveset, Spill)};
+costs_insert(restore, A, Weight, Liveset, Costs=#costs{restore=Restore}) ->
+  Costs#costs{restore=costs_insert_1(A, Weight, Liveset, Restore)}.
+
+costs_insert_1(A, Weight, Liveset, CostMap0) when is_float(Weight) ->
+  %% Add Weight to the counter keyed [A|T] for every element T of Liveset
+  Bump = fun(T, CM) -> map_update_counter([A|T], Weight, CM) end,
+  lists:foldl(Bump, CostMap0, Liveset).
+
+-spec costs_map_roots(part_dsets(), costs()) -> {costs(), part_dsets()}.
+costs_map_roots(DSets0, Costs) ->
+  {Entry1,  DSets1} = costs_map_roots_1(DSets0, Costs#costs.entry1),
+  {Entry2,  DSets2} = costs_map_roots_1(DSets1, Costs#costs.entry2),
+  {Exit,    DSets3} = costs_map_roots_1(DSets2, Costs#costs.exit),
+  {Spill,   DSets4} = costs_map_roots_1(DSets3, Costs#costs.spill),
+  {Restore, DSets}  = costs_map_roots_1(DSets4, Costs#costs.restore),
+  {#costs{entry1=Entry1,entry2=Entry2,exit=Exit,spill=Spill,restore=Restore},
+   DSets}.
+
+costs_map_roots_1(DSets0, CostMap) ->
+  {NewEs, DSets} = lists:mapfoldl(fun({[A|T], Wt}, DSets1) ->
+				      {AR, DSets2} = hipe_dsets:find(A, DSets1),
+				      {{[AR|T], Wt}, DSets2}
+				  end, DSets0, maps:to_list(CostMap)),
+  {maps_from_list_merge(NewEs, fun erlang:'+'/2, #{}), DSets}.
+
+maps_from_list_merge(Pairs, MF, Acc0) ->
+  lists:foldl(fun({K,V}, Acc) -> case Acc of
+                                   #{K := OV} -> Acc#{K := MF(V, OV)};
+                                   #{}        -> Acc#{K => V}
+                                 end
+              end, Acc0, Pairs).
+
+-spec costs_query(temp(), cost_key(), part_key(), costs()) -> float().
+costs_query(Temp, entry1, Part, #costs{entry1=Entry1}) ->
+  costs_query_1(Temp, Part, Entry1);
+costs_query(Temp, entry2, Part, #costs{entry2=Entry2}) ->
+  costs_query_1(Temp, Part, Entry2);
+costs_query(Temp, exit, Part, #costs{exit=Exit}) ->
+  costs_query_1(Temp, Part, Exit);
+costs_query(Temp, spill, Part, #costs{spill=Spill}) ->
+  costs_query_1(Temp, Part, Spill);
+costs_query(Temp, restore, Part, #costs{restore=Restore}) ->
+  costs_query_1(Temp, Part, Restore).
+
+costs_query_1(Temp, Part, CostMap) ->
+  %% Entries are keyed on the improper list [Part|Temp], the same shape
+  %% that costs_insert_1/4 builds
+  Key = [Part|Temp],
+  %% A key that is absent counts as zero cost
+  maps:get(Key, CostMap, 0.0).
+
+-spec map_update_counter(Key, number(), #{Key => number(), OK => OV})
+                        -> #{Key := number(), OK => OV}.
+map_update_counter(Key, Incr, Map) ->
+  case maps:find(Key, Map) of
+    {ok, Orig} -> maps:put(Key, Orig + Incr, Map);
+    error      -> maps:put(Key, Incr, Map)
+  end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Def and use counting ADT
+-type ducount() :: #{temp() => float()}.
+
+-spec ducount_new() -> ducount().
+ducount_new() -> #{}.
+
+-spec ducount_add([temp()], float(), ducount()) -> ducount().
+ducount_add(Temps, Weight, DUCount0) ->
+  %% Add Weight to the count of each listed temp, once per occurrence
+  lists:foldl(fun(T, DUCount) ->
+                  case DUCount of
+                    #{T := Count} -> DUCount#{T := Count + Weight};
+                    #{}           -> DUCount#{T => Weight}
+                  end
+              end, DUCount0, Temps).
+
+ducount_to_list(DUCount) -> maps:to_list(DUCount).
+
+-spec ducount_merge(ducount(), ducount()) -> ducount().
+ducount_merge(DCA, DCB) when map_size(DCA) < map_size(DCB) ->
+  ducount_merge_1(ducount_to_list(DCA), DCB);
+ducount_merge(DCA, DCB) when map_size(DCA) >= map_size(DCB) ->
+  ducount_merge_1(ducount_to_list(DCB), DCA).
+
+ducount_merge_1(Pairs, DUCount0) ->
+  %% Fold the listed counts into the map, summing counts of shared temps
+  lists:foldl(fun({T,AC}, DUCount) ->
+                  case DUCount of
+                    #{T := BC} -> DUCount#{T := AC + BC};
+                    #{}        -> DUCount#{T => AC}
+                  end
+              end, DUCount0, Pairs).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Target module interface functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-define(TGT_IFACE_0(N), N(         {M,C}) -> M:N(         C)).
+-define(TGT_IFACE_1(N), N(A1,      {M,C}) -> M:N(A1,      C)).
+-define(TGT_IFACE_2(N), N(A1,A2,   {M,C}) -> M:N(A1,A2,   C)).
+-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)).
+
+?TGT_IFACE_2(bb).
+?TGT_IFACE_1(def_use).
+?TGT_IFACE_1(defines).
+?TGT_IFACE_1(defines_all_alloc).
+?TGT_IFACE_1(is_precoloured).
+?TGT_IFACE_1(mk_goto).
+?TGT_IFACE_2(mk_move).
+?TGT_IFACE_0(new_label).
+?TGT_IFACE_0(new_reg_nr).
+?TGT_IFACE_1(number_of_temporaries).
+?TGT_IFACE_3(redirect_jmp).
+?TGT_IFACE_1(reg_nr).
+?TGT_IFACE_1(reverse_postorder).
+?TGT_IFACE_2(subst_temps).
+?TGT_IFACE_3(update_bb).
+?TGT_IFACE_2(update_reg_nr).
+
+branch_preds(Instr, {TgtMod,TgtCtx}) ->
+  merge_sorted_preds(lists:keysort(1, TgtMod:branch_preds(Instr, TgtCtx))).
+
+livein(Liveness, L, Target={TgtMod,TgtCtx}) ->
+  ordsets:from_list(reg_names(TgtMod:livein(Liveness, L, TgtCtx), Target)).
+
+liveout(Liveness, L, Target={TgtMod,TgtCtx}) ->
+  ordsets:from_list(reg_names(TgtMod:liveout(Liveness, L, TgtCtx), Target)).
+
+merge_sorted_preds([{Lbl, A}, {Lbl, B}|Rest]) -> % same label twice: add up
+  merge_sorted_preds([{Lbl, A+B}|Rest]);
+merge_sorted_preds([Pred|Rest]) -> [Pred|merge_sorted_preds(Rest)];
+merge_sorted_preds([]) -> [].
+
+reg_names(Regs, {TgtMod,TgtCtx}) ->
+  lists:map(fun(Reg) -> TgtMod:reg_nr(Reg, TgtCtx) end, Regs).
diff --git a/lib/hipe/regalloc/hipe_regalloc_loop.erl b/lib/hipe/regalloc/hipe_regalloc_loop.erl
index 5bbb0ba7c1..29ef3adcc2 100644
--- a/lib/hipe/regalloc/hipe_regalloc_loop.erl
+++ b/lib/hipe/regalloc/hipe_regalloc_loop.erl
@@ -32,9 +32,11 @@ ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod, TargetCtx) ->
 ra_common(CFG0, Liveness0, SpillIndex, Options, RegAllocMod, TargetMod,
 	  TargetCtx) ->
   ?inc_counter(ra_calls_counter, 1),
-  SpillLimit0 = TargetMod:number_of_temporaries(CFG0, TargetCtx),
+  {CFG1, Liveness1} =
+    do_range_split(CFG0, Liveness0, TargetMod, TargetCtx, Options),
+  SpillLimit0 = TargetMod:number_of_temporaries(CFG1, TargetCtx),
   {Coloring, _, CFG, Liveness} =
-    call_allocator_initial(CFG0, Liveness0, SpillLimit0, SpillIndex, Options,
+    call_allocator_initial(CFG1, Liveness1, SpillLimit0, SpillIndex, Options,
 			   RegAllocMod, TargetMod, TargetCtx),
   %% The first iteration, the hipe_regalloc_prepass may create new temps, these
   %% should not end up above SpillLimit.
@@ -96,3 +98,20 @@ call_allocator(CFG, Liveness, SpillLimit, SpillIndex, Options, RegAllocMod,
       RegAllocMod:regalloc(CFG, Liveness, SpillIndex, SpillLimit, TargetMod,
 			   TargetCtx, Options)
   end.
+
+do_range_split(CFG0, Liveness0, TgtMod, TgtCtx, Options) ->
+  %% Restore-reuse splitting runs first, then range splitting. Liveness is
+  %% recomputed with TgtMod:analyze/2 after each pass that actually ran.
+  {CFG2, Liveness1} =
+    case proplists:get_bool(ra_restore_reuse, Options) of
+      false -> {CFG0, Liveness0};
+      true ->
+        CFG1 = hipe_restore_reuse:split(CFG0, Liveness0, TgtMod, TgtCtx),
+        {CFG1, TgtMod:analyze(CFG1, TgtCtx)}
+    end,
+  case proplists:get_bool(ra_range_split, Options) of
+    false -> {CFG2, Liveness1};
+    true ->
+      CFG3 = hipe_range_split:split(CFG2, Liveness1, TgtMod, TgtCtx, Options),
+      {CFG3, TgtMod:analyze(CFG3, TgtCtx)}
+  end.
diff --git a/lib/hipe/regalloc/hipe_regalloc_prepass.erl b/lib/hipe/regalloc/hipe_regalloc_prepass.erl
index e212420ad2..5024840237 100644
--- a/lib/hipe/regalloc/hipe_regalloc_prepass.erl
+++ b/lib/hipe/regalloc/hipe_regalloc_prepass.erl
@@ -483,8 +483,8 @@ merge_pointless_splits_1([], _ScanBBs, DSets, Acc) -> {Acc, DSets};
 merge_pointless_splits_1([P={_,{single,_}}|Ps], ScanBBs, DSets, Acc) ->
   merge_pointless_splits_1(Ps, ScanBBs, DSets, [P|Acc]);
 merge_pointless_splits_1([P0={L,{split,_,_}}|Ps], ScanBBs, DSets0, Acc) ->
-  {EntryRoot, DSets1} = dsets_find({entry,L}, DSets0),
-  {ExitRoot,  DSets}  = dsets_find({exit,L},  DSets1),
+  {EntryRoot, DSets1} = hipe_dsets:find({entry,L}, DSets0),
+  {ExitRoot,  DSets}  = hipe_dsets:find({exit,L},  DSets1),
   case EntryRoot =:= ExitRoot of
     false -> merge_pointless_splits_1(Ps, ScanBBs, DSets, [P0|Acc]);
     true ->
@@ -501,7 +501,7 @@ merge_pointless_splits_1([P0={L,{split,_,_}}|Ps], ScanBBs, DSets0, Acc) ->
 
 -spec merge_small_parts(bb_dsets()) -> {bb_dsets_rllist(), bb_dsets()}.
 merge_small_parts(DSets0) ->
-  {RLList, DSets1} = dsets_to_rllist(DSets0),
+  {RLList, DSets1} = hipe_dsets:to_rllist(DSets0),
   RLLList = [{R, length(Elems), Elems} || {R, Elems} <- RLList],
   merge_small_parts_1(RLLList, DSets1, []).
 
@@ -518,8 +518,8 @@ merge_small_parts_1([Fst,{R, L, Es}|Ps], DSets, Acc)
   merge_small_parts_1([Fst|Ps], DSets, [{R,Es}|Acc]);
 merge_small_parts_1([{R1,L1,Es1},{R2,L2,Es2}|Ps], DSets0, Acc) ->
   ?ASSERT(L1 < ?TUNE_TOO_FEW_BBS andalso L2 < ?TUNE_TOO_FEW_BBS),
-  DSets1 = dsets_union(R1, R2, DSets0),
-  {R, DSets} = dsets_find(R1, DSets1),
+  DSets1 = hipe_dsets:union(R1, R2, DSets0),
+  {R, DSets} = hipe_dsets:find(R1, DSets1),
   merge_small_parts_1([{R,L2+L1,Es2++Es1}|Ps], DSets, Acc).
 
 %% @doc Partition an ordering over BBs into subsequences for the dsets that
@@ -531,8 +531,8 @@ part_order(Lbs, DSets) -> part_order(Lbs, DSets, #{}).
 
 part_order([], DSets, Acc) -> {Acc, DSets};
 part_order([L|Ls], DSets0, Acc0) ->
-  {EntryRoot, DSets1} = dsets_find({entry,L}, DSets0),
-  {ExitRoot,  DSets2} = dsets_find({exit,L},  DSets1),
+  {EntryRoot, DSets1} = hipe_dsets:find({entry,L}, DSets0),
+  {ExitRoot,  DSets2} = hipe_dsets:find({exit,L},  DSets1),
   Acc1 = map_append(EntryRoot, L, Acc0),
   %% Only include the label once if both entry and exit is in same partition
   Acc2 = case EntryRoot =:= ExitRoot of
@@ -558,73 +558,26 @@ map_append(Key, Elem, Map) ->
 %% split point, and one from the end to the last split point.
 
 -type bb_dset_key() :: {entry | exit, label()}.
--type bb_dsets() :: dsets(bb_dset_key()).
+-type bb_dsets() :: hipe_dsets:dsets(bb_dset_key()).
 -type bb_dsets_rllist() :: [{bb_dset_key(), [bb_dset_key()]}].
 
 -spec initial_dsets(target_cfg(), module(), target_context()) -> bb_dsets().
 initial_dsets(CFG, TgtMod, TgtCtx) ->
   Labels = TgtMod:labels(CFG, TgtCtx),
-  DSets0 = dsets_new(lists:append([[{entry,L},{exit,L}] || L <- Labels])),
+  DSets0 = hipe_dsets:new(lists:append([[{entry,L},{exit,L}] || L <- Labels])),
   Edges = lists:append([[{L, S} || S <- hipe_gen_cfg:succ(CFG, L)]
 			|| L <- Labels]),
-  lists:foldl(fun({X, Y}, DS) -> dsets_union({exit,X}, {entry,Y}, DS) end,
+  lists:foldl(fun({X, Y}, DS) -> hipe_dsets:union({exit,X}, {entry,Y}, DS) end,
 	      DSets0, Edges).
 
 -spec join_whole_blocks(part_bb_list(), bb_dsets()) -> bb_dsets().
 join_whole_blocks(PartBBList, DSets0) ->
-  lists:foldl(fun({L, {single, _}}, DS) -> dsets_union({entry,L}, {exit,L}, DS);
+  lists:foldl(fun({L, {single, _}}, DS) ->
+		  hipe_dsets:union({entry,L}, {exit,L}, DS);
 		 ({_, {split, _, _}}, DS) -> DS
 	      end, DSets0, PartBBList).
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% The disjoint set forests data structure, for elements of arbitrary types.
-%% Note that the find operation mutates the set.
-%%
-%% We could do this more efficiently if we restricted the elements to integers,
-%% and used the (mutable) hipe arrays. For arbitrary terms ETS could be used,
-%% for a persistent interface (which isn't that nice when even accessors return
-%% modified copies), the array module could be used.
--type dsets(X) :: #{X => {node, X} | {root, non_neg_integer()}}.
-
--spec dsets_new([E]) -> dsets(E).
-dsets_new(Elems) -> maps:from_list([{E,{root,0}} || E <- Elems]).
-
--spec dsets_find(E, dsets(E)) -> {E, dsets(E)}.
-dsets_find(E, DS0) ->
-  case DS0 of
-    #{E := {root,_}} -> {E, DS0};
-    #{E := {node,N}} ->
-      case dsets_find(N, DS0) of
-	{N, _}=T -> T;
-	{R, DS1} -> {R, DS1#{E := {node,R}}}
-      end
-   ;_ -> error(badarg, [E, DS0])
-  end.
-
--spec dsets_union(E, E, dsets(E)) -> dsets(E).
-dsets_union(X, Y, DS0) ->
-  {XRoot, DS1} = dsets_find(X, DS0),
-  case dsets_find(Y, DS1) of
-    {XRoot, DS2} -> DS2;
-    {YRoot, DS2} ->
-      #{XRoot := {root,XRR}, YRoot := {root,YRR}} = DS2,
-      if XRR < YRR -> DS2#{XRoot := {node,YRoot}};
-	 XRR > YRR -> DS2#{YRoot := {node,XRoot}};
-	 true -> DS2#{YRoot := {node,XRoot}, XRoot := {root,XRR+1}}
-      end
-  end.
-
--spec dsets_to_rllist(dsets(E)) -> {[{Root::E, Elems::[E]}], dsets(E)}.
-dsets_to_rllist(DS0) ->
-  {Lists, DS} = dsets_to_rllist(maps:keys(DS0), #{}, DS0),
-  {maps:to_list(Lists), DS}.
-
-dsets_to_rllist([], Acc, DS) -> {Acc, DS};
-dsets_to_rllist([E|Es], Acc, DS0) ->
-  {ERoot, DS} = dsets_find(E, DS0),
-  dsets_to_rllist(Es, map_append(ERoot, E, Acc), DS).
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %% Third pass
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %% Collect all referenced temps in each partition.
diff --git a/lib/hipe/regalloc/hipe_restore_reuse.erl b/lib/hipe/regalloc/hipe_restore_reuse.erl
new file mode 100644
index 0000000000..2158bd185e
--- /dev/null
+++ b/lib/hipe/regalloc/hipe_restore_reuse.erl
@@ -0,0 +1,516 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%@doc
+%%	       RESTORE REUSE LIVE RANGE SPLITTING PASS
+%%
+%% This is a simple live range splitter that tries to avoid sequences where a
+%% temporary is accessed on stack multiple times by keeping a copy of that temp
+%% around in a register.
+%%
+%% At any point where a temporary that is expected to be spilled (see uses of
+%% spills_add_list/2) is defined or used, this pass considers that temporary
+%% "available".
+%%
+%% Limitations:
+%%   * If a live range part starts with several different restores, this module
+%%     will introduce a new temp number for each of them, and later be forced to
+%%     generate phi blocks. It would be more efficient to introduce just a
+%%     single temp number. That would also remove the need for the phi blocks.
+%%   * If a live range part ends in a definition, that definition should just
+%%     define the base temp rather than the substitution, since some CISC
+%%     targets might be able to inline the memory access in the instruction.
+-module(hipe_restore_reuse).
+
+-export([split/4]).
+
+%% Exports for hipe_range_split, which uses restore_reuse as one possible spill
+%% "mode"
+-export([analyse/3
+	,renamed_in_block/2
+	,split_in_block/2
+	]).
+-export_type([avail/0]).
+
+-compile(inline).
+
+%% -define(DO_ASSERT, 1).
+-include("../main/hipe.hrl").
+
+-type target_cfg()       :: any().
+-type liveness()         :: any().
+-type target_module()    :: module().
+-type target_context()   :: any().
+-type target()           :: {target_module(), target_context()}.
+-type label()            :: non_neg_integer().
+-type reg()              :: non_neg_integer().
+-type instr()            :: any().
+-type temp()             :: any().
+
+-spec split(target_cfg(), liveness(), target_module(), target_context())
+           -> target_cfg().
+split(CFG, Liveness, TargetMod, TargetContext) ->
+  %% Analysis and rewriting both take the module and context as one pair
+  Target = {TargetMod, TargetContext},
+  rewrite(CFG, Target, analyse(CFG, Liveness, Target)).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-opaque avail() :: #{label() => avail_bb()}.
+
+-record(avail_bb, {
+	  %% Blocks where HasCall is true are considered to have too high
+	  %% register pressure to support a register copy of a temp
+	  has_call :: boolean(),
+	  %% AvailOut: Temps that can be split (are available)
+	  out      :: availset(),
+	  %% Gen: AvailOut generated locally
+	  gen      :: availset(),
+	  %% WantIn: Temps that are split
+	  want     :: regset(),
+	  %% Self: Temps with avail-want pairs locally
+	  self     :: regset(),
+	  %% DefIn: Temps shadowed by later def in same live range part
+	  defin    :: regset(),
+	  pred     :: [label()],
+	  succ     :: [label()]
+	 }).
+-type avail_bb() :: #avail_bb{}.
+
+avail_get(L, Avail) -> maps:get(L, Avail).
+avail_set(L, Val, Avail) -> maps:put(L, Val, Avail).
+avail_has_call(L, Avail) -> (avail_get(L, Avail))#avail_bb.has_call.
+avail_out(L, Avail) -> (avail_get(L, Avail))#avail_bb.out.
+avail_self(L, Avail) -> (avail_get(L, Avail))#avail_bb.self.
+avail_pred(L, Avail) -> (avail_get(L, Avail))#avail_bb.pred.
+avail_succ(L, Avail) -> (avail_get(L, Avail))#avail_bb.succ.
+
+avail_in(L, Avail) ->
+  Meet = fun(P, ASet) -> availset_intersect(avail_out(P, Avail), ASet) end,
+  case avail_pred(L, Avail) of
+    [] -> % entry: no predecessors
+      availset_empty();
+    Preds ->
+      lists:foldl(Meet, availset_top(), Preds)
+  end.
+
+want_in(L, Avail) -> (avail_get(L, Avail))#avail_bb.want.
+want_out(L, Avail) ->
+  %% WantOut is the union of WantIn over all successors (empty if none)
+  WantIns = [want_in(S, Avail) || S <- avail_succ(L, Avail)],
+  ordsets:union(WantIns).
+
+def_in(L, Avail) -> (avail_get(L, Avail))#avail_bb.defin.
+def_out(L, Avail) ->
+  case avail_succ(L, Avail) of
+    [] -> ordsets:new(); % exit (no successors)
+    Succ ->
+      ordsets:intersection([def_in(S, Avail) || S <- Succ])
+  end.
+
+-type regset()  :: ordsets:ordset(reg()).
+-type availset() :: top | regset().
+availset_empty() -> [].
+availset_top() -> top.
+availset_intersect(top, B) -> B;
+availset_intersect(A, top) -> A;
+availset_intersect(A, B) -> ordsets:intersection(A, B).
+availset_union(top, _) -> top;
+availset_union(_, top) -> top;
+availset_union(A, B) -> ordsets:union(A, B).
+ordset_intersect_availset(OS, top) -> OS;
+ordset_intersect_availset(OS, AS) -> ordsets:intersection(OS, AS).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Analysis pass
+%%
+%% The analysis pass collects the set of temps we're interested in splitting
+%% (Spills), and computes three dataflow analyses for this subset of temps.
+%%
+%% Avail, which is the set of temps which are available in register from a
+%%   previous (potential) spill or restore without going through a HasCall
+%%   block.
+%% Want, which is a liveness analysis for the subset of temps used by an
+%%   instruction that are also in Avail at that point. In other words, Want is
+%%   the set of temps that are split (have a register copy) at a particular
+%%   point.
+%% Def, which are the temps that are already going to be spilled later, and so
+%%   need not be spilled when they're defined.
+%%
+%% Lastly, it computes the set Self for each block: the temps that have
+%% avail-want pairs in the same block, and so should be split in that block even
+%% if they're not in WantIn for the block.
+
+-spec analyse(target_cfg(), liveness(), target()) -> avail().
+analyse(CFG, Liveness, Target) ->
+  Avail0 = analyse_init(CFG, Liveness, Target),
+  RPO = reverse_postorder(CFG, Target),
+  AvailLs = [L || L <- RPO, not avail_has_call(L, Avail0)], % skip HasCall blocks
+  Avail1 = avail_dataf(AvailLs, Avail0), % iterated in reverse postorder
+  Avail2 = analyse_filter_want(maps:keys(Avail1), Avail1),
+  PO = lists:reverse(RPO), % the Want/Def pass visits blocks in the reverse order
+  want_dataf(PO, Avail2).
+
+-spec analyse_init(target_cfg(), liveness(), target()) -> avail().
+analyse_init(CFG, Liveness, Target) ->
+  analyse_init(labels(CFG, Target), CFG, Liveness, Target, #{}, []).
+
+-spec analyse_init([label()], target_cfg(), liveness(), target(), spillset(),
+		   [{label(), avail_bb()}])
+		  -> avail().
+analyse_init([], _CFG, _Liveness, Target, Spills0, Acc) ->
+  %% Precoloured temps can't be spilled
+  Spills = spills_filter(fun(R) -> not is_precoloured(R, Target) end, Spills0),
+  analyse_init_1(Acc, Spills, []);
+analyse_init([L|Ls], CFG, Liveness, Target, Spills0, Acc) ->
+  {DefIn, Gen, Self, Want, HasCall0} =
+    analyse_scan(hipe_bb:code(bb(CFG, L, Target)), Target,
+		 ordsets:new(), ordsets:new(), ordsets:new(),
+		 ordsets:new()),
+  {Spills, Out, HasCall} =
+    case HasCall0 of
+      false -> {Spills0, availset_top(), false};
+      {true, CallDefs} ->
+	Spill = ordsets:subtract(liveout(Liveness, L, Target), CallDefs),
+	{spills_add_list(Spill, Spills0), Gen, true}
+    end,
+  Pred = hipe_gen_cfg:pred(CFG, L),
+  Succ = hipe_gen_cfg:succ(CFG, L),
+  Val = #avail_bb{gen=Gen, want=Want, self=Self, out=Out, has_call=HasCall,
+		  pred=Pred, succ=Succ, defin=DefIn},
+  analyse_init(Ls, CFG, Liveness, Target, Spills, [{L, Val} | Acc]).
+
+-spec analyse_init_1([{label(), avail_bb()}], spillset(),
+		     [{label(), avail_bb()}])
+		    -> avail().
+analyse_init_1([], _Spills, Acc) -> maps:from_list(Acc);
+analyse_init_1([{L, Val0}|Vs], Spills, Acc) ->
+  #avail_bb{out=Out,gen=Gen,want=Want,self=Self} = Val0,
+  Val = Val0#avail_bb{
+	  out  = spills_filter_availset(Out,  Spills),
+	  gen  = spills_filter_availset(Gen,  Spills),
+	  want = spills_filter_availset(Want, Spills),
+	  self = spills_filter_availset(Self, Spills)},
+  analyse_init_1(Vs, Spills, [{L, Val} | Acc]).
+
+-type spillset() :: #{reg() => []}.
+-spec spills_add_list([reg()], spillset()) -> spillset().
+spills_add_list(Regs, Spills0) ->
+  lists:foldl(fun(R, Spills) -> Spills#{R => []} end, Spills0, Regs).
+
+-spec spills_filter_availset(availset(), spillset()) -> availset().
+%% The 'top' element is passed through unchanged
+spills_filter_availset(top, _) ->
+  top;
+%% Otherwise keep, in their original order, only the elements that are keys
+%% of the spill set
+spills_filter_availset(Set, Spills) when is_list(Set) ->
+  [E || E <- Set, maps:is_key(E, Spills)].
+
+spills_filter(Fun, Spills) -> maps:filter(fun(K, _) -> Fun(K) end, Spills).
+
+-spec analyse_scan([instr()], target(), Defset, Gen, Self, Want)
+		  -> {Defset, Gen, Self, Want, HasCall} when
+    HasCall :: false | {true, regset()},
+    Defset  :: regset(),
+    Gen     :: availset(),
+    Self    :: regset(),
+    Want    :: regset().
+analyse_scan([], _Target, Defs, Gen, Self, Want) ->
+  {Defs, Gen, Self, Want, false};
+analyse_scan([I|Is], Target, Defs0, Gen0, Self0, Want0) ->
+  {DefL, UseL} = reg_def_use(I, Target),
+  Use = ordsets:from_list(UseL),
+  Def = ordsets:from_list(DefL),
+  Self = ordsets:union(ordsets:intersection(Use, Gen0), Self0),
+  Want = ordsets:union(ordsets:subtract(Use, Defs0), Want0),
+  Defs = ordsets:union(Def, Defs0),
+  case defines_all_alloc(I, Target) of
+    true ->
+      [] = Is, %assertion
+      {Defs, ordsets:new(), Self, Want, {true, Def}};
+    false ->
+      Gen = ordsets:union(ordsets:union(Def, Use), Gen0),
+      analyse_scan(Is, Target, Defs, Gen, Self, Want)
+  end.
+
+-spec avail_dataf([label()], avail()) -> avail().
+avail_dataf(RPO, Avail0) ->
+  %% Iterate to a fixpoint: stop once a full sweep changes no block
+  {Avail, Changed} = avail_dataf_once(RPO, Avail0, 0),
+  if Changed =:= 0 -> Avail;
+     true -> avail_dataf(RPO, Avail)
+  end.
+
+-spec avail_dataf_once([label()], avail(), non_neg_integer())
+                      -> {avail(), non_neg_integer()}.
+avail_dataf_once([], Avail, Changed) -> {Avail, Changed};
+avail_dataf_once([Lbl|Rest], Avail, Changed) ->
+  BB = #avail_bb{out=PrevOut, gen=Gen} = avail_get(Lbl, Avail),
+  NewOut = availset_union(avail_in(Lbl, Avail), Gen),
+  if NewOut =:= PrevOut -> % out set of this block did not change
+      avail_dataf_once(Rest, Avail, Changed);
+     true ->
+      Updated = avail_set(Lbl, BB#avail_bb{out=NewOut}, Avail),
+      avail_dataf_once(Rest, Updated, Changed+1)
+  end.
+
+-spec analyse_filter_want([label()], avail()) -> avail().
+analyse_filter_want(Labels, Avail0) ->
+  lists:foldl(fun(L, Avail) ->
+      ABB = #avail_bb{want=W, defin=D} = avail_get(L, Avail),
+      In = avail_in(L, Avail),
+      Want = ordset_intersect_availset(W, In),
+      DefIn = ordset_intersect_availset(D, In),
+      avail_set(L, ABB#avail_bb{want=Want, defin=DefIn}, Avail)
+    end, Avail0, Labels).
+
+-spec want_dataf([label()], avail()) -> avail().
+%% Repeats want_dataf_once/3 over PO until a sweep reports zero changes.
+want_dataf(PO, Avail0) ->
+  {Avail, NChanged} = want_dataf_once(PO, Avail0, 0),
+  if NChanged =:= 0 -> Avail;
+     true -> want_dataf(PO, Avail)
+  end.
+
+-spec want_dataf_once([label()], avail(), non_neg_integer())
+                     -> {avail(), non_neg_integer()}.
+want_dataf_once([], Avail, Changed) ->
+  {Avail, Changed};
+want_dataf_once([Lbl|Rest], Avail, Changed) ->
+  BB = #avail_bb{want=PrevWant, defin=PrevDef} = avail_get(Lbl, Avail),
+  AvailIn = avail_in(Lbl, Avail),
+  WantOut = ordset_intersect_availset(want_out(Lbl, Avail), AvailIn),
+  DefOut = ordset_intersect_availset(def_out(Lbl, Avail), AvailIn),
+  NewWant = ordsets:union(WantOut, PrevWant),
+  NewDef = ordsets:union(DefOut, PrevDef),
+  %% A block counts as changed when either of its two sets differs.
+  if NewWant =:= PrevWant, NewDef =:= PrevDef ->
+      want_dataf_once(Rest, Avail, Changed);
+     true ->
+      Updated = avail_set(Lbl, BB#avail_bb{want=NewWant, defin=NewDef}, Avail),
+      want_dataf_once(Rest, Updated, Changed+1)
+  end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Rewrite pass
+-type subst_dict() :: orddict:orddict(reg(), reg()).
+-type input()      :: #{label() => subst_dict()}.
+
+-spec rewrite(target_cfg(), target(), avail()) -> target_cfg().
+rewrite(CFG, Target, Avail) -> % start with an empty per-label input map
+  Labels = reverse_postorder(CFG, Target),
+  rewrite(Labels, Target, Avail, #{}, CFG).
+
+-spec rewrite([label()], target(), avail(), input(), target_cfg())
+	     -> target_cfg().
+rewrite([], _Target, _Avail, _Input, CFG) -> CFG; % every label processed
+rewrite([L|Ls], Target, Avail, Input0, CFG0) -> % rewrite block L, then the rest
+  SplitHere = split_in_block(L, Avail), % regs whose split starts inside L
+  {Input1, LInput} =
+    case Input0 of
+      #{L := LInput0} -> {Input0, LInput0}; % input already recorded for L
+      #{} -> {Input0#{L => []}, []} % entry block
+    end,
+  ?ASSERT([] =:= [X || X <- SplitHere, orddict:is_key(X, LInput)]),
+  ?ASSERT(want_in(L, Avail) =:= orddict:fetch_keys(LInput)),
+  {CFG1, LOutput} =
+    case {SplitHere, LInput} of
+      {[], []} -> % optimisation (rewrite will do nothing, so skip it)
+	{CFG0, LInput};
+      _ ->
+	Code0 = hipe_bb:code(BB=bb(CFG0, L, Target)),
+	DefOut = def_out(L, Avail),
+	{Code, LOutput0, _DefIn} =
+	  rewrite_instrs(Code0, Target, LInput, DefOut, SplitHere),
+	{update_bb(CFG0, L, hipe_bb:code_update(BB, Code), Target), LOutput0}
+    end,
+  {Input, CFG} = rewrite_succs(avail_succ(L, Avail), Target, L, LOutput, Avail,
+			       Input1, CFG1), % hand LOutput on to L's successors
+  rewrite(Ls, Target, Avail, Input, CFG).
+
+-spec renamed_in_block(label(), avail()) -> ordsets:ordset(reg()).
+renamed_in_block(L, Avail) -> % union of the self, want-in and want-out sets
+  SelfAndIn = ordsets:union(avail_self(L, Avail), want_in(L, Avail)),
+  ordsets:union(SelfAndIn, want_out(L, Avail)).
+
+-spec split_in_block(label(), avail()) -> ordsets:ordset(reg()).
+split_in_block(L, Avail) -> % (self U want_out) minus want_in
+  SelfOrOut = ordsets:union(avail_self(L, Avail), want_out(L, Avail)),
+  ordsets:subtract(SelfOrOut, want_in(L, Avail)).
+
+-spec rewrite_instrs([instr()], target(), subst_dict(), regset(), [reg()])
+		    ->  {[instr()], subst_dict(), regset()}.
+rewrite_instrs([], _Target, Output, DefOut, []) -> % [] asserts no split left
+  {[], Output, DefOut};
+rewrite_instrs([I|Is], Target, Input0, BBDefOut, SplitHere0) ->
+  {TDef, TUse} = def_use(I, Target),
+  {Def, Use} = {reg_names(TDef, Target), reg_names(TUse, Target)},
+  %% Restores are generated in forward order by picking temps from SplitHere as
+  %% they're used or defined. After the last instruction, all temps have been
+  %% picked.
+  {ISplits, SplitHere} =
+    lists:partition(fun(R) ->
+			lists:member(R, Def) orelse lists:member(R, Use)
+		    end, SplitHere0),
+  {Input, Restores} =
+    case ISplits of
+      [] -> {Input0, []};
+      _ ->
+	make_splits(ISplits, Target, TDef, TUse, Input0, [])
+    end,
+  %% Recurse on the rest of the block first; DefOut then covers what follows I.
+  {Acc0, Output, DefOut} =
+    rewrite_instrs(Is, Target, Input, BBDefOut, SplitHere),
+  %% From here we're processing instructions in reverse order, because to avoid
+  %% redundant spills we need to walk the 'def' dataflow, which is in reverse.
+  SubstFun = fun(Temp) ->
+		 case orddict:find(reg_nr(Temp, Target), Input) of
+		   {ok, NewTemp} -> NewTemp;
+		   error -> Temp
+		 end
+	     end,
+  Acc1 = insert_spills(TDef, Target, Input, DefOut, Acc0), % moves land after I
+  Acc = Restores ++ [subst_temps(SubstFun, I, Target) | Acc1], % restores before I
+  DefIn = ordsets:union(DefOut, ordsets:from_list(Def)), % add I's own defs
+  {Acc, Output, DefIn}.
+
+-spec make_splits([reg()], target(), [temp()], [temp()], subst_dict(),
+                  [instr()])
+                 -> {subst_dict(), [instr()]}.
+%% Gives every register in the first argument a fresh temp, recording the
+%% mapping in Input; registers that are used also get a move into that temp.
+make_splits([], _Target, _TDef, _TUse, Input, Acc) -> {Input, Acc};
+make_splits([Reg|Regs], Target, TDef, TUse, Input, Acc) ->
+  FreshNr = new_reg_nr(Target),
+  {Fresh, Moves} =
+    case find_reg_temp(Reg, TUse, Target) of
+      {ok, Used} -> % Reg is read: also emit a move into the fresh temp
+        F = update_reg_nr(FreshNr, Used, Target),
+        {F, [mk_move(Used, F, Target) | Acc]};
+      error -> % Reg is only written, so it must be among the defs
+        {ok, Defd} = find_reg_temp(Reg, TDef, Target),
+        {update_reg_nr(FreshNr, Defd, Target), Acc}
+    end,
+  make_splits(Regs, Target, TDef, TUse, orddict:store(Reg, Fresh, Input),
+              Moves).
+
+-spec find_reg_temp(reg(), [temp()], target()) -> error | {ok, temp()}.
+%% Linear search for the first temp whose reg_nr/2 equals Reg.
+find_reg_temp(Reg, Temps, Target) ->
+  case lists:dropwhile(fun(T) -> reg_nr(T, Target) =/= Reg end, Temps) of
+    [Found|_] -> {ok, Found};
+    [] -> error
+  end.
+
+-spec insert_spills([temp()], target(), subst_dict(), regset(), [instr()])
+                   -> [instr()].
+%% Conses mk_move(Subst, T) onto Acc for each renamed T with reg not in DefOut.
+insert_spills(Temps, Target, Input, DefOut, Acc0) ->
+  lists:foldl(
+    fun(T, Acc) ->
+        R = reg_nr(T, Target),
+        case orddict:find(R, Input) of
+          error -> Acc;
+          {ok, Subst} ->
+            AlreadyDef = lists:member(R, DefOut),
+            if AlreadyDef -> Acc;
+               true -> [mk_move(Subst, T, Target) | Acc] end
+        end
+    end, Acc0, Temps).
+
+-spec rewrite_succs([label()], target(), label(), subst_dict(), avail(),
+		    input(), target_cfg()) -> {input(), target_cfg()}.
+rewrite_succs([], _Target, _P, _POutput, _Avail, Input, CFG) -> {Input, CFG};
+rewrite_succs([L|Ls], Target, P, POutput, Avail, Input0, CFG0) ->
+  NewLInput = orddict_with_ordset(want_in(L, Avail), POutput), % P's view of L
+  {Input, CFG} =
+    case Input0 of
+      #{L := LInput} -> % L already has an input mapping; reconcile with it
+	CFG2 =
+	  case required_phi_moves(LInput, NewLInput) of
+	    [] -> CFG0; % mappings agree, nothing to insert
+	    ReqMovs -> % mismatch: route P -> L through a new block of moves
+	      PhiLb = new_label(Target),
+	      Code = [mk_move(S,D,Target) || {S,D} <- ReqMovs]
+		++ [mk_goto(L, Target)],
+	      PhiBB = hipe_bb:mk_bb(Code),
+	      CFG1 = update_bb(CFG0, PhiLb, PhiBB, Target),
+	      bb_redirect_jmp(L, PhiLb, P, CFG1, Target) % P now jumps to PhiLb
+	  end,
+	{Input0, CFG2};
+      #{} -> % first mapping seen for L: adopt P's
+	{Input0#{L => NewLInput}, CFG0}
+    end,
+  rewrite_succs(Ls, Target, P, POutput, Avail, Input, CFG).
+
+-spec bb_redirect_jmp(label(), label(), label(), target_cfg(), target())
+                     -> target_cfg().
+bb_redirect_jmp(From, To, Lb, CFG, Target) -> % retarget Lb's last instruction
+  Block = bb(CFG, Lb, Target),
+  NewLast = redirect_jmp(hipe_bb:last(Block), From, To, Target),
+  NewCode = hipe_bb:butlast(Block) ++ [NewLast],
+  update_bb(CFG, Lb, hipe_bb:code_update(Block, NewCode), Target).
+
+-spec required_phi_moves(subst_dict(), subst_dict()) -> [{reg(), reg()}].
+required_phi_moves(Ins, Outs) ->
+  lists:append(lists:zipwith(fun(P, P) -> [];
+                                ({K, In}, {K, Out}) -> [{Out, In}]
+                             end, Ins, Outs)).
+
+%% @doc Returns the entries of Dict whose keys occur in Set, as a new orddict.
+%% Keys of Set that are missing from Dict are dropped.
+-spec orddict_with_ordset(ordsets:ordset(K), orddict:orddict(K, V))
+                         -> orddict:orddict(K, V).
+orddict_with_ordset([], _Dict) -> [];
+orddict_with_ordset(_Set, []) -> [];
+orddict_with_ordset([S|Ss]=Set, [{K, _}=Pair|Ds]=Dict) ->
+  if S < K -> orddict_with_ordset(Ss, Dict);
+     S > K -> orddict_with_ordset(Set, Ds);
+     true  -> [Pair|orddict_with_ordset(Ss, Ds)] % S == K
+  end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Target module interface functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-define(TGT_IFACE_0(N), N(         {M,C}) -> M:N(         C)). % N/1 -> M:N/1
+-define(TGT_IFACE_1(N), N(A1,      {M,C}) -> M:N(A1,      C)). % N/2 -> M:N/2
+-define(TGT_IFACE_2(N), N(A1,A2,   {M,C}) -> M:N(A1,A2,   C)). % N/3 -> M:N/3
+-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)). % N/4 -> M:N/4
+
+?TGT_IFACE_2(bb).
+?TGT_IFACE_1(def_use).
+?TGT_IFACE_1(defines_all_alloc).
+?TGT_IFACE_1(is_precoloured).
+?TGT_IFACE_1(labels).
+?TGT_IFACE_1(mk_goto).
+?TGT_IFACE_2(mk_move).
+?TGT_IFACE_0(new_label).
+?TGT_IFACE_0(new_reg_nr).
+?TGT_IFACE_3(redirect_jmp).
+?TGT_IFACE_1(reg_nr).
+?TGT_IFACE_1(reverse_postorder).
+?TGT_IFACE_2(subst_temps).
+?TGT_IFACE_3(update_bb).
+?TGT_IFACE_2(update_reg_nr).
+
+liveout(Liveness, L, {Mod,Ctx}=Target) -> % Mod:liveout/3 as an ordset of regs
+  ordsets:from_list(reg_names(Mod:liveout(Liveness, L, Ctx), Target)).
+
+reg_names(Regs, {Mod,Ctx}) -> % map each temp to its Mod:reg_nr/2 result
+  lists:map(fun(Temp) -> Mod:reg_nr(Temp, Ctx) end, Regs).
+
+reg_def_use(Insn, Target) -> % def_use/2 with both lists passed through reg_names/2
+  {Defs, Uses} = def_use(Insn, Target),
+  {reg_names(Defs, Target), reg_names(Uses, Target)}.
diff --git a/lib/hipe/regalloc/hipe_sparc_specific.erl b/lib/hipe/regalloc/hipe_sparc_specific.erl
index 31fca81316..78b6379eba 100644
--- a/lib/hipe/regalloc/hipe_sparc_specific.erl
+++ b/lib/hipe/regalloc/hipe_sparc_specific.erl
@@ -24,6 +24,7 @@
 	 ,reg_nr/2
 	 ,def_use/2
 	 ,is_move/2
+	 ,is_spill_move/2
 	 ,is_precoloured/2
 	 ,var_range/2
 	 ,allocatable/1
@@ -46,12 +47,19 @@
 %% callbacks for hipe_regalloc_loop
 -export([check_and_rewrite/3]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights, hipe_range_split
+-export([branch_preds/2]).
+
 check_and_rewrite(CFG, Coloring, no_context) ->
   hipe_sparc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal').
 
@@ -115,6 +123,9 @@ bb(CFG,L,_) ->
 update_bb(CFG,L,BB,_) ->
   hipe_sparc_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Instr, _Ctx) -> % context argument is unused
+  hipe_sparc_cfg:branch_preds(Instr).
+
 %% SPARC stuff
 
 def_use(Instruction, Ctx) ->
@@ -144,9 +155,24 @@ is_move(Instruction, _) ->
     false -> false
   end.
 
+is_spill_move(Instr, _Ctx) -> % context argument is unused
+  hipe_sparc:is_pseudo_spill_move(Instr).
+
 reg_nr(Reg, _) ->
   hipe_sparc:temp_reg(Reg).
 
+mk_move(From, To, _Ctx) -> % builds a SPARC pseudo_move From -> To
+  hipe_sparc:mk_pseudo_move(From, To).
+
+mk_goto(Lbl, _Ctx) -> % context argument is unused
+  hipe_sparc:mk_b_label(Lbl).
+
+redirect_jmp(Jmp, Old, New, _Ctx) when is_integer(Old), is_integer(New) ->
+  hipe_sparc_cfg:redirect_jmp(Jmp, Old, New). % both labels must be integers
+
+new_label(_Ctx) -> % next label from the sparc gensym counter
+  hipe_gensym:get_next_label(sparc).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(sparc).
 
diff --git a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl
index 050d65e1a9..485fdc212a 100644
--- a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl
+++ b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl
@@ -24,6 +24,7 @@
 	 ,reg_nr/2
 	 ,def_use/2
 	 ,is_move/2
+	 ,is_spill_move/2
 	 ,is_precoloured/2
 	 ,var_range/2
 	 ,allocatable/1
@@ -46,12 +47,19 @@
 %% callbacks for hipe_regalloc_loop
 -export([check_and_rewrite/3]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights, hipe_range_split
+-export([branch_preds/2]).
+
 check_and_rewrite(CFG, Coloring, no_context) ->
   hipe_sparc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring).
 
@@ -108,6 +116,9 @@ bb(CFG, L, _) ->
 update_bb(CFG,L,BB,_) ->
   hipe_sparc_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Instr, _Ctx) -> % context argument is unused
+  hipe_sparc_cfg:branch_preds(Instr).
+
 %% SPARC stuff
 
 def_use(I, Ctx) ->
@@ -125,9 +136,24 @@ defines_all_alloc(I, _) ->
 is_move(I, _) ->
   hipe_sparc:is_pseudo_fmove(I).
  
+is_spill_move(Instr, _Ctx) -> % context argument is unused
+  hipe_sparc:is_pseudo_spill_fmove(Instr).
+
 reg_nr(Reg, _) ->
   hipe_sparc:temp_reg(Reg).
 
+mk_move(From, To, _Ctx) -> % builds a SPARC pseudo_fmove From -> To
+  hipe_sparc:mk_pseudo_fmove(From, To).
+
+mk_goto(Lbl, _Ctx) -> % context argument is unused
+  hipe_sparc:mk_b_label(Lbl).
+
+redirect_jmp(Jmp, Old, New, _Ctx) when is_integer(Old), is_integer(New) ->
+  hipe_sparc_cfg:redirect_jmp(Jmp, Old, New). % both labels must be integers
+
+new_label(_Ctx) -> % next label from the sparc gensym counter
+  hipe_gensym:get_next_label(sparc).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(sparc).
 
diff --git a/lib/hipe/regalloc/hipe_x86_specific.erl b/lib/hipe/regalloc/hipe_x86_specific.erl
index c1c8dbbcd6..dacfb71b00 100644
--- a/lib/hipe/regalloc/hipe_x86_specific.erl
+++ b/lib/hipe/regalloc/hipe_x86_specific.erl
@@ -46,6 +46,7 @@
 	 def_use/2,
 	 is_arg/2,	% used by hipe_ls_regalloc
 	 is_move/2,
+	 is_spill_move/2,
 	 is_fixed/2,	% used by hipe_graph_coloring_regalloc
 	 is_global/2,
 	 is_precoloured/2,
@@ -63,12 +64,19 @@
 %% callbacks for hipe_regalloc_loop
 -export([check_and_rewrite/3]).
 
-%% callbacks for hipe_regalloc_prepass
--export([new_reg_nr/1,
+%% callbacks for hipe_regalloc_prepass, hipe_range_split
+-export([mk_move/3,
+	 mk_goto/2,
+	 redirect_jmp/4,
+	 new_label/1,
+	 new_reg_nr/1,
 	 update_reg_nr/3,
 	 update_bb/4,
 	 subst_temps/3]).
 
+%% callbacks for hipe_bb_weights
+-export([branch_preds/2]).
+
 check_and_rewrite(CFG, Coloring, _) ->
   ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(CFG, Coloring, 'normal').
 
@@ -156,6 +164,9 @@ bb(CFG,L,_) ->
 update_bb(CFG,L,BB,_) ->
   hipe_x86_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Branch, _Ctx) -> % context argument is unused
+  hipe_x86_cfg:branch_preds(Branch).
+
 %% X86 stuff
 
 def_use(Instruction,_) ->
@@ -200,9 +211,33 @@ is_move(Instruction,_) ->
     false -> false
   end.
 
+is_spill_move(Instr, _Ctx) -> % context argument is unused
+  hipe_x86:is_pseudo_spill_move(Instr).
+
 reg_nr(Reg,_) ->
   hipe_x86:temp_reg(Reg).
 
+mk_move(From, To, _Ctx) -> % builds an x86 move From -> To
+  hipe_x86:mk_move(From, To).
+
+mk_goto(Lbl, _Ctx) -> % context argument is unused
+  hipe_x86:mk_jmp_label(Lbl).
+
+redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) ->
+  Ref = make_ref(), % unique process-dictionary key for the "rewritten" flag
+  put(Ref, false), % nothing rewritten yet
+  I = hipe_x86_subst:insn_lbls(
+	fun(Tgt) ->
+	    if Tgt =:= ToOld -> put(Ref, true), ToNew; % record the hit
+	       is_integer(Tgt) -> Tgt % other labels kept; non-integers crash
+	    end
+	end, Jmp),
+  true = erase(Ref), % Assert that something was rewritten
+  I.
+
+new_label(_Ctx) -> % next label from the x86 gensym counter
+  hipe_gensym:get_next_label(x86).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(x86).
 
diff --git a/lib/hipe/regalloc/hipe_x86_specific_x87.erl b/lib/hipe/regalloc/hipe_x86_specific_x87.erl
index 4b4c83f76d..3fe49e1f00 100644
--- a/lib/hipe/regalloc/hipe_x86_specific_x87.erl
+++ b/lib/hipe/regalloc/hipe_x86_specific_x87.erl
@@ -47,6 +47,7 @@
 	 uses/2,
 	 defines/2,
 	 defines_all_alloc/2,
+	 is_spill_move/2,
 	 is_global/2,
 	 reg_nr/2,
 	 physical_name/2,
@@ -158,6 +159,9 @@ defines(I, _) ->
 
 defines_all_alloc(I, _) -> hipe_amd64_defuse:insn_defs_all(I).
 
+is_spill_move(Instr, _Ctx) -> % context argument is unused
+  hipe_x86:is_pseudo_spill_fmove(Instr).
+
 temp_is_double(Temp) ->
   hipe_x86:temp_type(Temp) =:= 'double'.
 
diff --git a/lib/hipe/rtl/Makefile b/lib/hipe/rtl/Makefile
index b4cdf8b1f2..357f1f6950 100644
--- a/lib/hipe/rtl/Makefile
+++ b/lib/hipe/rtl/Makefile
@@ -118,10 +118,12 @@ else
 TYPE_STR=
 endif
 
-ifeq ($(FLAVOR),smp)
-FLAVOR_STR=.smp
-else
+FLAVOR=$(DEFAULT_FLAVOR)
+
+ifeq ($(FLAVOR),plain)
 FLAVOR_STR=
+else
+FLAVOR_STR=.smp
 endif
 
 ifeq ($(XCOMP),yes)
diff --git a/lib/hipe/rtl/hipe_icode2rtl.erl b/lib/hipe/rtl/hipe_icode2rtl.erl
index 82970f04ab..6da8a76d34 100644
--- a/lib/hipe/rtl/hipe_icode2rtl.erl
+++ b/lib/hipe/rtl/hipe_icode2rtl.erl
@@ -532,8 +532,12 @@ gen_cond(CondOp, Args, TrueLbl, FalseLbl, Pred) ->
 			  FalseLbl, Pred)];
     '=:=' ->
       [Arg1, Arg2] = Args,
+      TypeTestLbl = hipe_rtl:mk_new_label(),
       [hipe_rtl:mk_branch(Arg1, eq, Arg2, TrueLbl,
-			  hipe_rtl:label_name(GenLbl), Pred),
+			  hipe_rtl:label_name(TypeTestLbl), Pred),
+       TypeTestLbl,
+       hipe_tagscheme:test_either_immed(Arg1, Arg2, FalseLbl,
+					hipe_rtl:label_name(GenLbl)),
        GenLbl,
        hipe_rtl:mk_call([Tmp], op_exact_eqeq_2, Args,
 			TestRetName, [], not_remote),
@@ -546,8 +550,12 @@ gen_cond(CondOp, Args, TrueLbl, FalseLbl, Pred) ->
 			  TrueLbl, 1-Pred)];
     '=/=' ->
       [Arg1, Arg2] = Args,
+      TypeTestLbl = hipe_rtl:mk_new_label(),
       [hipe_rtl:mk_branch(Arg1, eq, Arg2, FalseLbl,
-			  hipe_rtl:label_name(GenLbl), 1-Pred),
+			  hipe_rtl:label_name(TypeTestLbl), 1-Pred),
+       TypeTestLbl,
+       hipe_tagscheme:test_either_immed(Arg1, Arg2, TrueLbl,
+					hipe_rtl:label_name(GenLbl)),
        GenLbl,
        hipe_rtl:mk_call([Tmp], op_exact_eqeq_2, Args,
 			TestRetName, [], not_remote),
diff --git a/lib/hipe/rtl/hipe_rtl_binary_construct.erl b/lib/hipe/rtl/hipe_rtl_binary_construct.erl
index fd0d1f1223..52ea5db382 100644
--- a/lib/hipe/rtl/hipe_rtl_binary_construct.erl
+++ b/lib/hipe/rtl/hipe_rtl_binary_construct.erl
@@ -137,43 +137,6 @@ gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SystemLimitLblName, ConstTab
 	       end
 	   end;
 
-	  {bs_put_integer, Size, Flags, ConstInfo} ->
-	    Aligned = aligned(Flags),
-	    LittleEndian = littleendian(Flags),
-	    [NewOffset] = get_real(Dst),
-	    case is_illegal_const(Size) of
-	      true ->
-		[hipe_rtl:mk_goto(FalseLblName)];
-	      false ->
-		case ConstInfo of
-		  fail ->
-		    [hipe_rtl:mk_goto(FalseLblName)];
-		  _ ->
-		    case Args of
-		      [Src, Base, Offset] ->
-			CCode = static_int_c_code(NewOffset, Src,
-						  Base, Offset, Size,
-						  Flags, TrueLblName,
-						  FalseLblName),
-			put_static_int(NewOffset, Src, Base, Offset, Size,
-				       CCode, Aligned, LittleEndian, TrueLblName);
-		      [Src, Bits, Base, Offset] ->
-			{SizeCode, SizeReg} =
-			  hipe_rtl_binary:make_size(Size, Bits,
-						    SystemLimitLblName,
-						    FalseLblName), 
-			CCode = int_c_code(NewOffset, Src, Base,
-					   Offset, SizeReg, Flags,
-					   TrueLblName, FalseLblName),
-			InCode =
-			  put_dynamic_int(NewOffset, Src, Base, Offset,
-					  SizeReg, CCode, Aligned,
-					  LittleEndian, TrueLblName),
-			SizeCode ++ InCode
-		    end
-		end
-	    end;
-
 	  {unsafe_bs_put_integer, 0, _Flags, _ConstInfo} ->
 	    [NewOffset] = get_real(Dst),
 	    case Args of
@@ -186,44 +149,12 @@ gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SystemLimitLblName, ConstTab
 	    end;
 
 	  {unsafe_bs_put_integer, Size, Flags, ConstInfo} ->
-	     case is_illegal_const(Size) of
-	      true ->
-		[hipe_rtl:mk_goto(FalseLblName)];
-	      false ->
-		 Aligned = aligned(Flags),
-		 LittleEndian = littleendian(Flags),
-		 [NewOffset] = get_real(Dst),
-		 case ConstInfo of
-		   fail ->
-		     [hipe_rtl:mk_goto(FalseLblName)];
-		   _ ->
-		     case Args of
-		       [Src, Base, Offset] ->
-			 CCode = static_int_c_code(NewOffset, Src,
-						   Base, Offset, Size,
-						   Flags, TrueLblName,
-						   FalseLblName),
-			 put_unsafe_static_int(NewOffset, Src, Base,
-					       Offset, Size,
-					       CCode, Aligned, LittleEndian,
-					       TrueLblName);
-		       [Src, Bits, Base, Offset] ->
-			 {SizeCode, SizeReg} =
-			   hipe_rtl_binary:make_size(Size, Bits,
-						     SystemLimitLblName,
-						     FalseLblName),
-			 CCode = int_c_code(NewOffset, Src, Base,
-					    Offset, SizeReg, Flags,
-					    TrueLblName, FalseLblName),
-			 InCode =
-			   put_unsafe_dynamic_int(NewOffset, Src, Base,
-						  Offset, SizeReg, CCode,
-						  Aligned, LittleEndian,
-						  TrueLblName),
-			 SizeCode ++ InCode
-		     end
-		 end
-	     end;
+	    do_bs_put_integer(Dst, Args, Size, Flags, ConstInfo, true,
+			      TrueLblName, FalseLblName, SystemLimitLblName);
+
+	  {bs_put_integer, Size, Flags, ConstInfo} ->
+	    do_bs_put_integer(Dst, Args, Size, Flags, ConstInfo, false,
+			      TrueLblName, FalseLblName, SystemLimitLblName);
 
 	  bs_utf8_size ->
 	    case Dst of
@@ -360,6 +291,40 @@ gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SystemLimitLblName, ConstTab
       {Code, ConstTab}
   end.
 
+%% Shared by bs_put_integer (SrcUnsafe=false) and unsafe_bs_put_integer (true)
+do_bs_put_integer(Dst, Args, Size, Flags, ConstInfo, SrcUnsafe,
+		  TrueLblName, FalseLblName, SystemLimitLblName) ->
+  case is_illegal_const(Size) of
+    true -> % illegal constant size: unconditional jump to the false label
+      [hipe_rtl:mk_goto(FalseLblName)];
+    false ->
+      Aligned = aligned(Flags),
+      LittleEndian = littleendian(Flags),
+      [NewOffset] = get_real(Dst), % matched before ConstInfo is inspected
+      case ConstInfo of
+	fail -> % ConstInfo = fail: unconditional jump to the false label
+	  [hipe_rtl:mk_goto(FalseLblName)];
+	_ ->
+	  case Args of
+	    [Src, Base, Offset] -> % three args: Size itself is the size
+	      CCode = static_int_c_code(NewOffset, Src, Base, Offset, Size,
+					Flags, TrueLblName, FalseLblName),
+	      put_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned,
+			     LittleEndian, SrcUnsafe, TrueLblName);
+	    [Src, Bits, Base, Offset] -> % four args: size comes from make_size/4
+	      {SizeCode, SizeReg} =
+		hipe_rtl_binary:make_size(Size, Bits, SystemLimitLblName,
+					  FalseLblName),
+	      CCode = int_c_code(NewOffset, Src, Base, Offset, SizeReg, Flags,
+				 TrueLblName, FalseLblName),
+	      InCode = put_dynamic_int(NewOffset, Src, Base, Offset, SizeReg,
+				       CCode, Aligned, LittleEndian, SrcUnsafe,
+				       TrueLblName),
+	      SizeCode ++ InCode % size computation precedes the put code
+	  end
+      end
+  end.
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%
 %%  Code that is used in the append and init writeable functions
@@ -807,28 +772,8 @@ put_float(_NewOffset, _Src, _Base, _Offset, _Size, CCode, _Aligned,
   CCode.
 
 put_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned, 
-	       LittleEndian, TrueLblName) ->
-  {Init, End, UntaggedSrc} = make_init_end(Src, CCode, TrueLblName),
-  case {Aligned, LittleEndian} of
-    {true, true} ->
-      Init ++
-	copy_int_little(Base, Offset, NewOffset, Size, UntaggedSrc) ++
-	End;
-    {true, false} ->
-      Init ++
-	copy_int_big(Base, Offset, NewOffset, Size, UntaggedSrc) ++
-	End;
-    {false, true} ->
-      CCode;
-    {false, false} ->
-      Init ++
-	copy_offset_int_big(Base, Offset, NewOffset, Size, UntaggedSrc) ++
-	End
-  end.
-
-put_unsafe_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned,
-		      LittleEndian, TrueLblName) ->
-  {Init, End, UntaggedSrc} = make_init_end(Src, TrueLblName),
+	       LittleEndian, SrcUnsafe, TrueLblName) ->
+  {Init, End, UntaggedSrc} = make_init_end(Src, CCode, SrcUnsafe, TrueLblName),
   case {Aligned, LittleEndian} of
     {true, true} ->
       Init ++
@@ -847,27 +792,8 @@ put_unsafe_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned,
   end.
 
 put_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned, 
-		LittleEndian, TrueLblName) ->
-  {Init, End, UntaggedSrc} = make_init_end(Src, CCode, TrueLblName),
-  case Aligned of
-    true ->
-      case LittleEndian of
-	true ->
-	  Init ++
-	    copy_int_little(Base, Offset, NewOffset, SizeReg, UntaggedSrc) ++
-	    End;
-	false ->
-	  Init ++
-	    copy_int_big(Base, Offset, NewOffset, SizeReg, UntaggedSrc) ++
-	    End
-	end;
-    false ->
-      CCode
-  end.
-
-put_unsafe_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned, 
-		       LittleEndian, TrueLblName) ->
-  {Init, End, UntaggedSrc} = make_init_end(Src, TrueLblName),
+		LittleEndian, SrcUnsafe, TrueLblName) ->
+  {Init, End, UntaggedSrc} = make_init_end(Src, CCode, SrcUnsafe, TrueLblName),
   case Aligned of
     true ->
       case LittleEndian of
@@ -884,14 +810,13 @@ put_unsafe_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned,
       CCode
   end.
 
-
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%
 %% Help functions used by the above
 %%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-make_init_end(Src, CCode, TrueLblName) ->
+make_init_end(Src, CCode, false, TrueLblName) ->
   [CLbl, SuccessLbl] = create_lbls(2),
   [UntaggedSrc] = create_regs(1),
   Init = [hipe_tagscheme:test_fixnum(Src, hipe_rtl:label_name(SuccessLbl),
@@ -899,9 +824,8 @@ make_init_end(Src, CCode, TrueLblName) ->
 	  SuccessLbl,
 	  hipe_tagscheme:untag_fixnum(UntaggedSrc,Src)],
   End = [hipe_rtl:mk_goto(TrueLblName), CLbl| CCode],
-  {Init, End, UntaggedSrc}.
-
-make_init_end(Src, TrueLblName) ->
+  {Init, End, UntaggedSrc};
+make_init_end(Src, _CCode, true, TrueLblName) ->
   [UntaggedSrc] = create_regs(1),
   Init = [hipe_tagscheme:untag_fixnum(UntaggedSrc,Src)],
   End = [hipe_rtl:mk_goto(TrueLblName)],
diff --git a/lib/hipe/rtl/hipe_tagscheme.erl b/lib/hipe/rtl/hipe_tagscheme.erl
index 35d1e7c8a4..68cbe75e85 100644
--- a/lib/hipe/rtl/hipe_tagscheme.erl
+++ b/lib/hipe/rtl/hipe_tagscheme.erl
@@ -40,6 +40,7 @@
 	 fixnum_gt/5, fixnum_lt/5, fixnum_ge/5, fixnum_le/5, fixnum_val/1,
 	 fixnum_mul/4, fixnum_addsub/5, fixnum_andorxor/4, fixnum_not/2,
 	 fixnum_bsr/3, fixnum_bsl/3]).
+-export([test_either_immed/4]).
 -export([unsafe_car/2, unsafe_cdr/2,
 	 unsafe_constant_element/3, unsafe_update_element/3, element/6]).
 -export([unsafe_closure_element/3]).
@@ -363,14 +364,17 @@ test_matchstate(X, TrueLab, FalseLab, Pred) ->
    mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_BIN_MATCHSTATE, 
 		    TrueLab, FalseLab, Pred)].
 
+test_bitstr_header(HdrTmp, TrueLab, FalseLab, Pred) -> % masked header compare
+  mask_and_compare(HdrTmp, ?TAG_HEADER_MASK - ?BINARY_XXX_MASK,
+                   ?TAG_HEADER_REFC_BIN, TrueLab, FalseLab, Pred).
+
 test_bitstr(X, TrueLab, FalseLab, Pred) ->
   Tmp = hipe_rtl:mk_new_reg_gcsafe(),
   HalfTrueLab = hipe_rtl:mk_new_label(),
-  Mask = ?TAG_HEADER_MASK - ?BINARY_XXX_MASK,
   [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred),
    HalfTrueLab,
    get_header(Tmp, X),
-   mask_and_compare(Tmp, Mask, ?TAG_HEADER_REFC_BIN, TrueLab, FalseLab, Pred)].
+   test_bitstr_header(Tmp, TrueLab, FalseLab, Pred)].
 
 test_binary(X, TrueLab, FalseLab, Pred) ->
   Tmp1 = hipe_rtl:mk_new_reg_gcsafe(),
@@ -378,12 +382,10 @@ test_binary(X, TrueLab, FalseLab, Pred) ->
   IsBoxedLab = hipe_rtl:mk_new_label(),
   IsBitStrLab = hipe_rtl:mk_new_label(),
   IsSubBinLab =  hipe_rtl:mk_new_label(),
-  Mask = ?TAG_HEADER_MASK - ?BINARY_XXX_MASK,
   [test_is_boxed(X, hipe_rtl:label_name(IsBoxedLab), FalseLab, Pred),
    IsBoxedLab,
    get_header(Tmp1, X),
-   mask_and_compare(Tmp1, Mask, ?TAG_HEADER_REFC_BIN,
-		    hipe_rtl:label_name(IsBitStrLab), FalseLab, Pred),
+   test_bitstr_header(Tmp1, hipe_rtl:label_name(IsBitStrLab), FalseLab, Pred),
    IsBitStrLab,
    mask_and_compare(Tmp1, ?TAG_HEADER_MASK, ?TAG_HEADER_SUB_BIN,
 		    hipe_rtl:label_name(IsSubBinLab), TrueLab, 0.5),
@@ -453,6 +455,10 @@ test_fixnums_1([Arg1, Arg2|Args], Acc) ->
   Tmp = hipe_rtl:mk_new_reg_gcsafe(),
   test_fixnums_1([Tmp|Args], [hipe_rtl:mk_alu(Tmp, Arg1, 'and', Arg2)|Acc]).
 
+test_two_fixnums(Arg, Arg, FalseLab) ->
+  TrueLab = hipe_rtl:mk_new_label(),
+  [test_fixnum(Arg, hipe_rtl:label_name(TrueLab), FalseLab, 0.99),
+   TrueLab];
 test_two_fixnums(Arg1, Arg2, FalseLab) ->
   TrueLab = hipe_rtl:mk_new_label(),
   case hipe_rtl:is_imm(Arg1) orelse hipe_rtl:is_imm(Arg2) of
@@ -567,8 +573,8 @@ fixnum_andorxor(AluOp, Arg1, Arg2, Res) ->
   case AluOp of
     'xor' ->
       Tmp = hipe_rtl:mk_new_reg_gcsafe(),
-      [hipe_rtl:mk_alu(Tmp, Arg1, 'xor', Arg2),	% clears tag :-(
-       hipe_rtl:mk_alu(Res, Tmp, 'or', hipe_rtl:mk_imm(?TAG_IMMED1_SMALL))];
+      [hipe_rtl:mk_alu(Tmp, Arg1, 'sub', hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)),
+       hipe_rtl:mk_alu(Res, Tmp, 'xor', Arg2)];
     _ -> hipe_rtl:mk_alu(Res, Arg1, AluOp, Arg2)
   end.
 
@@ -595,6 +601,21 @@ fixnum_bsl(Arg1, Arg2, Res) ->
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
+%% Test if either of two values is immediate (primary tag IMMED1, 0x3)
+test_either_immed(Arg1, Arg2, TrueLab, FalseLab) ->
+  %% This test assumes primary tag 0x0 is reserved and immed has tag 0x3
+  16#0 = ?TAG_PRIMARY_HEADER, % crashes here if the tag scheme differs
+  16#3 = ?TAG_PRIMARY_IMMED1,
+  Tmp1 = hipe_rtl:mk_new_reg_gcsafe(),
+  Tmp2 = hipe_rtl:mk_new_reg_gcsafe(),
+  [hipe_rtl:mk_alu(Tmp1, Arg1, 'sub', hipe_rtl:mk_imm(1)), % low bits 3 -> 2
+   hipe_rtl:mk_alu(Tmp2, Arg2, 'sub', hipe_rtl:mk_imm(1)), % low bits 1,2 -> 0,1
+   hipe_rtl:mk_alu(Tmp2, Tmp2, 'or', Tmp1), % bit 1 set if set in either
+   hipe_rtl:mk_branch(Tmp2, 'and', hipe_rtl:mk_imm(2), eq, % bit 1 clear: false
+		      FalseLab, TrueLab, 0.01)].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
 unsafe_car(Dst, Arg) ->
   hipe_rtl:mk_load(Dst, Arg, hipe_rtl:mk_imm(-(?TAG_PRIMARY_LIST))).
 
@@ -631,14 +652,13 @@ unsafe_update_element(Tuple, Index, Value) ->   % Index is an immediate
 element(Dst, Index, Tuple, FailLabName, {tuple, A}, IndexInfo) ->
   FixnumOkLab = hipe_rtl:mk_new_label(),
   IndexOkLab = hipe_rtl:mk_new_label(),
-  Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple
   UIndex = hipe_rtl:mk_new_reg_gcsafe(),
   Arity = hipe_rtl:mk_imm(A),
-  InvIndex = hipe_rtl:mk_new_reg_gcsafe(),
-  Offset = hipe_rtl:mk_new_reg_gcsafe(),
   case IndexInfo of
     valid ->
       %% This is no branch, 1 load and 3 alus = 4 instr
+      Offset = hipe_rtl:mk_new_reg_gcsafe(),
+      Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple
       [untag_fixnum(UIndex, Index),
        hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
        hipe_rtl:mk_alu(Offset, UIndex, 'sll', 
@@ -647,72 +667,56 @@ element(Dst, Index, Tuple, FailLabName, {tuple, A}, IndexInfo) ->
     fixnums ->
       %% This is 1 branch, 1 load and 4 alus = 6 instr
       [untag_fixnum(UIndex, Index),
-       hipe_rtl:mk_alu(Ptr, Tuple, 'sub',hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))|
-       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, 
-			FailLabName, IndexOkLab)];
+       gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)];
     _ ->
       %% This is 3 branches, 1 load and 5 alus = 9 instr
       [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab),
 		   FailLabName, 0.99),
        FixnumOkLab,
        untag_fixnum(UIndex, Index),
-       hipe_rtl:mk_alu(Ptr, Tuple, 'sub',hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))|
-       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, 
-			FailLabName, IndexOkLab)]
+       gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)]
   end;
 element(Dst, Index, Tuple, FailLabName, tuple, IndexInfo) ->
   FixnumOkLab = hipe_rtl:mk_new_label(),
   IndexOkLab = hipe_rtl:mk_new_label(),
-  Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple
   Header = hipe_rtl:mk_new_reg_gcsafe(),
   UIndex = hipe_rtl:mk_new_reg_gcsafe(),
   Arity = hipe_rtl:mk_new_reg_gcsafe(),
-  InvIndex = hipe_rtl:mk_new_reg_gcsafe(),
-  Offset = hipe_rtl:mk_new_reg_gcsafe(),
   case IndexInfo of
     fixnums ->
       %% This is 1 branch, 2 loads and 5 alus = 8 instr
-      [hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
-       hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
+      [get_header(Header, Tuple),
        untag_fixnum(UIndex, Index),
        hipe_rtl:mk_alu(Arity,Header,'srl',hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))|
-       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, 
-			FailLabName, IndexOkLab)];
+       gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)];
     Num when is_integer(Num) ->
       %% This is 1 branch, 1 load and 3 alus = 5 instr
-      [hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))|
-       gen_element_tail(Dst, Ptr, InvIndex, hipe_rtl:mk_imm(Num), 
-			Offset, UIndex, FailLabName, IndexOkLab)];
+      gen_element_tail(Dst, Tuple, hipe_rtl:mk_imm(Num), UIndex, FailLabName,
+		       IndexOkLab);
     _ ->
       %% This is 2 branches, 2 loads and 6 alus = 10 instr
       [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab), FailLabName, 0.99),
        FixnumOkLab,
-       hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
-       hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
+       get_header(Header, Tuple),
        untag_fixnum(UIndex, Index),
        hipe_rtl:mk_alu(Arity,Header,'srl',hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))|
-       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, 
-			FailLabName, IndexOkLab)]
+       gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)]
   end;
 element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) ->
   FixnumOkLab = hipe_rtl:mk_new_label(),
   BoxedOkLab = hipe_rtl:mk_new_label(),
   TupleOkLab = hipe_rtl:mk_new_label(),
   IndexOkLab = hipe_rtl:mk_new_label(),
-  Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple
   Header = hipe_rtl:mk_new_reg_gcsafe(),
   UIndex = hipe_rtl:mk_new_reg_gcsafe(),
   Arity = hipe_rtl:mk_new_reg_gcsafe(),
-  InvIndex = hipe_rtl:mk_new_reg_gcsafe(),
-  Offset = hipe_rtl:mk_new_reg_gcsafe(),
   case IndexInfo of
     fixnums ->
       %% This is 3 branches, 2 loads and 5 alus = 10 instr
       [test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab),
 		     FailLabName, 0.99),
        BoxedOkLab,
-       hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
-       hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
+       get_header(Header, Tuple),
        hipe_rtl:mk_branch(Header, 'and',
 			  hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
 			  hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
@@ -720,23 +724,21 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) ->
        untag_fixnum(UIndex, Index),
        hipe_rtl:mk_alu(Arity, Header, 'srl',
 		       hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))|
-       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, 
-			UIndex, FailLabName, IndexOkLab)];
+       gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)];
     Num when is_integer(Num) ->
       %% This is 3 branches, 2 loads and 4 alus = 9 instr
       [test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab),
 		     FailLabName, 0.99),
        BoxedOkLab,
-       hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
-       hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
+       get_header(Header, Tuple),
        hipe_rtl:mk_branch(Header, 'and',
 			  hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
 			  hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
        TupleOkLab,
        hipe_rtl:mk_alu(Arity, Header, 'srl', 
 		       hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))|
-       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, 
-			hipe_rtl:mk_imm(Num), FailLabName, IndexOkLab)];
+       gen_element_tail(Dst, Tuple, Arity, hipe_rtl:mk_imm(Num), FailLabName,
+			IndexOkLab)];
     _ ->
       %% This is 4 branches, 2 loads, and 6 alus = 12 instr :(
       [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab),
@@ -745,8 +747,7 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) ->
        test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab),
 		     FailLabName, 0.99),
        BoxedOkLab,
-       hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
-       hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
+       get_header(Header, Tuple),
        hipe_rtl:mk_branch(Header, 'and',
 			  hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
 			  hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
@@ -754,20 +755,21 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) ->
        untag_fixnum(UIndex, Index),
        hipe_rtl:mk_alu(Arity, Header, 'srl',
 		       hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))|
-       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset,
-			UIndex, FailLabName, IndexOkLab)]
+       gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)]
   end.
 
-gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, 
-		 UIndex, FailLabName, IndexOkLab) ->
+gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab) ->
+  ZeroIndex = hipe_rtl:mk_new_reg_gcsafe(),
+  Offset = hipe_rtl:mk_new_reg_gcsafe(),
+  Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple
   %% now check that 1 <= UIndex <= Arity
-  %% if UIndex < 1, then (Arity - UIndex) >= Arity
-  %% if UIndex > Arity, then (Arity - UIndex) < 0, which is >=u Arity
-  %% otherwise, 0 <= (Arity - UIndex) < Arity
-  [hipe_rtl:mk_alu(InvIndex, Arity, 'sub', UIndex),
-   hipe_rtl:mk_branch(InvIndex, 'geu', Arity, FailLabName,
+  %% by checking the equivalent (except for when Arity>=2^(WordSize-1))
+  %% (UIndex - 1) <u Arity
+  [hipe_rtl:mk_alu(ZeroIndex, UIndex, 'sub', hipe_rtl:mk_imm(1)),
+   hipe_rtl:mk_branch(ZeroIndex, 'geu', Arity, FailLabName,
 		      hipe_rtl:label_name(IndexOkLab), 0.01),
    IndexOkLab,
+   hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
    hipe_rtl:mk_alu(Offset, UIndex, 'sll',
                    hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size())),
    hipe_rtl:mk_load(Dst, Ptr, Offset)].
diff --git a/lib/hipe/sparc/hipe_sparc.erl b/lib/hipe/sparc/hipe_sparc.erl
index 916857b224..22e0761b69 100644
--- a/lib/hipe/sparc/hipe_sparc.erl
+++ b/lib/hipe/sparc/hipe_sparc.erl
@@ -87,6 +87,9 @@
 
 	 mk_pseudo_set/2,
 
+	 mk_pseudo_spill_move/3,
+	 is_pseudo_spill_move/1,
+
 	 mk_pseudo_tailcall/4,
 	 pseudo_tailcall_funv/1,
 	 pseudo_tailcall_linkage/1,
@@ -117,6 +120,9 @@
 	 pseudo_fmove_src/1,
 	 pseudo_fmove_dst/1,
 
+	 mk_pseudo_spill_fmove/3,
+	 is_pseudo_spill_fmove/1,
+
 	 mk_pseudo_fstore/3,
 	 mk_fstore/4,
 
@@ -269,6 +275,10 @@ mk_pseudo_ret() -> #pseudo_ret{}.
 
 mk_pseudo_set(Imm, Dst) -> #pseudo_set{imm=Imm, dst=Dst}.
 
+mk_pseudo_spill_move(Src, Temp, Dst) ->
+  #pseudo_spill_move{src=Src, temp=Temp, dst=Dst}.
+is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move).
+
 mk_pseudo_tailcall(FunV, Arity, StkArgs, Linkage) ->
   #pseudo_tailcall{funv=FunV, arity=Arity, stkargs=StkArgs, linkage=Linkage}.
 pseudo_tailcall_funv(#pseudo_tailcall{funv=FunV}) -> FunV.
@@ -375,6 +385,10 @@ is_pseudo_fmove(I) -> case I of #pseudo_fmove{} -> true; _ -> false end.
 pseudo_fmove_src(#pseudo_fmove{src=Src}) -> Src.
 pseudo_fmove_dst(#pseudo_fmove{dst=Dst}) -> Dst.
 
+mk_pseudo_spill_fmove(Src, Temp, Dst) ->
+  #pseudo_spill_fmove{src=Src, temp=Temp, dst=Dst}.
+is_pseudo_spill_fmove(I) -> is_record(I, pseudo_spill_fmove).
+
 mk_pseudo_fstore(Src, Base, Disp) ->
   #pseudo_fstore{src=Src, base=Base, disp=Disp}.
 
diff --git a/lib/hipe/sparc/hipe_sparc.hrl b/lib/hipe/sparc/hipe_sparc.hrl
index 4eae6777a9..f60e516e59 100644
--- a/lib/hipe/sparc/hipe_sparc.hrl
+++ b/lib/hipe/sparc/hipe_sparc.hrl
@@ -88,6 +88,8 @@
 -record(pseudo_move, {src, dst}).
 -record(pseudo_ret, {}).
 -record(pseudo_set, {imm, dst}).
+-record(pseudo_spill_fmove, {src, temp, dst}).
+-record(pseudo_spill_move, {src, temp, dst}).
 -record(pseudo_tailcall, {funv, arity, stkargs, linkage}).
 -record(pseudo_tailcall_prepare, {}).
 -record(rdy, {dst}).
diff --git a/lib/hipe/sparc/hipe_sparc_assemble.erl b/lib/hipe/sparc/hipe_sparc_assemble.erl
index 08bd47c4d2..2b82f41d23 100644
--- a/lib/hipe/sparc/hipe_sparc_assemble.erl
+++ b/lib/hipe/sparc/hipe_sparc_assemble.erl
@@ -32,7 +32,7 @@ assemble(CompiledCode, Closures, Exports, Options) ->
 	  || {MFA, Defun} <- CompiledCode],
   %%
   {ConstAlign,ConstSize,ConstMap,RefsFromConsts} =
-    hipe_pack_constants:pack_constants(Code, 4),
+    hipe_pack_constants:pack_constants(Code),
   %%
   {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} =
     encode(translate(Code, ConstMap), Options),
diff --git a/lib/hipe/sparc/hipe_sparc_cfg.erl b/lib/hipe/sparc/hipe_sparc_cfg.erl
index 27374d187b..45c8e887b5 100644
--- a/lib/hipe/sparc/hipe_sparc_cfg.erl
+++ b/lib/hipe/sparc/hipe_sparc_cfg.erl
@@ -23,6 +23,7 @@
 -export([linearise/1]).
 -export([params/1]).
 -export([arity/1]). % for linear scan
+-export([redirect_jmp/3, branch_preds/1]).
 
 -define(SPARC_CFG, true).     % needed for cfg.inc
 
@@ -77,28 +78,53 @@ branch_successors(Branch) ->
     #pseudo_tailcall{} -> []
   end.
 
+branch_preds(Branch) ->
+  case Branch of
+    #jmp{labels=Labels} ->
+      Prob = 1.0/length(Labels),
+      [{L, Prob} || L <- Labels];
+    #pseudo_bp{true_label=TrueLab,false_label=FalseLab,pred=Pred} ->
+      [{FalseLab, 1.0-Pred}, {TrueLab, Pred}];
+    #pseudo_call{contlab=ContLab, sdesc=#sparc_sdesc{exnlab=[]}} ->
+      %% A function can still cause an exception, even if we won't catch it
+      [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}];
+    #pseudo_call{contlab=ContLab, sdesc=#sparc_sdesc{exnlab=ExnLab}} ->
+      CallExnPred = hipe_bb_weights:call_exn_pred(),
+      [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}];
+    _ ->
+      case branch_successors(Branch) of
+	[] -> [];
+	[Single] -> [{Single, 1.0}]
+      end
+  end.
+
 -ifdef(REMOVE_TRIVIAL_BBS_NEEDED).
 fails_to(_Instr) -> [].
 -endif.
 
--ifdef(notdef).
 redirect_jmp(I, Old, New) ->
   case I of
-    #b_label{label=Label} ->
-      if Old =:= Label -> I#b_label{label=New};
+    #bp{'cond'='a',label=Label} ->
+      if Old =:= Label -> I#bp{label=New};
 	 true -> I
       end;
-    #pseudo_bc{true_label=TrueLab, false_label=FalseLab} ->
-      I1 = if Old =:= TrueLab -> I#pseudo_bc{true_label=New};
+    #pseudo_bp{true_label=TrueLab, false_label=FalseLab} ->
+      I1 = if Old =:= TrueLab -> I#pseudo_bp{true_label=New};
 	      true -> I
 	   end,
-      if Old =:= FalseLab -> I1#pseudo_bc{false_label=New};
+      if Old =:= FalseLab -> I1#pseudo_bp{false_label=New};
 	 true -> I1
       end;
-    %% handle pseudo_call too?
-    _ -> I
+    #pseudo_call{contlab=ContLab0, sdesc=SDesc0} ->
+      SDesc = case SDesc0 of
+		#sparc_sdesc{exnlab=Old} -> SDesc0#sparc_sdesc{exnlab=New};
+		#sparc_sdesc{exnlab=_}   -> SDesc0
+	      end,
+      ContLab = if Old =:= ContLab0 -> New;
+		   true -> ContLab0
+		end,
+      I#pseudo_call{sdesc=SDesc, contlab=ContLab}
   end.
--endif.
 
 mk_goto(Label) ->
   hipe_sparc:mk_b_label(Label).
diff --git a/lib/hipe/sparc/hipe_sparc_defuse.erl b/lib/hipe/sparc/hipe_sparc_defuse.erl
index cb75f82e2b..4d4b11e301 100644
--- a/lib/hipe/sparc/hipe_sparc_defuse.erl
+++ b/lib/hipe/sparc/hipe_sparc_defuse.erl
@@ -39,6 +39,7 @@ insn_def_gpr(I) ->
     #pseudo_call{} -> call_clobbered_gpr();
     #pseudo_move{dst=Dst} -> [Dst];
     #pseudo_set{dst=Dst} -> [Dst];
+    #pseudo_spill_move{temp=Temp, dst=Dst} -> [Temp, Dst];
     #pseudo_tailcall_prepare{} -> tailcall_clobbered_gpr();
     #rdy{dst=Dst} -> [Dst];
     #sethi{dst=Dst} -> [Dst];
@@ -72,6 +73,7 @@ insn_use_gpr(I) ->
       funv_use(FunV, arity_use_gpr(Arity));
     #pseudo_move{src=Src} -> [Src];
     #pseudo_ret{} -> [hipe_sparc:mk_rv()];
+    #pseudo_spill_move{src=Src} -> [Src];
     #pseudo_tailcall{funv=FunV,arity=Arity,stkargs=StkArgs} ->
       addsrcs(StkArgs, addtemps(tailcall_clobbered_gpr(), funv_use(FunV, arity_use_gpr(Arity))));
     #store{src=Src,base=Base,disp=Disp} ->
@@ -112,6 +114,7 @@ insn_def_fpr(I) ->
     #fp_unary{dst=Dst} -> [Dst];
     #pseudo_fload{dst=Dst} -> [Dst];
     #pseudo_fmove{dst=Dst} -> [Dst];
+    #pseudo_spill_fmove{temp=Temp, dst=Dst} -> [Temp, Dst];
     _ -> []
   end.
 
@@ -130,6 +133,7 @@ insn_use_fpr(I) ->
     #fp_unary{src=Src} -> [Src];
     #pseudo_fmove{src=Src} -> [Src];
     #pseudo_fstore{src=Src} -> [Src];
+    #pseudo_spill_fmove{src=Src} -> [Src];
     _ -> []
   end.
 
diff --git a/lib/hipe/sparc/hipe_sparc_frame.erl b/lib/hipe/sparc/hipe_sparc_frame.erl
index 6f29c3c905..1f2a259ca1 100644
--- a/lib/hipe/sparc/hipe_sparc_frame.erl
+++ b/lib/hipe/sparc/hipe_sparc_frame.erl
@@ -82,6 +82,10 @@ do_insn(I, LiveOut, Context, FPoff) ->
       {do_pseudo_tailcall(I, Context), context_framesize(Context)};
     #pseudo_fmove{} ->
       {do_pseudo_fmove(I, Context, FPoff), FPoff};
+    #pseudo_spill_move{} ->
+      {do_pseudo_spill_move(I, Context, FPoff), FPoff};
+    #pseudo_spill_fmove{} ->
+      {do_pseudo_spill_fmove(I, Context, FPoff), FPoff};
     _ ->
       {[I], FPoff}
   end.
@@ -110,6 +114,22 @@ do_pseudo_move(I, Context, FPoff) ->
       end
   end.
 
+do_pseudo_spill_move(I, Context, FPoff) ->
+  #pseudo_spill_move{src=Src,temp=Temp,dst=Dst} = I,
+  case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of
+    false -> % Register allocator changed its mind, turn back to move
+      do_pseudo_move(hipe_sparc:mk_pseudo_move(Src, Dst), Context, FPoff);
+    true ->
+      SrcOffset = pseudo_offset(Src, FPoff, Context),
+      DstOffset = pseudo_offset(Dst, FPoff, Context),
+      case SrcOffset =:= DstOffset of
+	true -> []; % omit move-to-self
+	false ->
+	  mk_load(hipe_sparc:mk_sp(), SrcOffset, Temp,
+		 mk_store(Temp, hipe_sparc:mk_sp(), DstOffset, []))
+      end
+  end.
+
 do_pseudo_fmove(I, Context, FPoff) ->
   Dst = hipe_sparc:pseudo_fmove_dst(I),
   Src = hipe_sparc:pseudo_fmove_src(I),
@@ -127,6 +147,22 @@ do_pseudo_fmove(I, Context, FPoff) ->
       end
   end.
 
+do_pseudo_spill_fmove(I, Context, FPoff) ->
+  #pseudo_spill_fmove{src=Src,temp=Temp,dst=Dst} = I,
+  case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of
+    false -> % Register allocator changed its mind, turn back to fmove
+      do_pseudo_fmove(hipe_sparc:mk_pseudo_fmove(Src, Dst), Context, FPoff);
+    true ->
+      SrcOffset = pseudo_offset(Src, FPoff, Context),
+      DstOffset = pseudo_offset(Dst, FPoff, Context),
+      case SrcOffset =:= DstOffset of
+	true -> []; % omit move-to-self
+	false ->
+	  mk_fload(hipe_sparc:mk_sp(), SrcOffset, Temp)
+	    ++ mk_fstore(Temp, hipe_sparc:mk_sp(), DstOffset)
+      end
+  end.
+
 pseudo_offset(Temp, FPoff, Context) ->
   FPoff + context_offset(Context, Temp).
 
diff --git a/lib/hipe/sparc/hipe_sparc_ra_finalise.erl b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl
index 5fdb73e197..a724821992 100644
--- a/lib/hipe/sparc/hipe_sparc_ra_finalise.erl
+++ b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl
@@ -38,6 +38,7 @@ ra_insn(I, Map, FPMap) ->
     #pseudo_call{} -> ra_pseudo_call(I, Map);
     #pseudo_move{} -> ra_pseudo_move(I, Map);
     #pseudo_set{} -> ra_pseudo_set(I, Map);
+    #pseudo_spill_move{} -> ra_pseudo_spill_move(I, Map);
     #pseudo_tailcall{} -> ra_pseudo_tailcall(I, Map);
     #rdy{} -> ra_rdy(I, Map);
     #sethi{} -> ra_sethi(I, Map);
@@ -47,6 +48,7 @@ ra_insn(I, Map, FPMap) ->
     #pseudo_fload{} -> ra_pseudo_fload(I, Map, FPMap);
     #pseudo_fmove{} -> ra_pseudo_fmove(I, FPMap);
     #pseudo_fstore{} -> ra_pseudo_fstore(I, Map, FPMap);
+    #pseudo_spill_fmove{} -> ra_pseudo_spill_fmove(I, FPMap);
     _ -> I
   end.
 
@@ -80,6 +82,12 @@ ra_pseudo_set(I=#pseudo_set{dst=Dst}, Map) ->
   NewDst = ra_temp(Dst, Map),
   I#pseudo_set{dst=NewDst}.
 
+ra_pseudo_spill_move(I=#pseudo_spill_move{src=Src,temp=Temp,dst=Dst}, Map) ->
+  NewSrc = ra_temp(Src, Map),
+  NewTemp = ra_temp(Temp, Map),
+  NewDst = ra_temp(Dst, Map),
+  I#pseudo_spill_move{src=NewSrc,temp=NewTemp,dst=NewDst}.
+
 ra_pseudo_tailcall(I=#pseudo_tailcall{funv=FunV,stkargs=StkArgs}, Map) ->
   NewFunV = ra_funv(FunV, Map),
   NewStkArgs = ra_args(StkArgs, Map),
@@ -120,6 +128,13 @@ ra_pseudo_fmove(I=#pseudo_fmove{src=Src,dst=Dst}, FPMap) ->
   NewDst = ra_temp_fp(Dst, FPMap),
   I#pseudo_fmove{src=NewSrc,dst=NewDst}.
 
+ra_pseudo_spill_fmove(I=#pseudo_spill_fmove{src=Src,temp=Temp,dst=Dst},
+		      FPMap) ->
+  NewSrc = ra_temp_fp(Src, FPMap),
+  NewTemp = ra_temp_fp(Temp, FPMap),
+  NewDst = ra_temp_fp(Dst, FPMap),
+  I#pseudo_spill_fmove{src=NewSrc,temp=NewTemp,dst=NewDst}.
+
 ra_pseudo_fstore(I=#pseudo_fstore{src=Src,base=Base}, Map, FPMap) ->
   NewSrc = ra_temp_fp(Src, FPMap),
   NewBase = ra_temp(Base, Map),
diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl
index 984c97fbd4..d3ecb43ec6 100644
--- a/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl
+++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl
@@ -54,6 +54,7 @@ do_insn(I, TempMap, Strategy) ->
     #pseudo_call{} -> do_pseudo_call(I, TempMap, Strategy);
     #pseudo_move{} -> do_pseudo_move(I, TempMap, Strategy);
     #pseudo_set{} -> do_pseudo_set(I, TempMap, Strategy);
+    #pseudo_spill_move{} -> do_pseudo_spill_move(I, TempMap, Strategy);
     #pseudo_tailcall{} -> do_pseudo_tailcall(I, TempMap, Strategy);
     #rdy{} -> do_rdy(I, TempMap, Strategy);
     #sethi{} -> do_sethi(I, TempMap, Strategy);
@@ -92,14 +93,16 @@ do_pseudo_call(I=#pseudo_call{funv=FunV}, TempMap, Strategy) ->
 
 do_pseudo_move(I=#pseudo_move{src=Src,dst=Dst}, TempMap, Strategy) ->
   %% Either Dst or Src (but not both) may be a pseudo temp.
-  %% pseudo_move is a special case: in [XXX: not pseudo_tailcall]
-  %% all other instructions, all temps must be non-pseudos
-  %% after register allocation.
-  case temp_is_spilled(Dst, TempMap) of
-    true -> % Src must not be a pseudo
-      {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy),
-      NewI = I#pseudo_move{src=NewSrc},
-      {FixSrc ++ [NewI], DidSpill};
+  %% pseudo_move and pseudo_spill_move [XXX: not pseudo_tailcall]
+  %% are special cases: in all other instructions, all temps must
+  %% be non-pseudos after register allocation.
+  case temp_is_spilled(Src, TempMap)
+    andalso temp_is_spilled(Dst, TempMap)
+  of
+    true -> % Turn into pseudo_spill_move
+      Temp = clone(Src, temp1(Strategy)),
+      NewI = #pseudo_spill_move{src=Src,temp=Temp,dst=Dst},
+      {[NewI], true};
     _ ->
       {[I], false}
   end.
@@ -109,6 +112,11 @@ do_pseudo_set(I=#pseudo_set{dst=Dst}, TempMap, Strategy) ->
   NewI = I#pseudo_set{dst=NewDst},
   {[NewI | FixDst], DidSpill}.
 
+do_pseudo_spill_move(I=#pseudo_spill_move{temp=Temp}, TempMap, _Strategy) ->
+  %% Temp is above the low water mark and must not have been spilled
+  false = temp_is_spilled(Temp, TempMap),
+  {[I], false}.
+
 do_pseudo_tailcall(I=#pseudo_tailcall{funv=FunV}, TempMap, Strategy) ->
   {FixFunV,NewFunV,DidSpill} = fix_funv(FunV, TempMap, Strategy),
   NewI = I#pseudo_tailcall{funv=NewFunV},
diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl
index 751e91425c..5fa3a5fc59 100644
--- a/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl
+++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl
@@ -43,6 +43,7 @@ do_insn(I, TempMap) ->
     #pseudo_fload{} -> do_pseudo_fload(I, TempMap);
     #pseudo_fmove{} -> do_pseudo_fmove(I, TempMap);
     #pseudo_fstore{} -> do_pseudo_fstore(I, TempMap);
+    #pseudo_spill_fmove{} -> do_pseudo_spill_fmove(I, TempMap);
     _ -> {[I], false}
   end.
 
@@ -67,11 +68,13 @@ do_pseudo_fload(I=#pseudo_fload{dst=Dst}, TempMap) ->
   {[NewI | FixDst], DidSpill}.
 
 do_pseudo_fmove(I=#pseudo_fmove{src=Src,dst=Dst}, TempMap) ->
-  case temp_is_spilled(Dst, TempMap) of
-    true ->
-      {FixSrc,NewSrc,DidSpill} = fix_src(Src, TempMap),
-      NewI = I#pseudo_fmove{src=NewSrc},
-      {FixSrc ++ [NewI], DidSpill};
+  case temp_is_spilled(Src, TempMap)
+    andalso temp_is_spilled(Dst, TempMap)
+  of
+    true -> % Turn into pseudo_spill_fmove
+      Temp = clone(Src),
+      NewI = #pseudo_spill_fmove{src=Src,temp=Temp,dst=Dst},
+      {[NewI], true};
     _ ->
       {[I], false}
   end.
@@ -81,6 +84,11 @@ do_pseudo_fstore(I=#pseudo_fstore{src=Src}, TempMap) ->
   NewI = I#pseudo_fstore{src=NewSrc},
   {FixSrc ++ [NewI], DidSpill}.
 
+do_pseudo_spill_fmove(I=#pseudo_spill_fmove{temp=Temp}, TempMap) ->
+  %% Temp is above the low water mark and must not have been spilled
+  false = temp_is_spilled(Temp, TempMap),
+  {[I], false}.
+
 %%% Fix Dst and Src operands.
 
 fix_src(Src, TempMap) ->
diff --git a/lib/hipe/sparc/hipe_sparc_subst.erl b/lib/hipe/sparc/hipe_sparc_subst.erl
index 1d0671464e..ce3bbb813a 100644
--- a/lib/hipe/sparc/hipe_sparc_subst.erl
+++ b/lib/hipe/sparc/hipe_sparc_subst.erl
@@ -44,6 +44,8 @@ insn_temps(T, I) ->
       #pseudo_move{src=S,dst=D} -> I#pseudo_move{src=T(S),dst=T(D)};
       #pseudo_ret{} -> I;
       #pseudo_set{dst=D}-> I#pseudo_set{dst=T(D)};
+      #pseudo_spill_move{src=S,temp=U,dst=D} ->
+	  I#pseudo_spill_move{src=T(S),temp=T(U),dst=T(D)};
       #pseudo_tailcall{funv=F,stkargs=Stk} ->
 	  I#pseudo_tailcall{funv=funv_temps(T,F),stkargs=lists:map(Arg,Stk)};
       #pseudo_tailcall_prepare{} -> I;
@@ -57,7 +59,9 @@ insn_temps(T, I) ->
 	  I#pseudo_fload{base=T(B),disp=S2(Di),dst=T(Ds)};
       #pseudo_fmove{src=S,dst=D} -> I#pseudo_fmove{src=T(S),dst=T(D)};
       #pseudo_fstore{src=S,base=B,disp=D} ->
-	  I#pseudo_fstore{src=T(S),base=T(B),disp=S2(D)}
+	  I#pseudo_fstore{src=T(S),base=T(B),disp=S2(D)};
+      #pseudo_spill_fmove{src=S,temp=U,dst=D} ->
+	  I#pseudo_spill_fmove{src=T(S),temp=T(U),dst=T(D)}
   end.
 
 -spec src2_temps(subst_fun(), src2()) -> src2().
diff --git a/lib/hipe/test/basic_SUITE_data/basic_bugs_hipe.erl b/lib/hipe/test/basic_SUITE_data/basic_bugs_hipe.erl
index caa0e71d0b..430e097b91 100644
--- a/lib/hipe/test/basic_SUITE_data/basic_bugs_hipe.erl
+++ b/lib/hipe/test/basic_SUITE_data/basic_bugs_hipe.erl
@@ -18,6 +18,7 @@ test() ->
   ok = test_R12B5_seg_fault(),
   ok = test_switch_neg_int(),
   ok = test_icode_range_anal(),
+  ok = test_icode_range_call(),
   ok.
 
 %%-----------------------------------------------------------------------
@@ -461,3 +462,44 @@ g(X, Z) ->
     test -> non_zero_test;
     other -> other
   end.
+
+%%-----------------------------------------------------------------------
+%% From: Rich Neswold
+%% Date: Oct 5, 2016
+%%
+%% The following was a bug in the HiPE compiler's range analysis. The
+%% function range_client/2 below would not stop when N reached 0,
+%% but keep recursing into the second clause forever.
+%%
+%% The problem turned out to be in hipe_icode_range:analyse_call/2,
+%% which would not update the argument ranges of the callee if the
+%% result of the call was ignored.
+%% -----------------------------------------------------------------------
+-define(TIMEOUT, 42).
+
+test_icode_range_call() ->
+    Self = self(),
+    Client = spawn_link(fun() -> range_client(Self, 4) end),
+    range_server(4, Client).
+
+range_server(0, _Client) ->
+    receive
+        stopping -> ok;
+        {called_with, 0} -> error(failure)
+    after ?TIMEOUT -> error(timeout)
+    end;
+range_server(N, Client) ->
+    receive
+        {called_with, N} ->
+            Client ! proceed
+    after ?TIMEOUT -> error(timeout)
+    end,
+    range_server(N-1, Client). % tailcall (so the bug does not affect it)
+
+range_client(Server, 0) ->
+    Server ! stopping;
+range_client(Server, N) ->
+    Server ! {called_with, N},
+    receive proceed -> ok end,
+    range_client(Server, N - 1), % non-tailrecursive call with ignored result
+    ok.
diff --git a/lib/hipe/test/basic_SUITE_data/basic_edge_cases.erl b/lib/hipe/test/basic_SUITE_data/basic_edge_cases.erl
new file mode 100644
index 0000000000..9bf5cf52cd
--- /dev/null
+++ b/lib/hipe/test/basic_SUITE_data/basic_edge_cases.erl
@@ -0,0 +1,142 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%----------------------------------------------------------------------
+%%% Contains tests of edge cases: float spilling and infinite loops
+%%%----------------------------------------------------------------------
+-module(basic_edge_cases).
+
+-export([test/0]).
+
+test() ->
+  ok = test_float_spills(),
+  ok = test_infinite_loops(),
+  ok.
+
+%% Contains more float temps live at a single point than there are float
+%% registers in any backend
+
+test_float_spills() ->
+    {{{2942.0,4670.0,3198.0,4926.0,2206.0,4734.0},
+      {3118.0,2062.0,5174.0,3038.0,3618.0,3014.0},
+      {2542.0,2062.0,4934.0,2590.0,3098.0,3062.0},
+      {2950.0,3666.0,2574.0,5038.0,1866.0,2946.0},
+      {3126.0,3050.0,3054.0,5070.0,2258.0,2714.0},
+      {4734.0,2206.0,4926.0,3198.0,4670.0,2942.0}},
+     58937.0} =
+	mat66_flip_sum(35.0,86.0,32.0,88.0,33.0,57.0,
+		       22.0,77.0,91.0,80.0,14.0,33.0,
+		       51.0,28.0,87.0,20.0,91.0,11.0,
+		       68.0,83.0,64.0,82.0,10.0,86.0,
+		       74.0,18.0,08.0,52.0,10.0,14.0,
+		       89.0,34.0,64.0,66.0,58.0,55.0,
+		       0.0, 5),
+    ok.
+
+mat66_flip_sum(M11, M12, M13, M14, M15, M16,
+	       M21, M22, M23, M24, M25, M26,
+	       M31, M32, M33, M34, M35, M36,
+	       M41, M42, M43, M44, M45, M46,
+	       M51, M52, M53, M54, M55, M56,
+	       M61, M62, M63, M64, M65, M66,
+	       Acc, Ctr)
+  when is_float(M11), is_float(M12), is_float(M13),
+       is_float(M14), is_float(M15), is_float(M16),
+       is_float(M21), is_float(M22), is_float(M23),
+       is_float(M24), is_float(M25), is_float(M26),
+       is_float(M31), is_float(M32), is_float(M33),
+       is_float(M34), is_float(M35), is_float(M36),
+       is_float(M41), is_float(M42), is_float(M43),
+       is_float(M44), is_float(M45), is_float(M46),
+       is_float(M51), is_float(M52), is_float(M53),
+       is_float(M54), is_float(M55), is_float(M56),
+       is_float(M61), is_float(M62), is_float(M63),
+       is_float(M64), is_float(M65), is_float(M66),
+       is_float(Acc) ->
+    R11 = M66+M11, R12 = M65+M12, R13 = M64+M13,
+    R14 = M63+M14, R15 = M62+M15, R16 = M61+M16,
+    R21 = M56+M21, R22 = M55+M22, R23 = M54+M23,
+    R24 = M53+M24, R25 = M52+M25, R26 = M51+M26,
+    R31 = M46+M31, R32 = M45+M32, R33 = M44+M33,
+    R34 = M43+M34, R35 = M42+M35, R36 = M41+M36,
+    R41 = M26+M41, R42 = M25+M42, R43 = M24+M43,
+    R44 = M23+M44, R45 = M22+M45, R46 = M21+M46,
+    R51 = M36+M51, R52 = M35+M52, R53 = M34+M53,
+    R54 = M33+M54, R55 = M32+M55, R56 = M31+M56,
+    R61 = M16+M61, R62 = M15+M62, R63 = M14+M63,
+    R64 = M13+M64, R65 = M12+M65, R66 = M11+M66,
+    case Ctr of
+	0 ->
+	    {{{R11, R12, R13, R14, R15, R16},
+	      {R21, R22, R23, R24, R25, R26},
+	      {R31, R32, R33, R34, R35, R36},
+	      {R41, R42, R43, R44, R45, R46},
+	      {R51, R52, R53, R54, R55, R56},
+	      {R61, R62, R63, R64, R65, R66}},
+	     Acc};
+	_ ->
+	    NewAcc = 0.0 + M11 + M12 + M13 + M14 + M15 + M16 +
+		+ M21 + M22 + M23 + M24 + M25 + M26
+		+ M31 + M32 + M33 + M34 + M35 + M36
+		+ M41 + M42 + M43 + M44 + M45 + M46
+		+ M51 + M52 + M53 + M54 + M55 + M56
+		+ M61 + M62 + M63 + M64 + M65 + M66
+		+ Acc,
+	    mat66_flip_sum(R11+1.0, R12+1.0, R13+1.0, R14+1.0, R15+1.0, R16+1.0,
+			   R21+1.0, R22+1.0, R23+1.0, R24+1.0, R25+1.0, R26+1.0,
+			   R31+1.0, R32+1.0, R33+1.0, R34+1.0, R35+1.0, R36+1.0,
+			   R41+1.0, R42+1.0, R43+1.0, R44+1.0, R45+1.0, R46+1.0,
+			   R51+1.0, R52+1.0, R53+1.0, R54+1.0, R55+1.0, R56+1.0,
+			   R61+1.0, R62+1.0, R63+1.0, R64+1.0, R65+1.0, R66+1.0,
+			   NewAcc, Ctr-1)
+    end.
+
+%% Infinite loops must receive reduction tests, and might trip up basic block
+%% weighting, leading to infinite weights and/or divisions by zero.
+
+test_infinite_loops() ->
+  OldTrapExit = process_flag(trap_exit, true),
+  ok = test_infinite_loop(fun infinite_recursion/0),
+  ok = test_infinite_loop(fun infinite_corecursion/0),
+  RecursiveFun = fun RecursiveFun() -> RecursiveFun() end,
+  ok = test_infinite_loop(RecursiveFun),
+  CorecursiveFunA = fun CorecursiveFunA() ->
+		       CorecursiveFunA1 = fun () -> CorecursiveFunA() end,
+		       CorecursiveFunA1()
+		   end,
+  ok = test_infinite_loop(CorecursiveFunA),
+  CorecursiveFunB1 = fun(CorecursiveFunB) -> CorecursiveFunB() end,
+  CorecursiveFunB = fun CorecursiveFunB() ->
+		       CorecursiveFunB1(CorecursiveFunB)
+		   end,
+  ok = test_infinite_loop(CorecursiveFunB),
+  CorecursiveFunC1 = fun CorecursiveFunC1(Other) ->
+			 Other(CorecursiveFunC1)
+		     end,
+  CorecursiveFunC = fun CorecursiveFunC(Other) ->
+		       Other(CorecursiveFunC)
+		   end,
+  ok = test_infinite_loop(fun() -> CorecursiveFunC(CorecursiveFunC1) end),
+  ok = test_infinite_loop(fun() -> CorecursiveFunC(CorecursiveFunC) end),
+  true = process_flag(trap_exit, OldTrapExit),
+  ok.
+
+-define(INFINITE_LOOP_TIMEOUT, 100).
+test_infinite_loop(Fun) ->
+  Tester = spawn_link(Fun),
+  kill_soon(Tester),
+  receive {'EXIT', Tester, awake} ->
+      undefined = process_info(Tester),
+      ok
+  after ?INFINITE_LOOP_TIMEOUT -> error(timeout)
+  end.
+
+infinite_recursion() -> infinite_recursion().
+
+infinite_corecursion() -> infinite_corecursion_1().
+infinite_corecursion_1() -> infinite_corecursion().
+
+kill_soon(Pid) ->
+  _ = spawn_link(fun() ->
+		     timer:sleep(1),
+		     erlang:exit(Pid, awake)
+		 end),
+  ok.
diff --git a/lib/hipe/test/basic_SUITE_data/basic_tuples.erl b/lib/hipe/test/basic_SUITE_data/basic_tuples.erl
index 94c187e364..96e39d565a 100644
--- a/lib/hipe/test/basic_SUITE_data/basic_tuples.erl
+++ b/lib/hipe/test/basic_SUITE_data/basic_tuples.erl
@@ -55,6 +55,8 @@ test_element(T0, T1, T2, N) ->
   List = lists:seq(1, N),
   Tuple = list_to_tuple(List),
   ok = get_elements(List, Tuple, 1),
+  %% element/2 of larger tuple with omitted bounds test
+  true = lists:all(fun(I) -> I * I =:= square(I) end, lists:seq(1, 20)),
   %% some cases that throw exceptions
   {'EXIT', _} = (catch my_element(0, T2)),
   {'EXIT', _} = (catch my_element(3, T2)),
@@ -73,6 +75,18 @@ get_elements([Element|Rest], Tuple, Pos) ->
 get_elements([], _Tuple, _Pos) ->
   ok.
 
+squares() ->
+  {1*1,   2*2,   3*3,   4*4,   5*5,   6*6,   7*7,   8*8,   9*9,   10*10,
+   11*11, 12*12, 13*13, 14*14, 15*15, 16*16, 17*17, 18*18, 19*19, 20*20}.
+
+square(N) when is_integer(N), N >= 1, N =< 20 ->
+  %% The guard tests let the range analysis conclude N to be an integer in the
+  %% 1..20 range. 20-1=19 is bigger than ?SET_LIMIT in erl_types.erl, and will
+  %% thus be represented by an ?int_range() rather than an ?int_set().
+  %% Because of the range analysis, the bounds test of this element/2 call
+  %% should be omitted.
+  element(N, squares()).
+
 %%--------------------------------------------------------------------
 %% Tests set_element/3.
 
diff --git a/lib/hipe/util/Makefile b/lib/hipe/util/Makefile
index 04de7f7823..eeb81ac482 100644
--- a/lib/hipe/util/Makefile
+++ b/lib/hipe/util/Makefile
@@ -48,7 +48,7 @@ HIPE_MODULES = hipe_vectors
 else
 HIPE_MODULES =
 endif
-MODULES = hipe_timing hipe_dot hipe_digraph $(HIPE_MODULES)
+MODULES = hipe_timing hipe_dot hipe_digraph hipe_dsets $(HIPE_MODULES)
 
 HRL_FILES=
 ERL_FILES= $(MODULES:%=%.erl)
diff --git a/lib/hipe/util/hipe_dsets.erl b/lib/hipe/util/hipe_dsets.erl
new file mode 100644
index 0000000000..9492cab0ff
--- /dev/null
+++ b/lib/hipe/util/hipe_dsets.erl
@@ -0,0 +1,84 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%@doc
+%%	       IMMUTABLE DISJOINT SETS OF ARBITRARY TERMS
+%%
+%% The disjoint set forests data structure, for elements of arbitrary types.
+%% Note that find/2 returns an updated copy of the set along with the root.
+%%
+%% We could do this more efficiently if we restricted the elements to integers,
+%% and used the (mutable) hipe arrays. For arbitrary terms, ETS could be used;
+%% for a persistent interface (which isn't that nice when even accessors return
+%% modified copies), the array module could be used.
+-module(hipe_dsets).
+
+-export([new/1, find/2, union/3, to_map/1, to_rllist/1]).
+-export_type([dsets/1]).
+
+-opaque dsets(X) :: #{X => {node, X} | {root, non_neg_integer()}}.
+
+-spec new([E]) -> dsets(E).
+new(Elems) -> maps:from_list([{E,{root,0}} || E <- Elems]). % each its own root
+
+-spec find(E, dsets(E)) -> {E, dsets(E)}.
+find(E, DS0) ->
+  case DS0 of
+    #{E := {root,_}} -> {E, DS0}; % E is itself a root
+    #{E := {node,N}} -> % E points at N; follow the chain
+      case find(N, DS0) of
+	{N, _}=T -> T; % N is the root: E already points at it
+	{R, DS1} -> {R, DS1#{E := {node,R}}} % repoint E directly at root R
+      end;
+    _ -> error(badarg, [E, DS0]) % no root/node entry for E in DS0
+  end.
+
+-spec union(E, E, dsets(E)) -> dsets(E).
+union(X, Y, DS0) ->
+  {XRoot, DS1} = find(X, DS0),
+  case find(Y, DS1) of
+    {XRoot, DS2} -> DS2; % same root: already in one set
+    {YRoot, DS2} ->
+      #{XRoot := {root,XRR}, YRoot := {root,YRR}} = DS2, % counters of both roots
+      if XRR < YRR -> DS2#{XRoot := {node,YRoot}}; % smaller goes under larger
+	 XRR > YRR -> DS2#{YRoot := {node,XRoot}};
+	 true -> DS2#{YRoot := {node,XRoot}, XRoot := {root,XRR+1}} % tie
+      end
+  end.
+
+-spec to_map(dsets(E)) -> {#{Elem::E => Root::E}, dsets(E)}.
+to_map(DS) ->
+  to_map(maps:keys(DS), DS, #{}). % visit every key, accumulating into #{}
+
+to_map([], DS, Acc) -> {Acc, DS};
+to_map([K|Ks], DS0, Acc) ->
+  {KR, DS} = find(K, DS0), % thread the updated set through the loop
+  to_map(Ks, DS, Acc#{K => KR}).
+
+-spec to_rllist(dsets(E)) -> {[{Root::E, Elems::[E]}], dsets(E)}.
+to_rllist(DS0) ->
+  {Lists, DS} = to_rllist(maps:keys(DS0), #{}, DS0), % Lists: Root => Elems map
+  {maps:to_list(Lists), DS}.
+
+to_rllist([], Acc, DS) -> {Acc, DS};
+to_rllist([E|Es], Acc, DS0) ->
+  {ERoot, DS} = find(E, DS0),
+  to_rllist(Es, map_append(ERoot, E, Acc), DS). % file E under its root
+
+map_append(Key, Elem, Map) ->
+  case Map of
+    #{Key := List} -> Map#{Key := [Elem|List]}; % prepend to the existing list
+    #{} -> Map#{Key => [Elem]} % first element for this key
+  end.
diff --git a/lib/hipe/util/hipe_vectors.erl b/lib/hipe/util/hipe_vectors.erl
index fc4e4edb24..788dacd11b 100644
--- a/lib/hipe/util/hipe_vectors.erl
+++ b/lib/hipe/util/hipe_vectors.erl
@@ -116,8 +116,7 @@ get(Vec, Ix) ->
 %% ---------------------------------------------------------------------
 
 -ifdef(USE_ARRAYS).
-%%-opaque vector(E) :: array:array(E).
--type vector(E) :: array:array(E). % Work around dialyzer bug
+-opaque vector(E) :: array:array(E).
 
 new(N, V) -> array:new(N, {default, V}).
 size(V) -> array:size(V).
diff --git a/lib/hipe/vsn.mk b/lib/hipe/vsn.mk
index cb4174381a..172d976931 100644
--- a/lib/hipe/vsn.mk
+++ b/lib/hipe/vsn.mk
@@ -1 +1 @@
-HIPE_VSN = 3.15.3
+HIPE_VSN = 3.15.4
diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl
index 29cad6ca51..31e4f6e4ac 100644
--- a/lib/hipe/x86/hipe_rtl_to_x86.erl
+++ b/lib/hipe/x86/hipe_rtl_to_x86.erl
@@ -124,7 +124,6 @@ conv_insn(I, Map, Data) ->
 		     hipe_rtl:call_continuation(I),
 		     hipe_rtl:call_fail(I),
 		     hipe_rtl:call_type(I)),
-      %% XXX Fixme: this ++ is probably inefficient.
       {FixArgs++I2, Map2, Data};
     #comment{} ->
       I2 = [hipe_x86:mk_comment(hipe_rtl:comment_text(I))],
diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl
index cc1c75b04d..f514dd1ded 100644
--- a/lib/hipe/x86/hipe_x86.erl
+++ b/lib/hipe/x86/hipe_x86.erl
@@ -167,6 +167,12 @@
 
      mk_pseudo_spill/1,
 
+	 mk_pseudo_spill_fmove/3,
+	 is_pseudo_spill_fmove/1,
+
+	 mk_pseudo_spill_move/3,
+	 is_pseudo_spill_move/1,
+
 	 mk_pseudo_tailcall/4,
 	 %% is_pseudo_tailcall/1,
 	 pseudo_tailcall_fun/1,
@@ -425,6 +431,14 @@ mk_pseudo_jcc_simple(Cc, TrueLabel, FalseLabel, Pred) ->
 mk_pseudo_spill(List) ->
     #pseudo_spill{args=List}.
 
+mk_pseudo_spill_fmove(Src, Temp, Dst) -> % build a #pseudo_spill_fmove{}
+    #pseudo_spill_fmove{src=Src, temp=Temp, dst=Dst}.
+is_pseudo_spill_fmove(I) -> is_record(I, pseudo_spill_fmove). % record test
+
+mk_pseudo_spill_move(Src, Temp, Dst) -> % build a #pseudo_spill_move{}
+    #pseudo_spill_move{src=Src, temp=Temp, dst=Dst}.
+is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move). % record test
+
 mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage) ->
     check_linkage(Linkage),
     #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage}.
diff --git a/lib/hipe/x86/hipe_x86.hrl b/lib/hipe/x86/hipe_x86.hrl
index 567848bae5..6cd69905b2 100644
--- a/lib/hipe/x86/hipe_x86.hrl
+++ b/lib/hipe/x86/hipe_x86.hrl
@@ -91,6 +91,8 @@
 -record(pseudo_call, {'fun', sdesc, contlab, linkage}).
 -record(pseudo_jcc, {cc, true_label, false_label, pred}).
 -record(pseudo_spill, {args=[]}).
+-record(pseudo_spill_move, {src, temp, dst}).
+-record(pseudo_spill_fmove, {src, temp, dst}).
 -record(pseudo_tailcall, {'fun', arity, stkargs, linkage}).
 -record(pseudo_tailcall_prepare, {}).
 -record(push, {src}).
diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl
index ef9c32ef41..50919bdf4e 100644
--- a/lib/hipe/x86/hipe_x86_assemble.erl
+++ b/lib/hipe/x86/hipe_x86_assemble.erl
@@ -63,7 +63,7 @@ assemble(CompiledCode, Closures, Exports, Options) ->
 	  || {MFA, Defun} <- CompiledCode],
   %%
   {ConstAlign,ConstSize,ConstMap,RefsFromConsts} =
-    hipe_pack_constants:pack_constants(Code, ?HIPE_X86_REGISTERS:alignment()),
+    hipe_pack_constants:pack_constants(Code),
   %%
   {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} =
     encode(translate(Code, ConstMap, Options), Options),
@@ -148,6 +148,8 @@ insn_size(I) ->
 
 translate_insn(I, Context, Options) ->
   case I of
+    #alu{aluop='xor', src=#x86_temp{reg=Reg}=Src, dst=#x86_temp{reg=Reg}=Dst} ->
+      [{'xor', {temp_to_reg32(Dst), temp_to_rm32(Src)}, I}];
     #alu{} ->
       Arg = resolve_alu_args(hipe_x86:alu_src(I), hipe_x86:alu_dst(I), Context),
       [{hipe_x86:alu_op(I), Arg, I}];
@@ -228,11 +230,11 @@ translate_insn(I, Context, Options) ->
     #move64{} ->
       translate_move64(I, Context);
     #movsx{} ->
-      Arg = resolve_movx_args(hipe_x86:movsx_src(I), hipe_x86:movsx_dst(I)),
-      [{movsx, Arg, I}];
+      Src = resolve_movx_src(hipe_x86:movsx_src(I)),
+      [{movsx, {temp_to_regArch(hipe_x86:movsx_dst(I)), Src}, I}];
     #movzx{} ->
-      Arg = resolve_movx_args(hipe_x86:movzx_src(I), hipe_x86:movzx_dst(I)),
-      [{movzx, Arg, I}];
+      Src = resolve_movx_src(hipe_x86:movzx_src(I)),
+      [{movzx, {temp_to_reg32(hipe_x86:movzx_dst(I)), Src}, I}];
     %% pseudo_call: eliminated before assembly
     %% pseudo_jcc: eliminated before assembly
     %% pseudo_tailcall: eliminated before assembly
@@ -845,16 +847,15 @@ translate_move64(I, _Context) -> exit({?MODULE, I}).
 -endif.
 
 %%% mov{s,z}x
-resolve_movx_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}) ->
-  {temp_to_regArch(Dst),
-   case Type of
-     byte ->
-       mem_to_rm8(Src);
-     int16 ->
-       mem_to_rm16(Src);
-     int32 ->
-       mem_to_rm32(Src)
-   end}.
+resolve_movx_src(Src=#x86_mem{type=Type}) ->
+  case Type of
+    byte ->
+      mem_to_rm8(Src);
+    int16 ->
+      mem_to_rm16(Src);
+    int32 ->
+      mem_to_rm32(Src)
+  end.
 
 %%% alu/cmp (_not_ test)
 resolve_alu_args(Src, Dst, Context) ->
diff --git a/lib/hipe/x86/hipe_x86_cfg.erl b/lib/hipe/x86/hipe_x86_cfg.erl
index a4544e1086..0a3c0fc9d6 100644
--- a/lib/hipe/x86/hipe_x86_cfg.erl
+++ b/lib/hipe/x86/hipe_x86_cfg.erl
@@ -19,7 +19,7 @@
          succ/2, pred/2,
          bb/2, bb_add/3, map_bbs/2, fold_bbs/3]).
 -export([postorder/1, reverse_postorder/1]).
--export([linearise/1, params/1, arity/1, redirect_jmp/3]).
+-export([linearise/1, params/1, arity/1, redirect_jmp/3, branch_preds/1]).
 
 %%% these tell cfg.inc what to define (ugly as hell)
 -define(PRED_NEEDED,true).
@@ -72,6 +72,26 @@ branch_successors(Branch) ->
 	#ret{} -> []
     end.
 
+branch_preds(Branch) -> % -> [{SuccessorLabel, Probability}]
+  case Branch of
+    #jmp_switch{labels=Labels} ->
+      Prob = 1.0/length(Labels), % uniform; badarith if Labels =:= []
+      [{L, Prob} || L <- Labels];
+    #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=[]}} ->
+      %% A function can still cause an exception, even if we won't catch it
+      [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}];
+    #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=ExnLab}} ->
+      CallExnPred = hipe_bb_weights:call_exn_pred(),
+      [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}];
+    #pseudo_jcc{true_label=TrueLab,false_label=FalseLab,pred=Pred} ->
+      [{FalseLab, 1.0-Pred}, {TrueLab, Pred}]; % Pred weights the true branch
+    _ ->
+      case branch_successors(Branch) of
+	[] -> [];
+	[Single] -> [{Single, 1.0}] % sole successor gets weight 1.0
+      end
+    end.
+
 -ifdef(REMOVE_TRIVIAL_BBS_NEEDED).
 fails_to(_Instr) -> [].
 -endif.
diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl
index 5d7fadf8e5..2731836dc1 100644
--- a/lib/hipe/x86/hipe_x86_defuse.erl
+++ b/lib/hipe/x86/hipe_x86_defuse.erl
@@ -51,6 +51,8 @@ insn_def(I) ->
     #movzx{dst=Dst} -> dst_def(Dst);
     #pseudo_call{} -> call_clobbered();
     #pseudo_spill{} -> [];
+    #pseudo_spill_fmove{temp=Temp, dst=Dst} -> [Temp, Dst];
+    #pseudo_spill_move{temp=Temp, dst=Dst} -> [Temp, Dst];
     #pseudo_tailcall_prepare{} -> tailcall_clobbered();
     #shift{dst=Dst} -> dst_def(Dst);
     %% call, cmp, comment, jcc, jmp_fun, jmp_label, jmp_switch, label
@@ -108,6 +110,8 @@ insn_use(I) ->
     #pseudo_call{'fun'=Fun,sdesc=#x86_sdesc{arity=Arity}} ->
       addtemp(Fun, arity_use(Arity));
     #pseudo_spill{args=Args} -> Args;
+    #pseudo_spill_fmove{src=Src} -> [Src];
+    #pseudo_spill_move{src=Src} -> [Src];
     #pseudo_tailcall{'fun'=Fun,arity=Arity,stkargs=StkArgs} ->
       addtemp(Fun, addtemps(StkArgs, addtemps(tailcall_clobbered(),
 					      arity_use(Arity))));
diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl
index 3c2b67967a..558321d0c3 100644
--- a/lib/hipe/x86/hipe_x86_frame.erl
+++ b/lib/hipe/x86/hipe_x86_frame.erl
@@ -95,13 +95,17 @@ do_insn(I, LiveOut, Context, FPoff) ->
     #imul{} ->
       {[do_imul(I, Context, FPoff)], FPoff};
     #move{} ->
-      {[do_move(I, Context, FPoff)], FPoff};
+      {do_move(I, Context, FPoff), FPoff};
     #movsx{} ->
       {[do_movsx(I, Context, FPoff)], FPoff};
     #movzx{} ->
       {[do_movzx(I, Context, FPoff)], FPoff};
     #pseudo_call{} ->
       do_pseudo_call(I, LiveOut, Context, FPoff);
+    #pseudo_spill_fmove{} ->
+      {do_pseudo_spill_fmove(I, Context, FPoff), FPoff};
+    #pseudo_spill_move{} ->
+      {do_pseudo_spill_move(I, Context, FPoff), FPoff};
     #pseudo_tailcall{} ->
       {do_pseudo_tailcall(I, Context), context_framesize(Context)};
     #push{} ->
@@ -144,22 +148,50 @@ do_fp_binop(I, Context, FPoff) ->
   Dst = conv_opnd(Dst0, FPoff, Context),
   [I#fp_binop{src=Src,dst=Dst}].
 
-do_fmove(I, Context, FPoff) ->
-  #fmove{src=Src0,dst=Dst0} = I,
+do_fmove(I0, Context, FPoff) ->
+  #fmove{src=Src0,dst=Dst0} = I0,
   Src = conv_opnd(Src0, FPoff, Context),
   Dst = conv_opnd(Dst0, FPoff, Context),
-  I#fmove{src=Src,dst=Dst}.
+  I = I0#fmove{src=Src,dst=Dst},
+  case Src =:= Dst of
+    true -> []; % omit move-to-self
+    false -> [I]
+  end.
+
+do_pseudo_spill_fmove(I0, Context, FPoff) -> % -> list of 0 or 2 #fmove{}s
+  #pseudo_spill_fmove{src=Src0,temp=Temp0,dst=Dst0} = I0,
+  Src = conv_opnd(Src0, FPoff, Context),
+  Temp = conv_opnd(Temp0, FPoff, Context),
+  Dst = conv_opnd(Dst0, FPoff, Context),
+  case Src =:= Dst of
+    true -> []; % omit move-to-self
+    false -> [#fmove{src=Src, dst=Temp}, #fmove{src=Temp, dst=Dst}] % via Temp
+  end.
 
 do_imul(I, Context, FPoff) ->
   #imul{src=Src0} = I,
   Src = conv_opnd(Src0, FPoff, Context),
   I#imul{src=Src}.
 
-do_move(I, Context, FPoff) ->
-  #move{src=Src0,dst=Dst0} = I,
+do_move(I0, Context, FPoff) ->
+  #move{src=Src0,dst=Dst0} = I0,
   Src = conv_opnd(Src0, FPoff, Context),
   Dst = conv_opnd(Dst0, FPoff, Context),
-  I#move{src=Src,dst=Dst}.
+  I = I0#move{src=Src,dst=Dst},
+  case Src =:= Dst of
+    true -> []; % omit move-to-self
+    false -> [I]
+  end.
+
+do_pseudo_spill_move(I0, Context, FPoff) -> % -> list of 0 or 2 #move{}s
+  #pseudo_spill_move{src=Src0,temp=Temp0,dst=Dst0} = I0,
+  Src = conv_opnd(Src0, FPoff, Context),
+  Temp = conv_opnd(Temp0, FPoff, Context),
+  Dst = conv_opnd(Dst0, FPoff, Context),
+  case Src =:= Dst of
+    true -> []; % omit move-to-self
+    false -> [#move{src=Src, dst=Temp}, #move{src=Temp, dst=Dst}] % via Temp
+  end.
 
 do_movsx(I, Context, FPoff) ->
   #movsx{src=Src0,dst=Dst0} = I,
diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl
index b84e9bed91..925054dd68 100644
--- a/lib/hipe/x86/hipe_x86_postpass.erl
+++ b/lib/hipe/x86/hipe_x86_postpass.erl
@@ -57,9 +57,10 @@ postpass(#defun{code=Code0}=Defun, Options) ->
 peephole_optimization(Insns) -> 
   peep(Insns, [], []).
 
-%% MoveSelf related peep-opts 
+
+%% MoveSelf related peep-opts
 %% ------------------------------
-peep([#fmove{src=Src, dst=Src} | Insns], Res,Lst) -> 
+peep([#fmove{src=Src, dst=Src} | Insns], Res,Lst) ->
     peep(Insns, Res, [moveSelf1|Lst]);
 peep([I=#fmove{src=Src, dst=Dst}, 
       #fmove{src=Dst, dst=Src} | Insns], Res,Lst) -> 
@@ -159,8 +160,7 @@ peep([#jcc{label=Lab}, I=#label{label=Lab}|Insns], Res, Lst) ->
 
 %% ElimSet0
 %% --------
-peep([#move{src=#x86_imm{value=0},dst=Dst}|Insns],Res,Lst) 
-when (Dst==#x86_temp{}) ->
+peep([#move{src=#x86_imm{value=0},dst=Dst=#x86_temp{}}|Insns],Res,Lst) ->
   peep(Insns, [#alu{aluop='xor', src=Dst, dst=Dst}|Res], [elimSet0|Lst]);    
 
 %% ElimMDPow2
diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl
index 4273e3cee8..e8abe78e00 100644
--- a/lib/hipe/x86/hipe_x86_ra_finalise.erl
+++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl
@@ -140,6 +140,16 @@ ra_insn(I, Map, FpMap) ->
       I#pseudo_call{'fun'=Fun};
     #pseudo_jcc{} ->
       I;
+    #pseudo_spill_fmove{src=Src0, temp=Temp0, dst=Dst0} ->
+      Src = ra_opnd(Src0, Map, FpMap),
+      Temp = ra_opnd(Temp0, Map, FpMap),
+      Dst = ra_opnd(Dst0, Map, FpMap),
+      I#pseudo_spill_fmove{src=Src, temp=Temp, dst=Dst};
+    #pseudo_spill_move{src=Src0, temp=Temp0, dst=Dst0} ->
+      Src = ra_opnd(Src0, Map),
+      Temp = ra_opnd(Temp0, Map),
+      Dst = ra_opnd(Dst0, Map),
+      I#pseudo_spill_move{src=Src, temp=Temp, dst=Dst};
     #pseudo_tailcall{'fun'=Fun0,stkargs=StkArgs0} ->
       Fun = ra_opnd(Fun0, Map),
       StkArgs = ra_args(StkArgs0, Map),
diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl
index 28ec9c4277..db6391d5c1 100644
--- a/lib/hipe/x86/hipe_x86_ra_postconditions.erl
+++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl
@@ -74,6 +74,8 @@ do_insn(I, TempMap, Strategy) ->	% Insn -> {Insn list, DidSpill}
       do_movx(I, TempMap, Strategy);
     #fmove{} ->
       do_fmove(I, TempMap, Strategy);
+    #pseudo_spill_move{} ->
+      do_pseudo_spill_move(I, TempMap, Strategy);
     #shift{} ->
       do_shift(I, TempMap, Strategy);
     #test{} ->
@@ -190,10 +192,19 @@ do_lea(I, TempMap, Strategy) ->
 
 do_move(I, TempMap, Strategy) ->
   #move{src=Src0,dst=Dst0} = I,
-  {FixSrc, Src, FixDst, Dst, DidSpill} =
-    do_check_byte_move(Src0, Dst0, TempMap, Strategy),
-  {FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}],
-   DidSpill}.
+  case
+    is_record(Src0, x86_temp) andalso is_record(Dst0, x86_temp)
+    andalso is_spilled(Src0, TempMap) andalso is_spilled(Dst0, TempMap)
+  of
+    true ->
+      Tmp = clone(Src0, Strategy),
+      {[hipe_x86:mk_pseudo_spill_move(Src0, Tmp, Dst0)], true};
+    false ->
+      {FixSrc, Src, FixDst, Dst, DidSpill} =
+	do_check_byte_move(Src0, Dst0, TempMap, Strategy),
+      {FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}],
+       DidSpill}
+  end.
 
 -ifdef(HIPE_AMD64).
 
@@ -287,6 +298,13 @@ do_fmove(I, TempMap, Strategy) ->
   {FixSrc ++ FixDst ++ [I#fmove{src=Src,dst=Dst}],
    DidSpill1 or DidSpill2}.
 
+%%% Fix a pseudo_spill_move op.
+
+do_pseudo_spill_move(I = #pseudo_spill_move{temp=Temp}, TempMap, _Strategy) ->
+  %% Temp is above the low water mark and must not have been spilled
+  false = is_spilled(Temp, TempMap), % assertion: badmatch if Temp is spilled
+  {[I], false}. % nothing to do
+
 %%% Fix a shift operation.
 %%% 1. remove pseudos from any explicit memory operands
 %%% 2. if the source is a register or memory position
diff --git a/lib/hipe/x86/hipe_x86_subst.erl b/lib/hipe/x86/hipe_x86_subst.erl
index 7b5fb1352b..7db3b23d92 100644
--- a/lib/hipe/x86/hipe_x86_subst.erl
+++ b/lib/hipe/x86/hipe_x86_subst.erl
@@ -19,7 +19,7 @@
 -endif.
 
 -module(?HIPE_X86_SUBST).
--export([insn_temps/2]).
+-export([insn_temps/2, insn_lbls/2]).
 -include("../x86/hipe_x86.hrl").
 
 %% These should be moved to hipe_x86 and exported
@@ -28,6 +28,7 @@
 -type mfarec() :: #x86_mfa{}.
 -type prim()   :: #x86_prim{}.
 -type funv()   :: mfarec() | prim() | temp().
+-type label()  :: non_neg_integer().
 -type insn()   :: tuple(). % for now
 
 -type subst_fun() :: fun((temp()) -> temp()).
@@ -49,14 +50,19 @@ insn_temps(SubstTemp, I) ->
     #movzx   {src=S, dst=D}  -> I#movzx   {src=O(S), dst=O(D)};
     #shift   {src=S, dst=D}  -> I#shift   {src=O(S), dst=O(D)};
     #test    {src=S, dst=D}  -> I#test    {src=O(S), dst=O(D)};
-    #fp_unop{arg=A} -> I#fp_unop{arg=O(A)};
-    #move64 {dst=D} -> I#move64 {dst=O(D)};
-    #push   {src=S} -> I#push   {src=O(S)};
-    #pop    {dst=D} -> I#pop    {dst=O(D)};
+    #fp_unop{arg=[]} -> I;
+    #fp_unop{arg=A}  -> I#fp_unop{arg=O(A)};
+    #move64 {dst=D}  -> I#move64 {dst=O(D)};
+    #push   {src=S}  -> I#push   {src=O(S)};
+    #pop    {dst=D}  -> I#pop    {dst=O(D)};
     #jmp_switch{temp=T, jtab=J} ->
       I#jmp_switch{temp=O(T), jtab=jtab_temps(SubstTemp, J)};
     #pseudo_call{'fun'=F} ->
       I#pseudo_call{'fun'=funv_temps(SubstTemp, F)};
+    #pseudo_spill_fmove{src=S, temp=T, dst=D} ->
+      I#pseudo_spill_fmove{src=O(S), temp=O(T), dst=O(D)};
+    #pseudo_spill_move{src=S, temp=T, dst=D} ->
+      I#pseudo_spill_move{src=O(S), temp=O(T), dst=O(D)};
     #pseudo_tailcall{'fun'=F, stkargs=Stk} ->
       I#pseudo_tailcall{'fun'=funv_temps(SubstTemp, F),
 			stkargs=lists:map(O, Stk)};
@@ -85,3 +91,22 @@ jtab_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T).
 -else.
 jtab_temps(_SubstTemp, DataLbl) when is_integer(DataLbl) -> DataLbl.
 -endif.
+
+-type lbl_subst_fun() :: fun((label()) -> label()).
+
+%% @doc Maps over the branch targets in an instruction
+-spec insn_lbls(lbl_subst_fun(), insn()) -> insn().
+insn_lbls(SubstLbl, I) ->
+  case I of % no catch-all: other insns raise case_clause
+    #jmp_label{label=Label} ->
+      I#jmp_label{label=SubstLbl(Label)};
+    #pseudo_call{sdesc=Sdesc, contlab=Contlab} ->
+      I#pseudo_call{sdesc=sdesc_lbls(SubstLbl, Sdesc),
+		    contlab=SubstLbl(Contlab)};
+    #pseudo_jcc{true_label=T, false_label=F} ->
+      I#pseudo_jcc{true_label=SubstLbl(T), false_label=SubstLbl(F)}
+  end.
+
+sdesc_lbls(_SubstLbl, Sdesc=#x86_sdesc{exnlab=[]}) -> Sdesc; % [] left as is
+sdesc_lbls(SubstLbl, Sdesc=#x86_sdesc{exnlab=Exnlab}) ->
+  Sdesc#x86_sdesc{exnlab=SubstLbl(Exnlab)}. % rewrite the exnlab field only