14 files changed, 327 insertions, 137 deletions
diff --git a/lib/hipe/arm/Makefile b/lib/hipe/arm/Makefile
index 00b6732afa..ed2eccf428 100644
--- a/lib/hipe/arm/Makefile
+++ b/lib/hipe/arm/Makefile
@@ -61,6 +61,7 @@ MODULES=hipe_arm \
 	hipe_arm_ra_naive \
 	hipe_arm_ra_postconditions \
 	hipe_arm_registers \
+	hipe_arm_subst \
 	hipe_rtl_to_arm
 
 HRL_FILES=hipe_arm.hrl
diff --git a/lib/hipe/arm/hipe_arm_cfg.erl b/lib/hipe/arm/hipe_arm_cfg.erl
index f2fa0a5164..2fb6675da9 100644
--- a/lib/hipe/arm/hipe_arm_cfg.erl
+++ b/lib/hipe/arm/hipe_arm_cfg.erl
@@ -24,6 +24,7 @@
 -export([init/1,
          labels/1, start_label/1,
          succ/2,
+         map_bbs/2, fold_bbs/3,
          bb/2, bb_add/3]).
 -export([postorder/1]).
 -export([linearise/1]).
@@ -35,6 +36,7 @@
 -define(BREADTH_ORDER,true).  % for linear scan
 -define(PARAMS_NEEDED,true).
 -define(START_LABEL_UPDATE_NEEDED,true).
+-define(MAP_FOLD_NEEDED,true).
 
 -include("hipe_arm.hrl").
 -include("../flow/cfg.hrl").
diff --git a/lib/hipe/arm/hipe_arm_defuse.erl b/lib/hipe/arm/hipe_arm_defuse.erl
index f57b0e601c..f92cf4f82a 100644
--- a/lib/hipe/arm/hipe_arm_defuse.erl
+++ b/lib/hipe/arm/hipe_arm_defuse.erl
@@ -22,6 +22,7 @@
 -module(hipe_arm_defuse).
 -export([insn_def_all/1, insn_use_all/1]).
 -export([insn_def_gpr/1, insn_use_gpr/1]).
+-export([insn_defs_all_gpr/1]).
 -include("hipe_arm.hrl").
 
 %%%
@@ -55,6 +56,12 @@ insn_def_gpr(I) ->
     _ -> []
   end.
 
+insn_defs_all_gpr(I) ->
+  case I of
+    #pseudo_call{} -> true;
+    _ -> false
+  end.
+
 call_clobbered_gpr() ->
   [hipe_arm:mk_temp(R, T)
    || {R,T} <- hipe_arm_registers:call_clobbered() ++ all_fp_pseudos()].
diff --git a/lib/hipe/arm/hipe_arm_finalise.erl b/lib/hipe/arm/hipe_arm_finalise.erl
index a4b2f9c73c..55651d7180 100644
--- a/lib/hipe/arm/hipe_arm_finalise.erl
+++ b/lib/hipe/arm/hipe_arm_finalise.erl
@@ -20,13 +20,17 @@
 %%
 
 -module(hipe_arm_finalise).
--export([finalise/1]).
+-export([finalise/2]).
 -include("hipe_arm.hrl").
 
-finalise(Defun) ->
+finalise(Defun, Options) ->
   #defun{code=Code0} = Defun,
-  Code1 = peep(expand(Code0)),
-  Defun#defun{code=Code1}.
+  Code1Rev = expand(Code0),
+  Code2 = case proplists:get_bool(peephole, Options) of
+	    true -> peep(Code1Rev);
+	    false -> lists:reverse(Code1Rev)
+	  end,
+  Defun#defun{code=Code2}.
 
 expand(Insns) ->
   expand_list(Insns, []).
@@ -34,7 +38,7 @@ expand(Insns) ->
 expand_list([I|Insns], Accum) ->
   expand_list(Insns, expand_insn(I, Accum));
 expand_list([], Accum) ->
-  lists:reverse(Accum).
+  Accum.
 
 expand_insn(I, Accum) ->
   case I of
@@ -63,12 +67,67 @@ expand_insn(I, Accum) ->
       [I|Accum]
   end.
 
-peep(Insns) ->
-  peep_list(Insns, []).
+%% We do peephole "bottom-up" (in reverse, but applying rules to the correctly
+%% ordered list). This way, we can do replacements that would take multiple
+%% passes with an in-order peephole optimiser.
+%%
+%% N.B., if a rule wants to produce multiple instructions (even if some of them
+%% are unchanged), it should push the additional instructions on the More list,
+%% so that only the top instruction on Insns is new or changed, i.e. tl(Insns)
+%% should have been peepholed previously.
+peep(RevInsns) ->
+  peep_list_skip([], RevInsns).
+
+peep_list([#b_label{'cond'='al',label=Label}
+	   | (Insns = [#label{label=Label}|_])], More) ->
+  peep_list_skip(Insns, More);
+
+peep_list([#move{movop='mov',s=false,dst=#arm_temp{reg=Dst}
+		,am1=#arm_temp{reg=Dst}}|Insns], More) ->
+  peep_list_skip(Insns, More);
+
+peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsr,Imm}},
+	   #move{movop='mov',s=false,dst=Dst,am1={Dst,lsl,Imm}}
+	   |Insns], More) when Imm > 0, Imm =< 8 ->
+  peep_list([#alu{aluop='bic',s=false,dst=Dst,src=Src,am1={(1 bsl Imm)-1,0}}
+	    |Insns], More);
+peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsl,Imm}},
+	   #move{movop='mov',s=false,dst=Dst,am1={Dst,lsr,Imm}}
+	   |Insns], More) when Imm >= 24, Imm < 32 ->
+  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src
+		 ,am1={(1 bsl (32-Imm))-1,0}} | Insns], More);
+
+%% XXX: Load-after-store optimisation should also be applied to RTL, where it
+%% can be more general, expose opportunities for constant propagation, etc.
+peep_list([#store{stop='strb',src=Src,am2=Mem}=Str,
+	   #load {ldop='ldrb',dst=Dst,am2=Mem} | Insns], More) ->
+  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={16#ff,0}}|Insns],
+	    [Str|More]);
+peep_list([#store{stop='str',src=Src,am2=Mem}=Str,
+	   #load {ldop='ldr',dst=Dst,am2=Mem} | Insns], More) ->
+  peep_list([#move{movop='mov',s=false,dst=Dst,am1=Src}|Insns], [Str|More]);
+
+peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={Mask,0}},
+	   #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={InvMask,0}}
+	   |Insns], More) ->
+  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src
+		 ,am1={Mask band (bnot InvMask),0}} | Insns], More);
+
+%% XXX: The place that generates brain-dead code like the following should be
+%% fixed rather than trying to patch it over here.
+peep_list([#load{ldop='ldrb',dst=Dst,am2=_Mem},
+	   #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={16#ff,0}}
+	   | Insns], More) ->
+  peep_list([#move{movop='mov',s=false,dst=Dst,am1={0,0}}|Insns], More);
+
+peep_list(Insns, [I|More]) ->
+  peep_list([I|Insns], More);
+peep_list(Accum, []) ->
+  Accum.
 
-peep_list([#b_label{'cond'='al',label=Label} | (Insns = [#label{label=Label}|_])], Accum) ->
-  peep_list(Insns, Accum);
-peep_list([I|Insns], Accum) ->
-  peep_list(Insns, [I|Accum]);
-peep_list([], Accum) ->
-  lists:reverse(Accum).
+%% Used as an optimisation instead of tailcalling peep_list/2 when Insns has
+%% already been peeped or is otherwise uninteresting (such as empty).
+peep_list_skip(Insns, [I|More]) ->
+  peep_list([I|Insns], More);
+peep_list_skip(Accum, []) ->
+  Accum.
diff --git a/lib/hipe/arm/hipe_arm_frame.erl b/lib/hipe/arm/hipe_arm_frame.erl
index e1e441a967..9a349b47d3 100644
--- a/lib/hipe/arm/hipe_arm_frame.erl
+++ b/lib/hipe/arm/hipe_arm_frame.erl
@@ -27,16 +27,14 @@
 
 -define(LIVENESS_ALL, hipe_arm_liveness_gpr). % since we have no FP yet
 
-frame(Defun) ->
-  Formals = fix_formals(hipe_arm:defun_formals(Defun)),
-  Temps0 = all_temps(hipe_arm:defun_code(Defun), Formals),
-  MinFrame = defun_minframe(Defun),
+frame(CFG) ->
+  Formals = fix_formals(hipe_arm_cfg:params(CFG)),
+  Temps0 = all_temps(CFG, Formals),
+  MinFrame = defun_minframe(CFG),
   Temps = ensure_minframe(MinFrame, Temps0),
-  ClobbersLR = clobbers_lr(hipe_arm:defun_code(Defun)),
-  CFG0 = hipe_arm_cfg:init(Defun),
-  Liveness = ?LIVENESS_ALL:analyse(CFG0),
-  CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersLR),
-  hipe_arm_cfg:linearise(CFG1).
+  ClobbersLR = clobbers_lr(CFG),
+  Liveness = ?LIVENESS_ALL:analyse(CFG),
+  do_body(CFG, Liveness, Formals, Temps, ClobbersLR).
 
 fix_formals(Formals) ->
   fix_formals(hipe_arm_registers:nr_args(), Formals).
@@ -51,32 +49,21 @@ do_body(CFG0, Liveness, Formals, Temps, ClobbersLR) ->
   do_prologue(CFG1, Context).
 
 do_blocks(CFG, Context) ->
-  Labels = hipe_arm_cfg:labels(CFG),
-  do_blocks(Labels, CFG, Context).
+  hipe_arm_cfg:map_bbs(fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG).
 
-do_blocks([Label|Labels], CFG, Context) ->
+do_block(Label, Block, Context) ->
   Liveness = context_liveness(Context),
   LiveOut = ?LIVENESS_ALL:liveout(Liveness, Label),
-  Block = hipe_arm_cfg:bb(CFG, Label),
   Code = hipe_bb:code(Block),
-  NewCode = do_block(Code, LiveOut, Context),
-  NewBlock = hipe_bb:code_update(Block, NewCode),
-  NewCFG = hipe_arm_cfg:bb_add(CFG, Label, NewBlock),
-  do_blocks(Labels, NewCFG, Context);
-do_blocks([], CFG, _) ->
-  CFG.
-
-do_block(Insns, LiveOut, Context) ->
-  do_block(Insns, LiveOut, Context, context_framesize(Context), []).
+  NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []),
+  hipe_bb:code_update(Block, NewCode).
 
 do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) ->
   {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0),
   do_block(Insns, LiveOut, Context, FPoff1, lists:reverse(NewIs, RevCode));
 do_block([], _, Context, FPoff, RevCode) ->
   FPoff0 = context_framesize(Context),
-  if FPoff =:= FPoff0 -> [];
-     true -> exit({?MODULE,do_block,FPoff})
-  end,
+  FPoff0 = FPoff,
   lists:reverse(RevCode, []).
 
 do_insn(I, LiveOut, Context, FPoff) ->
@@ -543,39 +530,46 @@ temp_is_pseudo(Temp) ->
 %%% Detect if a Defun's body clobbers LR.
 %%%
 
-clobbers_lr(Insns) ->
+clobbers_lr(CFG) ->
   LRreg = hipe_arm_registers:lr(),
   LRtagged = hipe_arm:mk_temp(LRreg, 'tagged'),
   LRuntagged = hipe_arm:mk_temp(LRreg, 'untagged'),
-  clobbers_lr(Insns, LRtagged, LRuntagged).
-
-clobbers_lr([I|Insns], LRtagged, LRuntagged) ->
-  Defs = hipe_arm_defuse:insn_def_gpr(I),
-  case lists:member(LRtagged, Defs) of
-    true -> true;
-    false ->
-      case lists:member(LRuntagged, Defs) of
-	true -> true;
-	false -> clobbers_lr(Insns, LRtagged, LRuntagged)
-      end
-  end;
-clobbers_lr([], _LRtagged, _LRuntagged) -> false.
+  any_insn(fun(I) ->
+	       Defs = hipe_arm_defuse:insn_def_gpr(I),
+	       lists:member(LRtagged, Defs)
+		 orelse lists:member(LRuntagged, Defs)
+	   end, CFG).
+
+any_insn(Pred, CFG) ->
+  %% Abuse fold to do an efficient "any"-operation using nonlocal control flow
+  FoundSatisfying = make_ref(),
+  try fold_insns(fun (I, _) ->
+		     case Pred(I) of
+		       true -> throw(FoundSatisfying);
+		       false -> false
+		     end
+		 end, false, CFG)
+  of _ -> false
+  catch FoundSatisfying -> true
+  end.
 
 %%%
 %%% Build the set of all temps used in a Defun's body.
 %%%
 
-all_temps(Code, Formals) ->
-  S0 = find_temps(Code, tset_empty()),
+all_temps(CFG, Formals) ->
+  S0 = fold_insns(fun find_temps/2, tset_empty(), CFG),
   S1 = tset_del_list(S0, Formals),
   tset_filter(S1, fun(T) -> temp_is_pseudo(T) end).
 
-find_temps([I|Insns], S0) ->
+find_temps(I, S0) ->
   S1 = tset_add_list(S0, hipe_arm_defuse:insn_def_all(I)),
-  S2 = tset_add_list(S1, hipe_arm_defuse:insn_use_all(I)),
-  find_temps(Insns, S2);
-find_temps([], S) ->
-  S.
+  tset_add_list(S1, hipe_arm_defuse:insn_use_all(I)).
+
+fold_insns(Fun, InitAcc, CFG) ->
+  hipe_arm_cfg:fold_bbs(
+    fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end,
+    InitAcc, CFG).
 
 tset_empty() ->
   gb_sets:new().
@@ -604,16 +598,11 @@ tset_to_list(S) ->
 %%% in the middle of a tailcall.
 %%%
 
-defun_minframe(Defun) ->
-  MaxTailArity = body_mta(hipe_arm:defun_code(Defun), 0),
-  MyArity = length(fix_formals(hipe_arm:defun_formals(Defun))),
+defun_minframe(CFG) ->
+  MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG),
+  MyArity = length(fix_formals(hipe_arm_cfg:params(CFG))),
   erlang:max(MaxTailArity - MyArity, 0).
 
-body_mta([I|Code], MTA) ->
-  body_mta(Code, insn_mta(I, MTA));
-body_mta([], MTA) ->
-  MTA.
-
 insn_mta(I, MTA) ->
   case I of
     #pseudo_tailcall{arity=Arity} ->
diff --git a/lib/hipe/arm/hipe_arm_main.erl b/lib/hipe/arm/hipe_arm_main.erl
index dce1193b24..8a7fa86394 100644
--- a/lib/hipe/arm/hipe_arm_main.erl
+++ b/lib/hipe/arm/hipe_arm_main.erl
@@ -24,15 +24,17 @@
 
 rtl_to_arm(MFA, RTL, Options) ->
   Defun1 = hipe_rtl_to_arm:translate(RTL),
+  CFG1 = hipe_arm_cfg:init(Defun1),
   %% io:format("~w: after translate\n", [?MODULE]),
   %% hipe_arm_pp:pp(Defun1),
-  Defun2 = hipe_arm_ra:ra(Defun1, Options),
+  CFG2 = hipe_arm_ra:ra(CFG1, Options),
   %% io:format("~w: after regalloc\n", [?MODULE]),
-  %% hipe_arm_pp:pp(Defun2),
-  Defun3 = hipe_arm_frame:frame(Defun2),
+  %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG2)),
+  CFG3 = hipe_arm_frame:frame(CFG2),
+  Defun3 = hipe_arm_cfg:linearise(CFG3),
   %% io:format("~w: after frame\n", [?MODULE]),
   %% hipe_arm_pp:pp(Defun3),
-  Defun4 = hipe_arm_finalise:finalise(Defun3),
+  Defun4 = hipe_arm_finalise:finalise(Defun3, Options),
   %% io:format("~w: after finalise\n", [?MODULE]),
   pp(Defun4, MFA, Options),
   {native, arm, {unprofiled, Defun4}}.
diff --git a/lib/hipe/arm/hipe_arm_ra.erl b/lib/hipe/arm/hipe_arm_ra.erl
index 2f65e864fd..bfb649326c 100644
--- a/lib/hipe/arm/hipe_arm_ra.erl
+++ b/lib/hipe/arm/hipe_arm_ra.erl
@@ -22,36 +22,40 @@
 -module(hipe_arm_ra).
 -export([ra/2]).
 
-ra(Defun0, Options) ->
-  %% hipe_arm_pp:pp(Defun0),
-  {Defun1, Coloring_fp, SpillIndex}
+ra(CFG0, Options) ->
+  %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG0)),
+  {CFG1, _FPLiveness1, Coloring_fp, SpillIndex}
     = case proplists:get_bool(inline_fp, Options) of
 %%	true ->
-%%	  hipe_regalloc_loop:ra_fp(Defun0, Options,
+%%	  FPLiveness0 = hipe_arm_specific_fp:analyze(CFG0, no_context),
+%%	  hipe_regalloc_loop:ra_fp(CFG0, FPLiveness0, Options,
 %%				   hipe_coalescing_regalloc,
-%%				   hipe_arm_specific_fp);
+%%				   hipe_arm_specific_fp, no_context);
 	false ->
-	  {Defun0,[],0}
+	  {CFG0,undefined,[],0}
       end,
-  %% hipe_arm_pp:pp(Defun1),
-  {Defun2, Coloring}
+  %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG1)),
+  GPLiveness1 = hipe_arm_specific:analyze(CFG1, no_context),
+  {CFG2, _GPLiveness2, Coloring}
     = case proplists:get_value(regalloc, Options, coalescing) of
 	coalescing ->
-	  ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc);
+	  ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_coalescing_regalloc);
 	optimistic ->
-	  ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc);
+	  ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_optimistic_regalloc);
 	graph_color ->
-	  ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc);
+	  ra(CFG1, GPLiveness1, SpillIndex, Options,
+	     hipe_graph_coloring_regalloc);
 	linear_scan ->
-	  hipe_arm_ra_ls:ra(Defun1, SpillIndex, Options);
+	  hipe_arm_ra_ls:ra(CFG1, GPLiveness1, SpillIndex, Options);
 	naive ->
-	  hipe_arm_ra_naive:ra(Defun1, Coloring_fp, Options);
+	  hipe_arm_ra_naive:ra(CFG1, GPLiveness1, Coloring_fp, Options);
         _ ->
 	  exit({unknown_regalloc_compiler_option,
 		proplists:get_value(regalloc,Options)})
       end,
-  %% hipe_arm_pp:pp(Defun2),
-  hipe_arm_ra_finalise:finalise(Defun2, Coloring, Coloring_fp).
+  %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG2)),
+  hipe_arm_ra_finalise:finalise(CFG2, Coloring, Coloring_fp).
 
-ra(Defun, SpillIndex, Options, RegAllocMod) ->
-  hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_arm_specific).
+ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) ->
+  hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod,
+			hipe_arm_specific, no_context).
diff --git a/lib/hipe/arm/hipe_arm_ra_finalise.erl b/lib/hipe/arm/hipe_arm_ra_finalise.erl
index 4faeadcd7f..2a3fded147 100644
--- a/lib/hipe/arm/hipe_arm_ra_finalise.erl
+++ b/lib/hipe/arm/hipe_arm_ra_finalise.erl
@@ -23,12 +23,13 @@
 -export([finalise/3]).
 -include("hipe_arm.hrl").
 
-finalise(Defun, TempMap, _FPMap0=[]) ->
-  Code = hipe_arm:defun_code(Defun),
-  {_, SpillLimit} = hipe_arm:defun_var_range(Defun),
+finalise(CFG, TempMap, _FPMap0=[]) ->
+  {_, SpillLimit} = hipe_gensym:var_range(arm),
   Map = mk_ra_map(TempMap, SpillLimit),
-  NewCode = ra_code(Code, Map, []),
-  Defun#defun{code=NewCode}.
+  hipe_arm_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map) end, CFG).
+
+ra_bb(BB, Map) ->
+  hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, [])).
 
 ra_code([I|Insns], Map, Accum) ->
   ra_code(Insns, Map, [ra_insn(I, Map) | Accum]);
diff --git a/lib/hipe/arm/hipe_arm_ra_ls.erl b/lib/hipe/arm/hipe_arm_ra_ls.erl
index d9a360d00c..0aa888da99 100644
--- a/lib/hipe/arm/hipe_arm_ra_ls.erl
+++ b/lib/hipe/arm/hipe_arm_ra_ls.erl
@@ -21,37 +21,35 @@
 %%% Linear Scan register allocator for ARM
 
 -module(hipe_arm_ra_ls).
--export([ra/3]).
+-export([ra/4]).
 
-ra(Defun, SpillIndex, Options) ->
-  NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options),
-  CFG = hipe_arm_cfg:init(NewDefun),
-  SpillLimit = hipe_arm_specific:number_of_temporaries(CFG),
-  alloc(NewDefun, SpillIndex, SpillLimit, Options).
+ra(CFG, Liveness, SpillIndex, Options) ->
+  SpillLimit = hipe_arm_specific:number_of_temporaries(CFG, no_context),
+  alloc(CFG, Liveness, SpillIndex, SpillLimit, Options).
 
-alloc(Defun, SpillIndex, SpillLimit, Options) ->
-  CFG = hipe_arm_cfg:init(Defun),
+alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) ->
   {Coloring, _NewSpillIndex} =
     regalloc(
-      CFG,
+      CFG, Liveness,
       hipe_arm_registers:allocatable_gpr()--
       [hipe_arm_registers:temp3(),
        hipe_arm_registers:temp2(),
        hipe_arm_registers:temp1()],
       [hipe_arm_cfg:start_label(CFG)],
       SpillIndex, SpillLimit, Options,
-      hipe_arm_specific),
-  {NewDefun, _DidSpill} =
+      hipe_arm_specific, no_context),
+  {NewCFG, _DidSpill} =
     hipe_arm_ra_postconditions:check_and_rewrite(
-      Defun, Coloring, 'linearscan'),
-  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific),
+      CFG, Coloring, 'linearscan'),
+  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific, no_context),
   {SpillMap, _NewSpillIndex2} =
-    hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options,
-			     hipe_arm_specific, TempMap),
+    hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options,
+			     hipe_arm_specific, no_context, TempMap),
   Coloring2 =
     hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), SpillMap),
-  {NewDefun, Coloring2}.
+  {NewCFG, Liveness, Coloring2}.
 
-regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) ->
-  hipe_ls_regalloc:regalloc(
-    CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target).
+regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options,
+	 TgtMod, TgtCtx) ->
+  hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex,
+			    DontSpill, Options, TgtMod, TgtCtx).
diff --git a/lib/hipe/arm/hipe_arm_ra_naive.erl b/lib/hipe/arm/hipe_arm_ra_naive.erl
index 6201269f44..395beff292 100644
--- a/lib/hipe/arm/hipe_arm_ra_naive.erl
+++ b/lib/hipe/arm/hipe_arm_ra_naive.erl
@@ -20,11 +20,11 @@
 %%
 
 -module(hipe_arm_ra_naive).
--export([ra/3]).
+-export([ra/4]).
 
 -include("hipe_arm.hrl").
 
-ra(Defun, _Coloring_fp, _Options) ->	% -> {Defun, Coloring}
-  {NewDefun,_DidSpill} =
-    hipe_arm_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'),
-  {NewDefun, []}.
+ra(CFG, Liveness, _Coloring_fp, _Options) ->	% -> {CFG, Liveness, Coloring}
+  {NewCFG,_DidSpill} =
+    hipe_arm_ra_postconditions:check_and_rewrite2(CFG, [], 'naive'),
+  {NewCFG, Liveness, []}.
diff --git a/lib/hipe/arm/hipe_arm_ra_postconditions.erl b/lib/hipe/arm/hipe_arm_ra_postconditions.erl
index 40978e65f6..412524e2e6 100644
--- a/lib/hipe/arm/hipe_arm_ra_postconditions.erl
+++ b/lib/hipe/arm/hipe_arm_ra_postconditions.erl
@@ -25,17 +25,13 @@
 
 -include("hipe_arm.hrl").
 
-check_and_rewrite(Defun, Coloring, Allocator) ->
-  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific),
-  check_and_rewrite2(Defun, TempMap, Allocator).
+check_and_rewrite(CFG, Coloring, Allocator) ->
+  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific, no_context),
+  check_and_rewrite2(CFG, TempMap, Allocator).
 
-check_and_rewrite2(Defun, TempMap, Allocator) ->
+check_and_rewrite2(CFG, TempMap, Allocator) ->
   Strategy = strategy(Allocator),
-  #defun{code=Code0} = Defun,
-  {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false),
-  VarRange = {0, hipe_gensym:get_var(arm)},
-  {Defun#defun{code=Code1, var_range=VarRange},
-   DidSpill}.
+  do_bbs(hipe_arm_cfg:labels(CFG), TempMap, Strategy, CFG, false).
 
 strategy(Allocator) ->
   case Allocator of
@@ -44,6 +40,13 @@ strategy(Allocator) ->
     'naive' -> 'fixed'
   end.
 
+do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill};
+do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) ->
+  Code0 = hipe_bb:code(BB = hipe_arm_cfg:bb(CFG0, Lbl)),
+  {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0),
+  CFG = hipe_arm_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)),
+  do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill).
+
 do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) ->
   {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy),
   do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1);
diff --git a/lib/hipe/arm/hipe_arm_registers.erl b/lib/hipe/arm/hipe_arm_registers.erl
index dcf039676b..3ecf2f2fdb 100644
--- a/lib/hipe/arm/hipe_arm_registers.erl
+++ b/lib/hipe/arm/hipe_arm_registers.erl
@@ -180,6 +180,8 @@ is_arg(R) ->
     _ -> false
   end.
 
+%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is
+%% hard-coded in hipe_arm_defuse:insn_defs_all_gpr/1
 call_clobbered() ->		% does the RA strip the type or not?
   [{?R0,tagged},{?R0,untagged},
    {?R1,tagged},{?R1,untagged},
diff --git a/lib/hipe/arm/hipe_arm_subst.erl b/lib/hipe/arm/hipe_arm_subst.erl
new file mode 100644
index 0000000000..4d077f3cd6
--- /dev/null
+++ b/lib/hipe/arm/hipe_arm_subst.erl
@@ -0,0 +1,112 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2016. All Rights Reserved.
+%% 
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-module(hipe_arm_subst).
+-export([insn_temps/2]).
+-include("hipe_arm.hrl").
+
+%% These should be moved to hipe_arm and exported
+-type temp()    :: #arm_temp{}.
+-type shiftop() :: lsl | lsr | asr | ror.
+-type imm4()    :: 0..15.
+-type imm5()    :: 0..31.
+-type imm8()    :: 0..255.
+-type am1()     :: {imm8(),imm4()}
+		 | temp()
+		 | {temp(), rrx}
+		 | {temp(), shiftop(), imm5()}
+		 | {temp(), shiftop(), temp()}.
+-type am2()     :: #am2{}.
+-type am3()     :: #am3{}.
+-type arg()     :: temp() | integer().
+-type funv()    :: #arm_mfa{} | #arm_prim{} | temp().
+-type insn()    :: tuple(). % for now
+
+-type subst_fun() :: fun((temp()) -> temp()).
+
+%% @doc Maps over the temporaries in an instruction
+-spec insn_temps(subst_fun(), insn()) -> insn().
+insn_temps(T, I) ->
+  AM1 = fun(O) -> am1_temps(T, O) end,
+  AM2 = fun(O) -> am2_temps(T, O) end,
+  AM3 = fun(O) -> am3_temps(T, O) end,
+  Arg = fun(O) -> arg_temps(T, O) end,
+  case I of
+      #alu  {dst=D,src=L,am1=R} -> I#alu{dst=T(D),src=T(L),am1=AM1(R)};
+      #cmp        {src=L,am1=R} -> I#cmp         {src=T(L),am1=AM1(R)};
+      #load       {dst=D,am2=S} -> I#load        {dst=T(D),am2=AM2(S)};
+      #ldrsb      {dst=D,am3=S} -> I#ldrsb       {dst=T(D),am3=AM3(S)};
+      #move       {dst=D,am1=S} -> I#move        {dst=T(D),am1=AM1(S)};
+      #pseudo_move{dst=D,src=S} -> I#pseudo_move {dst=T(D),src=T(S)};
+      #store      {src=S,am2=D} -> I#store       {src=T(S),am2=AM2(D)};
+      #b_label{} -> I;
+      #comment{} -> I;
+      #label{} -> I;
+      #pseudo_bc{} -> I;
+      #pseudo_blr{} -> I;
+      #pseudo_call{funv=F} -> I#pseudo_call{funv=funv_temps(T, F)};
+      #pseudo_call_prepare{} -> I;
+      #pseudo_li{dst=D} -> I#pseudo_li{dst=T(D)};
+      #pseudo_switch{jtab=J=#arm_temp{},index=Ix=#arm_temp{}} ->
+	  I#pseudo_switch{jtab=T(J),index=T(Ix)};
+      #pseudo_tailcall{funv=F,stkargs=Stk} ->
+	  I#pseudo_tailcall{funv=funv_temps(T,F),stkargs=lists:map(Arg,Stk)};
+      #pseudo_tailcall_prepare{} -> I;
+      #smull{dstlo=DL,dsthi=DH,src1=L,src2=R} ->
+	  I#smull{dstlo=T(DL),dsthi=T(DH),src1=T(L),src2=T(R)}
+  end.
+
+-spec am1_temps(subst_fun(), am1()) -> am1().
+am1_temps(_SubstTemp, T={C,R}) when is_integer(C), is_integer(R) -> T;
+am1_temps(SubstTemp, T=#arm_temp{}) -> SubstTemp(T);
+am1_temps(SubstTemp, {T=#arm_temp{},rrx}) -> {SubstTemp(T),rrx};
+am1_temps(SubstTemp, {A=#arm_temp{},Op,B=#arm_temp{}}) when is_atom(Op) ->
+    {SubstTemp(A),Op,SubstTemp(B)};
+am1_temps(SubstTemp, {T=#arm_temp{},Op,I}) when is_atom(Op), is_integer(I) ->
+    {SubstTemp(T),Op,I}.
+
+-spec am2_temps(subst_fun(), am2()) -> am2().
+am2_temps(SubstTemp, T=#am2{src=A=#arm_temp{},offset=O0}) ->
+    O = case O0 of
+	    _ when is_integer(O0) -> O0;
+	    #arm_temp{} -> SubstTemp(O0);
+	    {B=#arm_temp{},rrx} -> {SubstTemp(B),rrx};
+	    {B=#arm_temp{},Op,I} when is_atom(Op), is_integer(I) ->
+		{SubstTemp(B),Op,I}
+	end,
+    T#am2{src=SubstTemp(A),offset=O}.
+
+-spec am3_temps(subst_fun(), am3()) -> am3().
+am3_temps(SubstTemp, T=#am3{src=A=#arm_temp{},offset=O0}) ->
+    O = case O0 of
+	    _ when is_integer(O0) -> O0;
+	    #arm_temp{} -> SubstTemp(O0)
+	end,
+    T#am3{src=SubstTemp(A),offset=O}.
+
+-spec funv_temps(subst_fun(), funv()) -> funv().
+funv_temps(_SubstTemp, M=#arm_mfa{}) -> M;
+funv_temps(_SubstTemp, P=#arm_prim{}) -> P;
+funv_temps(SubstTemp,  T=#arm_temp{}) -> SubstTemp(T).
+
+-spec arg_temps(subst_fun(), arg()) -> arg().
+arg_temps(_SubstTemp, Imm) when is_integer(Imm) -> Imm;
+arg_temps(SubstTemp,  T=#arm_temp{}) -> SubstTemp(T).
diff --git a/lib/hipe/arm/hipe_rtl_to_arm.erl b/lib/hipe/arm/hipe_rtl_to_arm.erl
index 93342aba33..2f9181d517 100644
--- a/lib/hipe/arm/hipe_rtl_to_arm.erl
+++ b/lib/hipe/arm/hipe_rtl_to_arm.erl
@@ -138,7 +138,6 @@ mk_shift(S, Dst, Src1, ShiftOp, Src2) ->
   end.
 
 mk_shift_ii(S, Dst, Src1, ShiftOp, Src2) ->
-  io:format("~w: RTL alu with two immediates\n", [?MODULE]),
   Tmp = new_untagged_temp(),
   mk_li(Tmp, Src1,
 	mk_shift_ri(S, Dst, Tmp, ShiftOp, Src2)).
@@ -179,7 +178,6 @@ mk_arith(S, Dst, Src1, ArithOp, Src2) ->
   end.
 
 mk_arith_ii(S, Dst, Src1, ArithOp, Src2) ->
-  io:format("~w: RTL alu with two immediates\n", [?MODULE]),
   Tmp = new_untagged_temp(),
   mk_li(Tmp, Src1,
 	mk_arith_ri(S, Dst, Tmp, ArithOp, Src2)).
@@ -277,7 +275,6 @@ mk_branch(Src1, Cond, Src2, TrueLab, FalseLab, Pred) ->
   end.
 
 mk_branch_ii(Imm1, Cond, Imm2, TrueLab, FalseLab, Pred) ->
-  io:format("~w: RTL branch with two immediates\n", [?MODULE]),
   Tmp = new_untagged_temp(),
   mk_li(Tmp, Imm1,
 	mk_branch_ri(Tmp, Cond, Imm2,
@@ -472,7 +469,6 @@ mk_load(Dst, Base1, Base2, LoadSize, LoadSign) ->
   end.
 
 mk_load_ii(Dst, Base1, Base2, LdOp) ->
-  io:format("~w: RTL load with two immediates\n", [?MODULE]),
   Tmp = new_untagged_temp(),
   mk_li(Tmp, Base1,
 	mk_load_ri(Dst, Tmp, Base2, LdOp)).
@@ -485,7 +481,6 @@ mk_load_rr(Dst, Base1, Base2, LdOp) ->
   [hipe_arm:mk_load(LdOp, Dst, Am2)].
 
 mk_ldrsb_ii(Dst, Base1, Base2) ->
-  io:format("~w: RTL load signed byte with two immediates\n", [?MODULE]),
   Tmp = new_untagged_temp(),
   mk_li(Tmp, Base1,
 	mk_ldrsb_ri(Dst, Tmp, Base2)).
@@ -543,7 +538,7 @@ conv_return(I, Map, Data) ->
   {I2, Map0, Data}.
 
 conv_store(I, Map, Data) ->
-  {Base, Map0} = conv_dst(hipe_rtl:store_base(I), Map),
+  {Base, Map0} = conv_src(hipe_rtl:store_base(I), Map),
   {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0),
   {Offset, Map2} = conv_src(hipe_rtl:store_offset(I), Map1),
   StoreSize = hipe_rtl:store_size(I),
@@ -567,13 +562,28 @@ mk_store(Src, Base, Offset, StoreSize) ->
   end.
 
 mk_store2(Src, Base, Offset, StOp) ->
-  case hipe_arm:is_temp(Offset) of
+  case hipe_arm:is_temp(Base) of
     true ->
-      mk_store_rr(Src, Base, Offset, StOp);
-    _ ->
-      mk_store_ri(Src, Base, Offset, StOp)
+      case hipe_arm:is_temp(Offset) of
+	true ->
+	  mk_store_rr(Src, Base, Offset, StOp);
+	_ ->
+	  mk_store_ri(Src, Base, Offset, StOp)
+      end;
+    false ->
+      case hipe_arm:is_temp(Offset) of
+	true ->
+	  mk_store_ri(Src, Offset, Base, StOp);
+	_ ->
+	  mk_store_ii(Src, Base, Offset, StOp)
+      end
   end.
-  
+
+mk_store_ii(Src, Base, Offset, StOp) ->
+  Tmp = new_untagged_temp(),
+  mk_li(Tmp, Base,
+	mk_store_ri(Src, Tmp, Offset, StOp)).
+
 mk_store_ri(Src, Base, Offset, StOp) ->
   hipe_arm:mk_store(StOp, Src, Base, Offset, 'new', []).