From d93a42112b35e4dbfb0f34b413fffb543f15ca3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20L=C3=A5ng?= <margnus1@telia.com>
Date: Sat, 12 Mar 2016 01:22:45 +0100
Subject: hipe_x86: LSRA for SSE2

There is little point offering LSRA for x86 if we're still going to call
hipe_graph_coloring_regalloc for the floats. In particular, all
allocators except LSRA allocate an N^2 interference matrix, making them
unusable for really large functions.
---
 lib/hipe/amd64/Makefile                            |  4 +-
 .../amd64/hipe_amd64_ra_sse2_postconditions.erl    | 50 ++++++++------
 lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl            | 21 ------
 lib/hipe/amd64/hipe_amd64_registers.erl            |  5 ++
 lib/hipe/amd64/hipe_amd64_sse2.erl                 | 79 ++++++++++++++++++++++
 lib/hipe/main/hipe.app.src                         |  3 +-
 lib/hipe/regalloc/hipe_amd64_specific_sse2.erl     | 26 +++++--
 lib/hipe/regalloc/hipe_x86_specific_x87.erl        | 13 +++-
 lib/hipe/x86/Makefile                              |  2 -
 lib/hipe/x86/hipe_x86_ra.erl                       | 38 +++++++----
 lib/hipe/x86/hipe_x86_ra_finalise.erl              | 15 +++-
 lib/hipe/x86/hipe_x86_ra_ls.erl                    | 31 ++++++++-
 lib/hipe/x86/hipe_x86_ra_x87_ls.erl                | 64 ------------------
 13 files changed, 216 insertions(+), 135 deletions(-)
 delete mode 100644 lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl
 create mode 100644 lib/hipe/amd64/hipe_amd64_sse2.erl
 delete mode 100644 lib/hipe/x86/hipe_x86_ra_x87_ls.erl

(limited to 'lib/hipe')

diff --git a/lib/hipe/amd64/Makefile b/lib/hipe/amd64/Makefile
index 8dc2af2679..ea3559b7e6 100644
--- a/lib/hipe/amd64/Makefile
+++ b/lib/hipe/amd64/Makefile
@@ -57,10 +57,10 @@ MODULES=hipe_amd64_assemble \
 	hipe_amd64_ra_naive \
 	hipe_amd64_ra_postconditions \
 	hipe_amd64_ra_sse2_postconditions \
-	hipe_amd64_ra_x87_ls \
 	hipe_amd64_registers \
 	hipe_amd64_spill_restore \
 	hipe_amd64_x87 \
+	hipe_amd64_sse2 \
 	hipe_rtl_to_amd64
 
 ERL_FILES=$(MODULES:%=%.erl)
@@ -125,10 +125,10 @@ $(EBIN)/hipe_amd64_ra_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_ls.erl
 $(EBIN)/hipe_amd64_ra_naive.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_naive.erl
 $(EBIN)/hipe_amd64_ra_postconditions.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_postconditions.erl
 $(EBIN)/hipe_amd64_ra_sse2_postconditions.beam: ../main/hipe.hrl
-$(EBIN)/hipe_amd64_ra_x87_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_x87_ls.erl
 $(EBIN)/hipe_amd64_registers.beam: ../rtl/hipe_literals.hrl
 $(EBIN)/hipe_amd64_spill_restore.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../flow/cfg.hrl ../x86/hipe_x86_spill_restore.erl
 $(EBIN)/hipe_amd64_x87.beam: ../x86/hipe_x86_x87.erl
+$(EBIN)/hipe_amd64_sse2.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl
 $(EBIN)/hipe_rtl_to_amd64.beam: ../x86/hipe_rtl_to_x86.erl ../rtl/hipe_rtl.hrl
 
 $(TARGET_FILES): ../x86/hipe_x86.hrl ../misc/hipe_consttab.hrl
diff --git a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl
index b1f7bd7572..8483d2d0d5 100644
--- a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl
+++ b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl
@@ -21,7 +21,7 @@
 
 -module(hipe_amd64_ra_sse2_postconditions).
 
--export([check_and_rewrite/2]).
+-export([check_and_rewrite/2, check_and_rewrite/3]).
 
 -include("../x86/hipe_x86.hrl").
 -define(HIPE_INSTRUMENT_COMPILER, true).
@@ -30,39 +30,43 @@
 
 
 check_and_rewrite(AMD64Defun, Coloring) ->
+  check_and_rewrite(AMD64Defun, Coloring, 'normal').
+
+check_and_rewrite(AMD64Defun, Coloring, Strategy) ->
   %%io:format("Converting\n"),
   TempMap = hipe_temp_map:cols2tuple(Coloring,hipe_amd64_specific_sse2),
   %%io:format("Rewriting\n"),
   #defun{code=Code0} = AMD64Defun,
-  {Code1, DidSpill} = do_insns(Code0, TempMap, [], false),
+  {Code1, DidSpill} = do_insns(Code0, TempMap, Strategy, [], false),
   {AMD64Defun#defun{code=Code1, var_range={0, hipe_gensym:get_var(x86)}}, 
    DidSpill}.
 
-do_insns([I|Insns], TempMap, Accum, DidSpill0) ->
-  {NewIs, DidSpill1} = do_insn(I, TempMap),
-  do_insns(Insns, TempMap, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1);
-do_insns([], _TempMap, Accum, DidSpill) ->
+do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) ->
+  {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy),
+  do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum),
+	   DidSpill0 or DidSpill1);
+do_insns([], _TempMap, _Strategy, Accum, DidSpill) ->
   {lists:reverse(Accum), DidSpill}.
 
-do_insn(I, TempMap) ->	% Insn -> {Insn list, DidSpill}
+do_insn(I, TempMap, Strategy) ->	% Insn -> {Insn list, DidSpill}
   case I of
     #fmove{} ->
-      do_fmove(I, TempMap);
+      do_fmove(I, TempMap, Strategy);
     #fp_unop{} ->
-      do_fp_unop(I, TempMap);
+      do_fp_unop(I, TempMap, Strategy);
     #fp_binop{} ->
-      do_fp_binop(I, TempMap);
+      do_fp_binop(I, TempMap, Strategy);
     _ ->
       %% All non sse2 ops
       {[I], false}
   end.
 
 %%% Fix an fp_binop.
-do_fp_binop(I, TempMap) ->
+do_fp_binop(I, TempMap, Strategy) ->
   #fp_binop{src=Src,dst=Dst} = I,
   case is_mem_opnd(Dst, TempMap) of
     true ->
-      Tmp = clone(Dst),
+      Tmp = clone(Dst, Strategy),
       {[#fmove{src=Dst, dst=Tmp},
 	I#fp_binop{src=Src,dst=Tmp},
 	#fmove{src=Tmp,dst=Dst}],
@@ -71,11 +75,11 @@ do_fp_binop(I, TempMap) ->
       {[I], false}
   end.
 
-do_fp_unop(I, TempMap) ->
+do_fp_unop(I, TempMap, Strategy) ->
   #fp_unop{arg=Arg} = I,
   case is_mem_opnd(Arg, TempMap) of
     true ->
-      Tmp = clone(Arg),
+      Tmp = clone(Arg, Strategy),
       {[#fmove{src=Arg, dst=Tmp},
 	I#fp_unop{arg=Tmp},
 	#fmove{src=Tmp,dst=Arg}],
@@ -85,7 +89,7 @@ do_fp_unop(I, TempMap) ->
   end.
 
 %%% Fix an fmove op.
-do_fmove(I, TempMap) ->
+do_fmove(I, TempMap, Strategy) ->
   #fmove{src=Src,dst=Dst} = I,
   case
     (is_mem_opnd(Src, TempMap) andalso is_mem_opnd(Dst, TempMap))
@@ -93,7 +97,7 @@ do_fmove(I, TempMap) ->
     orelse ((not is_float_temp(Src)) andalso is_mem_opnd(Dst, TempMap))
   of
     true ->
-      Tmp = spill_temp(double),
+      Tmp = spill_temp(double, Strategy),
       {[#fmove{src=Src, dst=Tmp},I#fmove{src=Tmp,dst=Dst}],
        true};
     false ->
@@ -177,15 +181,21 @@ is_mem_opnd(Opnd, TempMap) ->
 
 %%% Make Reg a clone of Dst (attach Dst's type to Reg).
 
-clone(Dst) ->
+clone(Dst, Strategy) ->
   Type =
     case Dst of
       #x86_mem{} -> hipe_x86:mem_type(Dst);
       #x86_temp{} -> hipe_x86:temp_type(Dst)
     end,
-  spill_temp(Type).
-
-spill_temp(Type) ->
+  spill_temp(Type, Strategy).
+
+spill_temp(Type, 'normal') ->
+  hipe_x86:mk_new_temp(Type);
+spill_temp(double, 'linearscan') ->
+  hipe_x86:mk_temp(hipe_amd64_specific_sse2:temp0(), double);
+spill_temp(Type, 'linearscan') when Type =:= tagged; Type =:= untagged ->
+  %% tagged/untagged: we can make a new temp here since we have yet to
+  %% allocate registers for these types (both must be accepted by the guard)
   hipe_x86:mk_new_temp(Type).
 
 %%% Make a certain reg into a clone of Dst
diff --git a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl b/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl
deleted file mode 100644
index 6da3f44cd3..0000000000
--- a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl
+++ /dev/null
@@ -1,21 +0,0 @@
-%%
-%% %CopyrightBegin%
-%% 
-%% Copyright Ericsson AB 2004-2016. All Rights Reserved.
-%% 
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
-%% 
-%% %CopyrightEnd%
-%%
-
--include("../x86/hipe_x86_ra_x87_ls.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_registers.erl b/lib/hipe/amd64/hipe_amd64_registers.erl
index 780c2cc547..ada5311453 100644
--- a/lib/hipe/amd64/hipe_amd64_registers.erl
+++ b/lib/hipe/amd64/hipe_amd64_registers.erl
@@ -52,6 +52,7 @@
  	 tailcall_clobbered/0,
  	 temp0/0,
 	 temp1/0,
+	 sse2_temp0/0,
 	 %% fixed/0,
 	 wordsize/0
 	]).
@@ -107,6 +108,8 @@ heap_limit_offset() -> ?P_HP_LIMIT.
 -define(TEMP0, ?R14).
 -define(TEMP1, ?R13).
 
+-define(SSE2_TEMP0, 00).
+
 -define(PROC_POINTER, ?RBP).
 
 reg_name(R) ->
@@ -204,6 +207,8 @@ allocatable() ->
 allocatable_sse2() ->
   [00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15]. %% xmm0 - xmm15
 
+sse2_temp0() -> ?SSE2_TEMP0.
+
 allocatable_x87() ->
   [0,1,2,3,4,5,6].
 
diff --git a/lib/hipe/amd64/hipe_amd64_sse2.erl b/lib/hipe/amd64/hipe_amd64_sse2.erl
new file mode 100644
index 0000000000..df78941be5
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_sse2.erl
@@ -0,0 +1,79 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2016. All Rights Reserved.
+%% 
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%% 
+%% %CopyrightEnd%
+%%
+%% Fix {mem, mem} floating point operations that result from linear scan
+%% allocated floats.
+
+-module(hipe_amd64_sse2).
+
+-export([map/1]).
+
+-include("../x86/hipe_x86.hrl").
+-include("../main/hipe.hrl").
+
+%%----------------------------------------------------------------------
+
+map(Defun = #defun{code=Code0}) ->
+  Code1 = do_insns(Code0, []),
+  Defun#defun{code=Code1}.
+
+do_insns([I|Insns], Accum) ->
+  NewIs = do_insn(I),
+  do_insns(Insns, lists:reverse(NewIs, Accum));
+do_insns([], Accum) ->
+  lists:reverse(Accum).
+
+do_insn(I) ->
+  case I of
+    #fp_binop{} -> do_fp_binop(I);
+    #fmove{}    -> do_fmove(I);
+    _           -> [I]
+  end.
+
+do_fp_binop(I = #fp_binop{src=Src0,dst=Dst}) ->
+  {FixSrc, Src} = fix_binary(Src0, Dst),
+  FixSrc ++ [I#fp_binop{src=Src}].
+
+do_fmove(I = #fmove{src=Src0,dst=Dst}) ->
+  {FixSrc, Src} = fix_binary(Src0, Dst),
+  FixSrc ++ [I#fmove{src=Src}].
+
+fix_binary(Src0, Dst) ->
+  case is_mem_opnd(Src0) of
+    false -> {[], Src0};
+    true ->
+      case is_mem_opnd(Dst) of
+	false -> {[], Src0};
+	true ->
+	  Src1 = spill_temp(),
+	  {[hipe_x86:mk_fmove(Src0, Src1)], Src1}
+      end
+  end.
+
+is_mem_opnd(#x86_fpreg{reg=Reg}) ->
+  not hipe_amd64_registers:is_precoloured_sse2(Reg);
+is_mem_opnd(#x86_temp{type=double, reg=Reg}) ->
+  not hipe_amd64_registers:is_precoloured_sse2(Reg);
+is_mem_opnd(#x86_temp{type=_, reg=Reg}) ->
+  not hipe_amd64_registers:is_precoloured(Reg);
+is_mem_opnd(#x86_mem{}) -> true.
+
+spill_temp() ->
+  hipe_x86:mk_temp(hipe_amd64_registers:sse2_temp0(), double).
diff --git a/lib/hipe/main/hipe.app.src b/lib/hipe/main/hipe.app.src
index acae2c637d..6c3a2741b3 100644
--- a/lib/hipe/main/hipe.app.src
+++ b/lib/hipe/main/hipe.app.src
@@ -49,12 +49,12 @@
 	     hipe_amd64_ra_naive,
 	     hipe_amd64_ra_postconditions,
 	     hipe_amd64_ra_sse2_postconditions,
-	     hipe_amd64_ra_x87_ls,
 	     hipe_amd64_registers,
 	     hipe_amd64_specific,
 	     hipe_amd64_specific_sse2,
 	     hipe_amd64_specific_x87,
 	     hipe_amd64_spill_restore,
+	     hipe_amd64_sse2,
 	     hipe_amd64_x87,
 	     hipe_arm,
 	     hipe_arm_assemble,
@@ -217,7 +217,6 @@
 	     hipe_x86_ra_ls,
 	     hipe_x86_ra_naive,
 	     hipe_x86_ra_postconditions,
-	     hipe_x86_ra_x87_ls,
 	     hipe_x86_registers,
 	     hipe_x86_specific,
 	     hipe_x86_specific_x87,
diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl
index 50e5869d45..8766712ecd 100644
--- a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl
+++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl
@@ -42,7 +42,9 @@
          reg_nr/1,
 	 non_alloc/1,
 	 allocatable/0,
-         physical_name/1,
+	 allocatable/1,
+	 temp0/0,
+	 physical_name/1,
 	 all_precoloured/0,
 	 new_spill_index/1,	%% used by hipe_ls_regalloc
 	 var_range/1,
@@ -52,7 +54,8 @@
 
 %% callbacks for hipe_regalloc_loop
 -export([defun_to_cfg/1,
-	 check_and_rewrite/2]).
+	 check_and_rewrite/2,
+	 check_and_rewrite/3]).
 
 %%----------------------------------------------------------------------------
 
@@ -66,6 +69,10 @@ defun_to_cfg(Defun) ->
 check_and_rewrite(Defun, Coloring) ->
   hipe_amd64_ra_sse2_postconditions:check_and_rewrite(Defun, Coloring).
 
+check_and_rewrite(Defun, Coloring, Strategy) ->
+  hipe_amd64_ra_sse2_postconditions:check_and_rewrite(
+    Defun, Coloring, Strategy).
+
 reverse_postorder(CFG) ->
   hipe_x86_cfg:reverse_postorder(CFG).
 
@@ -75,8 +82,8 @@ breadthorder(CFG) ->
 postorder(CFG) ->
   hipe_x86_cfg:postorder(CFG).
 
-is_global(_Reg) ->
-  false.
+is_global(Reg) ->
+  hipe_amd64_registers:sse2_temp0() =:= Reg.
  
 is_fixed(_Reg) ->
   false.
@@ -109,7 +116,16 @@ liveout(BB_in_out_liveness, Label) ->
 %% Registers stuff
 
 allocatable() ->
-  hipe_amd64_registers:allocatable_sse2().
+  allocatable('normal').
+
+allocatable('normal') ->
+  hipe_amd64_registers:allocatable_sse2();
+allocatable('linearscan') ->
+  hipe_amd64_registers:allocatable_sse2() --
+    [hipe_amd64_registers:sse2_temp0()].
+
+temp0() ->
+  hipe_amd64_registers:sse2_temp0().
 
 all_precoloured() ->
   allocatable().
diff --git a/lib/hipe/regalloc/hipe_x86_specific_x87.erl b/lib/hipe/regalloc/hipe_x86_specific_x87.erl
index ece07cb2f9..ff5d1b9c8b 100644
--- a/lib/hipe/regalloc/hipe_x86_specific_x87.erl
+++ b/lib/hipe/regalloc/hipe_x86_specific_x87.erl
@@ -32,7 +32,7 @@
 -endif.
 
 -module(?HIPE_X86_SPECIFIC_X87).
--export([allocatable/0,
+-export([allocatable/1,
 	 is_precoloured/1,
 	 %% var_range/1,
 	 %% def_use/1,
@@ -58,7 +58,14 @@
 	 physical_name/1,
 	 breadthorder/1,
 	 postorder/1,
- 	 reverse_postorder/1]).
+	 reverse_postorder/1]).
+
+%% callbacks for hipe_x86_ra_ls
+-export([check_and_rewrite/3]).
+
+%% Rewrite happens in hipe_x86_ra_finalise:finalise/4
+check_and_rewrite(Defun, _Coloring, 'linearscan') ->
+  {Defun, false}.
 
 breadthorder(CFG) ->
   hipe_x86_cfg:breadthorder(CFG).
@@ -103,7 +110,7 @@ liveout(BB_in_out_liveness,Label) ->
 
 %% Registers stuff
 
-allocatable() ->
+allocatable('linearscan') ->
   ?HIPE_X86_REGISTERS:allocatable_x87().
 
 is_precoloured(Reg) ->
diff --git a/lib/hipe/x86/Makefile b/lib/hipe/x86/Makefile
index 93f8b955dd..9b21270426 100644
--- a/lib/hipe/x86/Makefile
+++ b/lib/hipe/x86/Makefile
@@ -60,7 +60,6 @@ MODULES=hipe_rtl_to_x86 \
 	hipe_x86_ra_ls \
 	hipe_x86_ra_naive \
 	hipe_x86_ra_postconditions \
-	hipe_x86_ra_x87_ls \
 	hipe_x86_registers \
 	hipe_x86_spill_restore \
 	hipe_x86_x87
@@ -133,7 +132,6 @@ $(EBIN)/hipe_x86_ra: ../main/hipe.hrl
 $(EBIN)/hipe_x86_ra_dummy.beam: ../main/hipe.hrl
 $(EBIN)/hipe_x86_ra_ls.beam: ../main/hipe.hrl
 $(EBIN)/hipe_x86_ra_postconditions.beam: ../main/hipe.hrl
-$(EBIN)/hipe_x86_ra_x87_ls.beam: ../main/hipe.hrl
 $(EBIN)/hipe_x86_registers.beam: ../rtl/hipe_literals.hrl
 $(EBIN)/hipe_x86_spill_restore.beam: ../main/hipe.hrl ../flow/cfg.hrl
 $(EBIN)/hipe_x86_x87.beam: ../main/hipe.hrl
diff --git a/lib/hipe/x86/hipe_x86_ra.erl b/lib/hipe/x86/hipe_x86_ra.erl
index f66961a7a7..d47ba532fe 100644
--- a/lib/hipe/x86/hipe_x86_ra.erl
+++ b/lib/hipe/x86/hipe_x86_ra.erl
@@ -75,25 +75,35 @@ ra(Defun, SpillIndex, Options, RegAllocMod) ->
 
 -ifdef(HIPE_AMD64).
 ra_fp(Defun, Options) ->
-  case proplists:get_bool(inline_fp, Options) and
-       (proplists:get_value(regalloc, Options) =/= naive) of
-    true ->
-      case proplists:get_bool(x87, Options) of
-	true ->
-	  hipe_amd64_ra_x87_ls:ra(Defun, Options);
-	false ->
-	  hipe_regalloc_loop:ra_fp(Defun, Options,
-				   hipe_coalescing_regalloc,
-				   hipe_amd64_specific_sse2)
-      end;
-    false ->
-      {Defun,[],0}
+  Regalloc0 = proplists:get_value(regalloc, Options),
+  {Regalloc, TargetMod} =
+    case proplists:get_bool(inline_fp, Options) and (Regalloc0 =/= naive) of
+      false -> {naive, undefined};
+      true ->
+	case proplists:get_bool(x87, Options) of
+	  true ->  {linear_scan, hipe_amd64_specific_x87};
+	  false -> {Regalloc0,   hipe_amd64_specific_sse2}
+	end
+    end,
+  case Regalloc of
+    coalescing  -> ra_fp(Defun, Options, hipe_coalescing_regalloc, TargetMod);
+    optimistic  -> ra_fp(Defun, Options, hipe_optimistic_regalloc, TargetMod);
+    graph_color -> ra_fp(Defun, Options, hipe_graph_coloring_regalloc,
+			 TargetMod);
+    linear_scan -> hipe_amd64_ra_ls:ra_fp(Defun, Options, TargetMod);
+    naive -> {Defun,[],0};
+    _ ->
+      exit({unknown_regalloc_compiler_option,
+	    proplists:get_value(regalloc,Options)})
   end.
+
+ra_fp(Defun, Options, RegAllocMod, TargetMod) ->
+  hipe_regalloc_loop:ra_fp(Defun, Options, RegAllocMod, TargetMod).
 -else.
 ra_fp(Defun, Options) ->
   case proplists:get_bool(inline_fp, Options) of
     true ->
-      hipe_x86_ra_x87_ls:ra(Defun, Options);
+      hipe_x86_ra_ls:ra_fp(Defun, Options, hipe_x86_specific_x87);
     false ->
       {Defun,[],0}
   end.
diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl
index 5dd75cb7ae..647d67eeeb 100644
--- a/lib/hipe/x86/hipe_x86_ra_finalise.erl
+++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl
@@ -25,10 +25,13 @@
 -define(HIPE_X86_RA_FINALISE,	hipe_amd64_ra_finalise).
 -define(HIPE_X86_REGISTERS,	hipe_amd64_registers).
 -define(HIPE_X86_X87,		hipe_amd64_x87).
+-define(HIPE_X86_SSE2,		hipe_amd64_sse2).
+-define(IF_HAS_SSE2(Expr),	Expr).
 -else.
 -define(HIPE_X86_RA_FINALISE,	hipe_x86_ra_finalise).
 -define(HIPE_X86_REGISTERS,	hipe_x86_registers).
 -define(HIPE_X86_X87,		hipe_x86_x87).
+-define(IF_HAS_SSE2(Expr),).
 -endif.
 
 -module(?HIPE_X86_RA_FINALISE).
@@ -41,7 +44,17 @@ finalise(Defun, TempMap, FpMap, Options) ->
     true ->
       ?HIPE_X86_X87:map(Defun1);
     _ ->
-      Defun1
+      case
+	proplists:get_bool(inline_fp, Options)
+	and (proplists:get_value(regalloc, Options) =:= linear_scan)
+      of
+	%% Ugly, but required to avoid Dialyzer complaints about "Unknown
+	%% function" hipe_x86_sse2:map/1
+	?IF_HAS_SSE2(true ->
+			?HIPE_X86_SSE2:map(Defun1);)
+	false ->
+	  Defun1
+      end
   end.
 
 %%%
diff --git a/lib/hipe/x86/hipe_x86_ra_ls.erl b/lib/hipe/x86/hipe_x86_ra_ls.erl
index 3e34433111..8cd51481dd 100644
--- a/lib/hipe/x86/hipe_x86_ra_ls.erl
+++ b/lib/hipe/x86/hipe_x86_ra_ls.erl
@@ -35,7 +35,7 @@
 -endif.
 
 -module(?HIPE_X86_RA_LS).
--export([ra/3,regalloc/7]).
+-export([ra/3,ra_fp/3,regalloc/7]).
 -define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation.
 -include("../main/hipe.hrl").
 
@@ -48,6 +48,35 @@ ra(Defun, SpillIndex, Options) ->
   ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))),
   alloc(NewDefun, SpillIndex, SpillLimit, Options).
 
+ra_fp(Defun, Options, TargetMod) ->
+  ?inc_counter(ra_calls_counter,1),
+  CFG = hipe_x86_cfg:init(Defun),
+  %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)),
+  SpillIndex = 0,
+  SpillLimit = TargetMod:number_of_temporaries(CFG),
+  ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))),
+
+  ?inc_counter(ra_iteration_counter,1),
+  %% ?HIPE_X86_PP:pp(Defun),
+  Cfg = hipe_x86_cfg:init(Defun), % XXX: didn't we just compute this above?
+
+  {Coloring,NewSpillIndex} =
+    regalloc(Cfg,
+	     TargetMod:allocatable('linearscan'),
+	     [hipe_x86_cfg:start_label(Cfg)],
+	     SpillIndex, SpillLimit, Options,
+	     TargetMod),
+
+  {NewDefun, _DidSpill} =
+    TargetMod:check_and_rewrite(Defun, Coloring, 'linearscan'),
+  TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod),
+  {TempMap2, NewSpillIndex2} =
+    hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options,
+			     TargetMod, TempMap),
+  Coloring2 =
+    hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2),
+  ?add_spills(Options, NewSpillIndex),
+  {NewDefun, Coloring2, NewSpillIndex2}.
 
 alloc(Defun, SpillIndex, SpillLimit, Options) ->
   ?inc_counter(ra_iteration_counter,1), 
diff --git a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl b/lib/hipe/x86/hipe_x86_ra_x87_ls.erl
deleted file mode 100644
index 1ee76e5948..0000000000
--- a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl
+++ /dev/null
@@ -1,64 +0,0 @@
-%% $Id$
-%%
-%% %CopyrightBegin%
-%% 
-%% Copyright Ericsson AB 2006-2016. All Rights Reserved.
-%% 
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
-%% 
-%% %CopyrightEnd%
-%%
-
-%% Linear Scan register allocator for x87
-
--ifdef(HIPE_AMD64).
--define(HIPE_X86_RA_X87_LS, hipe_amd64_ra_x87_ls).
--define(HIPE_X86_SPECIFIC_X87, hipe_amd64_specific_x87).
--define(HIPE_X86_PP, hipe_amd64_pp).
--define(HIPE_X86_RA_LS, hipe_amd64_ra_ls).
--else.
--define(HIPE_X86_RA_X87_LS, hipe_x86_ra_x87_ls).
--define(HIPE_X86_SPECIFIC_X87, hipe_x86_specific_x87).
--define(HIPE_X86_PP, hipe_x86_pp).
--define(HIPE_X86_RA_LS, hipe_x86_ra_ls).
--endif.
-
--module(?HIPE_X86_RA_X87_LS).
--export([ra/2]).
-
-%%-define(DEBUG,1).
-
--define(HIPE_INSTRUMENT_COMPILER, false). %% Turn off instrumentation.
--include("../main/hipe.hrl").
-
-ra(Defun, Options) ->
-    ?inc_counter(ra_calls_counter,1),
-    CFG = hipe_x86_cfg:init(Defun),
-    %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)),
-    SpillIndex = 0,
-    SpillLimit = ?HIPE_X86_SPECIFIC_X87:number_of_temporaries(CFG),
-    ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))),
-
-    ?inc_counter(ra_iteration_counter,1),
-    %% ?HIPE_X86_PP:pp(Defun),
-    Cfg = hipe_x86_cfg:init(Defun), % XXX: didn't we just compute this above?
-
-    {Coloring,NewSpillIndex} =
-	?HIPE_X86_RA_LS:regalloc(Cfg,
-				 ?HIPE_X86_SPECIFIC_X87:allocatable(),
-				 [hipe_x86_cfg:start_label(Cfg)],
-				 SpillIndex, SpillLimit, Options,
-				 ?HIPE_X86_SPECIFIC_X87),
-
-    ?add_spills(Options, NewSpillIndex),
-    {Defun, Coloring, NewSpillIndex}.
-- 
cgit v1.2.3