aboutsummaryrefslogtreecommitdiffstats
path: root/lib/hipe/amd64
diff options
context:
space:
mode:
authorSverker Eriksson <[email protected]>2016-09-02 14:51:25 +0200
committerSverker Eriksson <[email protected]>2016-09-02 14:51:25 +0200
commitc2f8b61ca3682281752fa0984699214dfcbf7ccd (patch)
tree3f44fa5c58d0d0d845045c3c5535aefad333b6dd /lib/hipe/amd64
parent87643cf92c061d7518299fdebb326e315c32e528 (diff)
parenta19e3f0e1e82b793d58f9ef0db907ba637793fb6 (diff)
downloadotp-c2f8b61ca3682281752fa0984699214dfcbf7ccd.tar.gz
otp-c2f8b61ca3682281752fa0984699214dfcbf7ccd.tar.bz2
otp-c2f8b61ca3682281752fa0984699214dfcbf7ccd.zip
Merge branch 'sverker/hipe-performance-o1/PR-1154/OTP-13862'
* sverker/hipe-performance-o1/PR-1154: hipe_sparc: Minimise CFG<->linear conversions hipe_ppc: Minimise CFG<->linear conversions hipe_arm: Minimise CFG<->linear conversions hipe_x86: Use lea instead of move+add hipe_arm: Improve peephole optimiser hipe_arm: Be resilient to crappy RTL hipe_ppc: Be resilient to crappy RTL hipe_sparc: Be resilient to crappy RTL hipe: Reuse liveness info for spillmin hipe_x86: Minimise CFG<->linear conversions hipe: Fix o0 and o1 hipe: Add o0 and o1 to tests hipe_rtl_binary:get_word_integer/4: Handle imms hipe_x86: Be resilient to crappy RTL hipe_x86: LSRA for SSE2
Diffstat (limited to 'lib/hipe/amd64')
-rw-r--r--lib/hipe/amd64/Makefile4
-rw-r--r--lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl153
-rw-r--r--lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl21
-rw-r--r--lib/hipe/amd64/hipe_amd64_registers.erl5
-rw-r--r--lib/hipe/amd64/hipe_amd64_sse2.erl82
5 files changed, 149 insertions, 116 deletions
diff --git a/lib/hipe/amd64/Makefile b/lib/hipe/amd64/Makefile
index 8dc2af2679..ea3559b7e6 100644
--- a/lib/hipe/amd64/Makefile
+++ b/lib/hipe/amd64/Makefile
@@ -57,10 +57,10 @@ MODULES=hipe_amd64_assemble \
hipe_amd64_ra_naive \
hipe_amd64_ra_postconditions \
hipe_amd64_ra_sse2_postconditions \
- hipe_amd64_ra_x87_ls \
hipe_amd64_registers \
hipe_amd64_spill_restore \
hipe_amd64_x87 \
+ hipe_amd64_sse2 \
hipe_rtl_to_amd64
ERL_FILES=$(MODULES:%=%.erl)
@@ -125,10 +125,10 @@ $(EBIN)/hipe_amd64_ra_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_ls.erl
$(EBIN)/hipe_amd64_ra_naive.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_naive.erl
$(EBIN)/hipe_amd64_ra_postconditions.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_postconditions.erl
$(EBIN)/hipe_amd64_ra_sse2_postconditions.beam: ../main/hipe.hrl
-$(EBIN)/hipe_amd64_ra_x87_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_x87_ls.erl
$(EBIN)/hipe_amd64_registers.beam: ../rtl/hipe_literals.hrl
$(EBIN)/hipe_amd64_spill_restore.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../flow/cfg.hrl ../x86/hipe_x86_spill_restore.erl
$(EBIN)/hipe_amd64_x87.beam: ../x86/hipe_x86_x87.erl
+$(EBIN)/hipe_amd64_sse2.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl
$(EBIN)/hipe_rtl_to_amd64.beam: ../x86/hipe_rtl_to_x86.erl ../rtl/hipe_rtl.hrl
$(TARGET_FILES): ../x86/hipe_x86.hrl ../misc/hipe_consttab.hrl
diff --git a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl
index b1f7bd7572..bbf9170bc3 100644
--- a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl
+++ b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl
@@ -21,7 +21,7 @@
-module(hipe_amd64_ra_sse2_postconditions).
--export([check_and_rewrite/2]).
+-export([check_and_rewrite/2, check_and_rewrite/3]).
-include("../x86/hipe_x86.hrl").
-define(HIPE_INSTRUMENT_COMPILER, true).
@@ -29,40 +29,48 @@
-define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)).
-check_and_rewrite(AMD64Defun, Coloring) ->
+check_and_rewrite(AMD64CFG, Coloring) ->
+ check_and_rewrite(AMD64CFG, Coloring, 'normal').
+
+check_and_rewrite(AMD64CFG, Coloring, Strategy) ->
%%io:format("Converting\n"),
TempMap = hipe_temp_map:cols2tuple(Coloring,hipe_amd64_specific_sse2),
%%io:format("Rewriting\n"),
- #defun{code=Code0} = AMD64Defun,
- {Code1, DidSpill} = do_insns(Code0, TempMap, [], false),
- {AMD64Defun#defun{code=Code1, var_range={0, hipe_gensym:get_var(x86)}},
- DidSpill}.
-
-do_insns([I|Insns], TempMap, Accum, DidSpill0) ->
- {NewIs, DidSpill1} = do_insn(I, TempMap),
- do_insns(Insns, TempMap, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1);
-do_insns([], _TempMap, Accum, DidSpill) ->
+ do_bbs(hipe_x86_cfg:labels(AMD64CFG), TempMap, Strategy, AMD64CFG, false).
+
+do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill};
+do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) ->
+ Code0 = hipe_bb:code(BB = hipe_x86_cfg:bb(CFG0, Lbl)),
+ {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0),
+ CFG = hipe_x86_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)),
+ do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill).
+
+do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) ->
+ {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy),
+ do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum),
+ DidSpill0 or DidSpill1);
+do_insns([], _TempMap, _Strategy, Accum, DidSpill) ->
{lists:reverse(Accum), DidSpill}.
-do_insn(I, TempMap) -> % Insn -> {Insn list, DidSpill}
+do_insn(I, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill}
case I of
#fmove{} ->
- do_fmove(I, TempMap);
+ do_fmove(I, TempMap, Strategy);
#fp_unop{} ->
- do_fp_unop(I, TempMap);
+ do_fp_unop(I, TempMap, Strategy);
#fp_binop{} ->
- do_fp_binop(I, TempMap);
+ do_fp_binop(I, TempMap, Strategy);
_ ->
%% All non sse2 ops
{[I], false}
end.
%%% Fix an fp_binop.
-do_fp_binop(I, TempMap) ->
+do_fp_binop(I, TempMap, Strategy) ->
#fp_binop{src=Src,dst=Dst} = I,
case is_mem_opnd(Dst, TempMap) of
true ->
- Tmp = clone(Dst),
+ Tmp = clone(Dst, Strategy),
{[#fmove{src=Dst, dst=Tmp},
I#fp_binop{src=Src,dst=Tmp},
#fmove{src=Tmp,dst=Dst}],
@@ -71,11 +79,11 @@ do_fp_binop(I, TempMap) ->
{[I], false}
end.
-do_fp_unop(I, TempMap) ->
+do_fp_unop(I, TempMap, Strategy) ->
#fp_unop{arg=Arg} = I,
case is_mem_opnd(Arg, TempMap) of
true ->
- Tmp = clone(Arg),
+ Tmp = clone(Arg, Strategy),
{[#fmove{src=Arg, dst=Tmp},
I#fp_unop{arg=Tmp},
#fmove{src=Tmp,dst=Arg}],
@@ -85,7 +93,7 @@ do_fp_unop(I, TempMap) ->
end.
%%% Fix an fmove op.
-do_fmove(I, TempMap) ->
+do_fmove(I, TempMap, Strategy) ->
#fmove{src=Src,dst=Dst} = I,
case
(is_mem_opnd(Src, TempMap) andalso is_mem_opnd(Dst, TempMap))
@@ -93,7 +101,7 @@ do_fmove(I, TempMap) ->
orelse ((not is_float_temp(Src)) andalso is_mem_opnd(Dst, TempMap))
of
true ->
- Tmp = spill_temp(double),
+ Tmp = spill_temp(double, Strategy),
{[#fmove{src=Src, dst=Tmp},I#fmove{src=Tmp,dst=Dst}],
true};
false ->
@@ -106,86 +114,45 @@ is_float_temp(#x86_mem{}) -> false.
%%% Check if an operand denotes a memory cell (mem or pseudo).
is_mem_opnd(Opnd, TempMap) ->
- R =
- case Opnd of
- #x86_mem{} -> true;
- #x86_temp{type=double} ->
- Reg = hipe_x86:temp_reg(Opnd),
- case hipe_x86:temp_is_allocatable(Opnd) of
- true ->
- case tuple_size(TempMap) > Reg of
- true ->
- case
- hipe_temp_map:is_spilled(Reg, TempMap) of
- true ->
- ?count_temp(Reg),
- true;
- false -> false
- end;
- _ -> false
- end;
- false -> true
- end;
- _ -> false
- end,
- %% io:format("Op ~w mem: ~w\n",[Opnd,R]),
- R.
-
-%%% Check if an operand is a spilled Temp.
-
-%%src_is_spilled(Src, TempMap) ->
-%% case hipe_x86:is_temp(Src) of
-%% true ->
-%% Reg = hipe_x86:temp_reg(Src),
-%% case hipe_x86:temp_is_allocatable(Src) of
-%% true ->
-%% case tuple_size(TempMap) > Reg of
-%% true ->
-%% case hipe_temp_map:is_spilled(Reg, TempMap) of
-%% true ->
-%% ?count_temp(Reg),
-%% true;
-%% false ->
-%% false
-%% end;
-%% false ->
-%% false
-%% end;
-%% false -> true
-%% end;
-%% false -> false
-%% end.
-
-%% is_spilled(Temp, TempMap) ->
-%% case hipe_x86:temp_is_allocatable(Temp) of
-%% true ->
-%% Reg = hipe_x86:temp_reg(Temp),
-%% case tuple_size(TempMap) > Reg of
-%% true ->
-%% case hipe_temp_map:is_spilled(Reg, TempMap) of
-%% true ->
-%% ?count_temp(Reg),
-%% true;
-%% false ->
-%% false
-%% end;
-%% false ->
-%% false
-%% end;
-%% false -> true
-%% end.
+ case Opnd of
+ #x86_mem{} -> true;
+ #x86_temp{type=double} ->
+ Reg = hipe_x86:temp_reg(Opnd),
+ case hipe_x86:temp_is_allocatable(Opnd) of
+ true ->
+ case tuple_size(TempMap) > Reg of
+ true ->
+ case
+ hipe_temp_map:is_spilled(Reg, TempMap) of
+ true ->
+ ?count_temp(Reg),
+ true;
+ false -> false
+ end;
+ _ -> false
+ end;
+ false -> true
+ end;
+ _ -> false
+ end.
%%% Make Reg a clone of Dst (attach Dst's type to Reg).
-clone(Dst) ->
+clone(Dst, Strategy) ->
Type =
case Dst of
#x86_mem{} -> hipe_x86:mem_type(Dst);
#x86_temp{} -> hipe_x86:temp_type(Dst)
end,
- spill_temp(Type).
-
-spill_temp(Type) ->
+ spill_temp(Type, Strategy).
+
+spill_temp(Type, 'normal') ->
+ hipe_x86:mk_new_temp(Type);
+spill_temp(double, 'linearscan') ->
+ hipe_x86:mk_temp(hipe_amd64_specific_sse2:temp0(), double);
+spill_temp(Type, 'linearscan') when Type =:= tagged; Type =/= untagged ->
+ %% We can make a new temp here since we have yet to allocate registers for
+ %% these types
hipe_x86:mk_new_temp(Type).
%%% Make a certain reg into a clone of Dst
diff --git a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl b/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl
deleted file mode 100644
index 6da3f44cd3..0000000000
--- a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl
+++ /dev/null
@@ -1,21 +0,0 @@
-%%
-%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 2004-2016. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%% http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
-%%
-%% %CopyrightEnd%
-%%
-
--include("../x86/hipe_x86_ra_x87_ls.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_registers.erl b/lib/hipe/amd64/hipe_amd64_registers.erl
index 780c2cc547..ada5311453 100644
--- a/lib/hipe/amd64/hipe_amd64_registers.erl
+++ b/lib/hipe/amd64/hipe_amd64_registers.erl
@@ -52,6 +52,7 @@
tailcall_clobbered/0,
temp0/0,
temp1/0,
+ sse2_temp0/0,
%% fixed/0,
wordsize/0
]).
@@ -107,6 +108,8 @@ heap_limit_offset() -> ?P_HP_LIMIT.
-define(TEMP0, ?R14).
-define(TEMP1, ?R13).
+-define(SSE2_TEMP0, 00).
+
-define(PROC_POINTER, ?RBP).
reg_name(R) ->
@@ -204,6 +207,8 @@ allocatable() ->
allocatable_sse2() ->
[00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15]. %% xmm0 - xmm15
+sse2_temp0() -> ?SSE2_TEMP0.
+
allocatable_x87() ->
[0,1,2,3,4,5,6].
diff --git a/lib/hipe/amd64/hipe_amd64_sse2.erl b/lib/hipe/amd64/hipe_amd64_sse2.erl
new file mode 100644
index 0000000000..ea6b6cb9ba
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_sse2.erl
@@ -0,0 +1,82 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2016. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Fix {mem, mem} floating point operations that result from linear scan
+%% allocated floats.
+
+-module(hipe_amd64_sse2).
+
+-export([map/1]).
+
+-include("../x86/hipe_x86.hrl").
+-include("../main/hipe.hrl").
+
+%%----------------------------------------------------------------------
+
+map(CFG) ->
+ hipe_x86_cfg:map_bbs(fun do_bb/2, CFG).
+
+do_bb(_Lbl, BB) ->
+ Code = do_insns(hipe_bb:code(BB), []),
+ hipe_bb:code_update(BB, Code).
+
+do_insns([I|Insns], Accum) ->
+ NewIs = do_insn(I),
+ do_insns(Insns, lists:reverse(NewIs, Accum));
+do_insns([], Accum) ->
+ lists:reverse(Accum).
+
+do_insn(I) ->
+ case I of
+ #fp_binop{} -> do_fp_binop(I);
+ #fmove{} -> do_fmove(I);
+ _ -> [I]
+ end.
+
+do_fp_binop(I = #fp_binop{src=Src0,dst=Dst}) ->
+ {FixSrc, Src} = fix_binary(Src0, Dst),
+ FixSrc ++ [I#fp_binop{src=Src}].
+
+do_fmove(I = #fmove{src=Src0,dst=Dst}) ->
+ {FixSrc, Src} = fix_binary(Src0, Dst),
+ FixSrc ++ [I#fmove{src=Src}].
+
+fix_binary(Src0, Dst) ->
+ case is_mem_opnd(Src0) of
+ false -> {[], Src0};
+ true ->
+ case is_mem_opnd(Dst) of
+ false -> {[], Src0};
+ true ->
+ Src1 = spill_temp(),
+ {[hipe_x86:mk_fmove(Src0, Src1)], Src1}
+ end
+ end.
+
+is_mem_opnd(#x86_fpreg{reg=Reg}) ->
+ not hipe_amd64_registers:is_precoloured_sse2(Reg);
+is_mem_opnd(#x86_temp{type=double, reg=Reg}) ->
+ not hipe_amd64_registers:is_precoloured_sse2(Reg);
+is_mem_opnd(#x86_temp{type=_, reg=Reg}) ->
+ not hipe_amd64_registers:is_precoloured(Reg);
+is_mem_opnd(#x86_mem{}) -> true.
+
+spill_temp() ->
+ hipe_x86:mk_temp(hipe_amd64_registers:sse2_temp0(), double).