Merge branch 'sverker/hipe-performance-o1/PR-1154/OTP-13862'

* sverker/hipe-performance-o1/PR-1154:
  hipe_sparc: Minimise CFG<->linear conversions
  hipe_ppc: Minimise CFG<->linear conversions
  hipe_arm: Minimise CFG<->linear conversions
  hipe_x86: Use lea instead of move+add
  hipe_arm: Improve peephole optimiser
  hipe_arm: Be resilient to crappy RTL
  hipe_ppc: Be resilient to crappy RTL
  hipe_sparc: Be resilient to crappy RTL
  hipe: Reuse liveness info for spillmin
  hipe_x86: Minimise CFG<->linear conversions
  hipe: Fix o0 and o1
  hipe: Add o0 and o1 to tests
  hipe_rtl_binary:get_word_integer/4: Handle imms
  hipe_x86: Be resilient to crappy RTL
  hipe_x86: LSRA for SSE2
author: Sverker Eriksson <[email protected]> 2016-09-02 14:51:25 +0200
committer: Sverker Eriksson <[email protected]> 2016-09-02 14:51:25 +0200
commit: c2f8b61ca3682281752fa0984699214dfcbf7ccd (patch)
tree: 3f44fa5c58d0d0d845045c3c5535aefad333b6dd /lib/hipe/arm/hipe_arm_finalise.erl
parent: 87643cf92c061d7518299fdebb326e315c32e528 (diff)
parent: a19e3f0e1e82b793d58f9ef0db907ba637793fb6 (diff)
download: otp-c2f8b61ca3682281752fa0984699214dfcbf7ccd.tar.gz
otp-c2f8b61ca3682281752fa0984699214dfcbf7ccd.tar.bz2
otp-c2f8b61ca3682281752fa0984699214dfcbf7ccd.zip
1 files changed, 72 insertions, 13 deletions
diff --git a/lib/hipe/arm/hipe_arm_finalise.erl b/lib/hipe/arm/hipe_arm_finalise.erl
index a4b2f9c73c..55651d7180 100644
--- a/lib/hipe/arm/hipe_arm_finalise.erl
+++ b/lib/hipe/arm/hipe_arm_finalise.erl
@@ -20,13 +20,17 @@
 %%
 
 -module(hipe_arm_finalise).
--export([finalise/1]).
+-export([finalise/2]).
 -include("hipe_arm.hrl").
 
-finalise(Defun) ->
+finalise(Defun, Options) ->
   #defun{code=Code0} = Defun,
-  Code1 = peep(expand(Code0)),
-  Defun#defun{code=Code1}.
+  Code1Rev = expand(Code0),
+  Code2 = case proplists:get_bool(peephole, Options) of
+	    true -> peep(Code1Rev);
+	    false -> lists:reverse(Code1Rev)
+	  end,
+  Defun#defun{code=Code2}.
 
 expand(Insns) ->
   expand_list(Insns, []).
@@ -34,7 +38,7 @@ expand(Insns) ->
 expand_list([I|Insns], Accum) ->
   expand_list(Insns, expand_insn(I, Accum));
 expand_list([], Accum) ->
-  lists:reverse(Accum).
+  Accum.
 
 expand_insn(I, Accum) ->
   case I of
@@ -63,12 +67,67 @@ expand_insn(I, Accum) ->
       [I|Accum]
   end.
 
-peep(Insns) ->
-  peep_list(Insns, []).
+%% We do peephole "bottom-up" (in reverse, but applying rules to the correctly
+%% ordered list). This way, we can do replacements that would take multiple
+%% passes with an in-order peephole optimiser.
+%%
+%% N.B., if a rule wants to produce multiple instructions (even if some of them
+%% are unchanged), it should push the additional instructions on the More list,
+%% so that only the top instruction on Insns is new or changed, i.e. tl(Insns)
+%% should have been peepholed previously.
+peep(RevInsns) ->
+  peep_list_skip([], RevInsns).
+
+peep_list([#b_label{'cond'='al',label=Label}
+	   | (Insns = [#label{label=Label}|_])], More) ->
+  peep_list_skip(Insns, More);
+
+peep_list([#move{movop='mov',s=false,dst=#arm_temp{reg=Dst}
+		,am1=#arm_temp{reg=Dst}}|Insns], More) ->
+  peep_list_skip(Insns, More);
+
+peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsr,Imm}},
+	   #move{movop='mov',s=false,dst=Dst,am1={Dst,lsl,Imm}}
+	   |Insns], More) when Imm > 0, Imm =< 8 ->
+  peep_list([#alu{aluop='bic',s=false,dst=Dst,src=Src,am1={(1 bsl Imm)-1,0}}
+	    |Insns], More);
+peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsl,Imm}},
+	   #move{movop='mov',s=false,dst=Dst,am1={Dst,lsr,Imm}}
+	   |Insns], More) when Imm >= 24, Imm < 32 ->
+  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src
+		 ,am1={(1 bsl (32-Imm))-1,0}} | Insns], More);
+
+%% XXX: Load-after-store optimisation should also be applied to RTL, where it
+%% can be more general, expose opportunities for constant propagation, etc.
+peep_list([#store{stop='strb',src=Src,am2=Mem}=Str,
+	   #load {ldop='ldrb',dst=Dst,am2=Mem} | Insns], More) ->
+  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={16#ff,0}}|Insns],
+	    [Str|More]);
+peep_list([#store{stop='str',src=Src,am2=Mem}=Str,
+	   #load {ldop='ldr',dst=Dst,am2=Mem} | Insns], More) ->
+  peep_list([#move{movop='mov',s=false,dst=Dst,am1=Src}|Insns], [Str|More]);
+
+peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={Mask,0}},
+	   #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={InvMask,0}}
+	   |Insns], More) ->
+  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src
+		 ,am1={Mask band (bnot InvMask),0}} | Insns], More);
+
+%% XXX: The place that generates brain-dead code like the following should be
+%% fixed rather than trying to patch it over here.
+peep_list([#load{ldop='ldrb',dst=Dst,am2=_Mem},
+	   #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={16#ff,0}}
+	   | Insns], More) ->
+  peep_list([#move{movop='mov',s=false,dst=Dst,am1={0,0}}|Insns], More);
+
+peep_list(Insns, [I|More]) ->
+  peep_list([I|Insns], More);
+peep_list(Accum, []) ->
+  Accum.
 
-peep_list([#b_label{'cond'='al',label=Label} | (Insns = [#label{label=Label}|_])], Accum) ->
-  peep_list(Insns, Accum);
-peep_list([I|Insns], Accum) ->
-  peep_list(Insns, [I|Accum]);
-peep_list([], Accum) ->
-  lists:reverse(Accum).
+%% Used as an optimisation instead of tailcalling peep_list/2 when Insns has
+%% already been peeped or is otherwise uninteresting (such as empty).
+peep_list_skip(Insns, [I|More]) ->
+  peep_list([I|Insns], More);
+peep_list_skip(Accum, []) ->
+  Accum.
author	Sverker Eriksson <[email protected]>	2016-09-02 14:51:25 +0200
committer	Sverker Eriksson <[email protected]>	2016-09-02 14:51:25 +0200
commit	c2f8b61ca3682281752fa0984699214dfcbf7ccd (patch)
tree	3f44fa5c58d0d0d845045c3c5535aefad333b6dd /lib/hipe/arm/hipe_arm_finalise.erl
parent	87643cf92c061d7518299fdebb326e315c32e528 (diff)
parent	a19e3f0e1e82b793d58f9ef0db907ba637793fb6 (diff)
download	otp-c2f8b61ca3682281752fa0984699214dfcbf7ccd.tar.gz otp-c2f8b61ca3682281752fa0984699214dfcbf7ccd.tar.bz2 otp-c2f8b61ca3682281752fa0984699214dfcbf7ccd.zip