hipe_arm: Improve peephole optimiser

author: Magnus Lång <[email protected]> 2016-04-14 19:04:22 +0200
committer: Magnus Lång <[email protected]> 2016-08-30 17:18:00 +0200
commit: cca2b0a38dd4cbc3dfef026e0d8c2cba57270935 (patch)
tree: 342e57fd13a5eed4898f3379b17942e34afd1210 /lib/hipe
parent: 5ffdaa02d53c26fbc41d4bd16575ef9a6ee3c1d7 (diff)
download: otp-cca2b0a38dd4cbc3dfef026e0d8c2cba57270935.tar.gz
otp-cca2b0a38dd4cbc3dfef026e0d8c2cba57270935.tar.bz2
otp-cca2b0a38dd4cbc3dfef026e0d8c2cba57270935.zip
2 files changed, 73 insertions, 14 deletions
diff --git a/lib/hipe/arm/hipe_arm_finalise.erl b/lib/hipe/arm/hipe_arm_finalise.erl
index a4b2f9c73c..55651d7180 100644
--- a/lib/hipe/arm/hipe_arm_finalise.erl
+++ b/lib/hipe/arm/hipe_arm_finalise.erl
@@ -20,13 +20,17 @@
 %%
 
 -module(hipe_arm_finalise).
--export([finalise/1]).
+-export([finalise/2]).
 -include("hipe_arm.hrl").
 
-finalise(Defun) ->
+finalise(Defun, Options) ->
   #defun{code=Code0} = Defun,
-  Code1 = peep(expand(Code0)),
-  Defun#defun{code=Code1}.
+  Code1Rev = expand(Code0),
+  Code2 = case proplists:get_bool(peephole, Options) of
+	    true -> peep(Code1Rev);
+	    false -> lists:reverse(Code1Rev)
+	  end,
+  Defun#defun{code=Code2}.
 
 expand(Insns) ->
   expand_list(Insns, []).
@@ -34,7 +38,7 @@ expand(Insns) ->
 expand_list([I|Insns], Accum) ->
   expand_list(Insns, expand_insn(I, Accum));
 expand_list([], Accum) ->
-  lists:reverse(Accum).
+  Accum.
 
 expand_insn(I, Accum) ->
   case I of
@@ -63,12 +67,67 @@ expand_insn(I, Accum) ->
       [I|Accum]
   end.
 
-peep(Insns) ->
-  peep_list(Insns, []).
+%% We do peephole "bottom-up" (in reverse, but applying rules to the correctly
+%% ordered list). This way, we can do replacements that would take multiple
+%% passes with an in-order peephole optimiser.
+%%
+%% N.B., if a rule wants to produce multiple instructions (even if some of them
+%% are unchanged), it should push the additional instructions on the More list,
+%% so that only the top instruction on Insns is new or changed, i.e. tl(Insns)
+%% should have been peepholed previously.
+peep(RevInsns) ->
+  peep_list_skip([], RevInsns).
+
+peep_list([#b_label{'cond'='al',label=Label}
+	   | (Insns = [#label{label=Label}|_])], More) ->
+  peep_list_skip(Insns, More);
+
+peep_list([#move{movop='mov',s=false,dst=#arm_temp{reg=Dst}
+		,am1=#arm_temp{reg=Dst}}|Insns], More) ->
+  peep_list_skip(Insns, More);
+
+peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsr,Imm}},
+	   #move{movop='mov',s=false,dst=Dst,am1={Dst,lsl,Imm}}
+	   |Insns], More) when Imm > 0, Imm =< 8 ->
+  peep_list([#alu{aluop='bic',s=false,dst=Dst,src=Src,am1={(1 bsl Imm)-1,0}}
+	    |Insns], More);
+peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsl,Imm}},
+	   #move{movop='mov',s=false,dst=Dst,am1={Dst,lsr,Imm}}
+	   |Insns], More) when Imm >= 24, Imm < 32 ->
+  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src
+		 ,am1={(1 bsl (32-Imm))-1,0}} | Insns], More);
+
+%% XXX: Load-after-store optimisation should also be applied to RTL, where it
+%% can be more general, expose opportunities for constant propagation, etc.
+peep_list([#store{stop='strb',src=Src,am2=Mem}=Str,
+	   #load {ldop='ldrb',dst=Dst,am2=Mem} | Insns], More) ->
+  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={16#ff,0}}|Insns],
+	    [Str|More]);
+peep_list([#store{stop='str',src=Src,am2=Mem}=Str,
+	   #load {ldop='ldr',dst=Dst,am2=Mem} | Insns], More) ->
+  peep_list([#move{movop='mov',s=false,dst=Dst,am1=Src}|Insns], [Str|More]);
+
+peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={Mask,0}},
+	   #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={InvMask,0}}
+	   |Insns], More) ->
+  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src
+		 ,am1={Mask band (bnot InvMask),0}} | Insns], More);
+
+%% XXX: The place that generates brain-dead code like the following should be
+%% fixed rather than trying to patch it over here.
+peep_list([#load{ldop='ldrb',dst=Dst,am2=_Mem},
+	   #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={16#ff,0}}
+	   | Insns], More) ->
+  peep_list([#move{movop='mov',s=false,dst=Dst,am1={0,0}}|Insns], More);
+
+peep_list(Insns, [I|More]) ->
+  peep_list([I|Insns], More);
+peep_list(Accum, []) ->
+  Accum.
 
-peep_list([#b_label{'cond'='al',label=Label} | (Insns = [#label{label=Label}|_])], Accum) ->
-  peep_list(Insns, Accum);
-peep_list([I|Insns], Accum) ->
-  peep_list(Insns, [I|Accum]);
-peep_list([], Accum) ->
-  lists:reverse(Accum).
+%% Used as an optimisation instead of tailcalling peep_list/2 when Insns has
+%% already been peeped or is otherwise uninteresting (such as empty).
+peep_list_skip(Insns, [I|More]) ->
+  peep_list([I|Insns], More);
+peep_list_skip(Accum, []) ->
+  Accum.
diff --git a/lib/hipe/arm/hipe_arm_main.erl b/lib/hipe/arm/hipe_arm_main.erl
index dce1193b24..c81cd92160 100644
--- a/lib/hipe/arm/hipe_arm_main.erl
+++ b/lib/hipe/arm/hipe_arm_main.erl
@@ -32,7 +32,7 @@ rtl_to_arm(MFA, RTL, Options) ->
   Defun3 = hipe_arm_frame:frame(Defun2),
   %% io:format("~w: after frame\n", [?MODULE]),
   %% hipe_arm_pp:pp(Defun3),
-  Defun4 = hipe_arm_finalise:finalise(Defun3),
+  Defun4 = hipe_arm_finalise:finalise(Defun3, Options),
   %% io:format("~w: after finalise\n", [?MODULE]),
   pp(Defun4, MFA, Options),
   {native, arm, {unprofiled, Defun4}}.
author	Magnus Lång <[email protected]>	2016-04-14 19:04:22 +0200
committer	Magnus Lång <[email protected]>	2016-08-30 17:18:00 +0200
commit	cca2b0a38dd4cbc3dfef026e0d8c2cba57270935 (patch)
tree	342e57fd13a5eed4898f3379b17942e34afd1210 /lib/hipe
parent	5ffdaa02d53c26fbc41d4bd16575ef9a6ee3c1d7 (diff)
download	otp-cca2b0a38dd4cbc3dfef026e0d8c2cba57270935.tar.gz otp-cca2b0a38dd4cbc3dfef026e0d8c2cba57270935.tar.bz2 otp-cca2b0a38dd4cbc3dfef026e0d8c2cba57270935.zip