1 files changed, 72 insertions, 20 deletions
diff --git a/lib/hipe/arm/hipe_arm_finalise.erl b/lib/hipe/arm/hipe_arm_finalise.erl
index a4b2f9c73c..3a6fd5a2dd 100644
--- a/lib/hipe/arm/hipe_arm_finalise.erl
+++ b/lib/hipe/arm/hipe_arm_finalise.erl
@@ -1,9 +1,5 @@
 %% -*- erlang-indent-level: 2 -*-
 %%
-%% %CopyrightBegin%
-%% 
-%% Copyright Ericsson AB 2005-2016. All Rights Reserved.
-%% 
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
 %% You may obtain a copy of the License at
@@ -15,18 +11,19 @@
 %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 %% See the License for the specific language governing permissions and
 %% limitations under the License.
-%% 
-%% %CopyrightEnd%
-%%
 
 -module(hipe_arm_finalise).
--export([finalise/1]).
+-export([finalise/2]).
 -include("hipe_arm.hrl").
 
-finalise(Defun) ->
+finalise(Defun, Options) ->
   #defun{code=Code0} = Defun,
-  Code1 = peep(expand(Code0)),
-  Defun#defun{code=Code1}.
+  Code1Rev = expand(Code0),
+  Code2 = case proplists:get_bool(peephole, Options) of
+	    true -> peep(Code1Rev);
+	    false -> lists:reverse(Code1Rev)
+	  end,
+  Defun#defun{code=Code2}.
 
 expand(Insns) ->
   expand_list(Insns, []).
@@ -34,7 +31,7 @@ expand(Insns) ->
 expand_list([I|Insns], Accum) ->
   expand_list(Insns, expand_insn(I, Accum));
 expand_list([], Accum) ->
-  lists:reverse(Accum).
+  Accum.
 
 expand_insn(I, Accum) ->
   case I of
@@ -63,12 +60,67 @@ expand_insn(I, Accum) ->
       [I|Accum]
   end.
 
-peep(Insns) ->
-  peep_list(Insns, []).
+%% We do peephole "bottom-up" (in reverse, but applying rules to the correctly
+%% ordered list). This way, we can do replacements that would take multiple
+%% passes with an in-order peephole optimiser.
+%%
+%% N.B., if a rule wants to produce multiple instructions (even if some of them
+%% are unchanged, it should push the additional instructions on the More list,
+%% so that only the top instruction on Insns is new or changed, i.e. tl(Insns)
+%% should have been peepholed previously.
+peep(RevInsns) ->
+  peep_list_skip([], RevInsns).
+
+peep_list([#b_label{'cond'='al',label=Label}
+	   | (Insns = [#label{label=Label}|_])], More) ->
+  peep_list_skip(Insns, More);
+
+peep_list([#move{movop='mov',s=false,dst=#arm_temp{reg=Dst}
+		,am1=#arm_temp{reg=Dst}}|Insns], More) ->
+  peep_list_skip(Insns, More);
+
+peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsr,Imm}},
+	   #move{movop='mov',s=false,dst=Dst,am1={Dst,lsl,Imm}}
+	   |Insns], More) when Imm > 0, Imm =< 8 ->
+  peep_list([#alu{aluop='bic',s=false,dst=Dst,src=Src,am1={(1 bsl Imm)-1,0}}
+	    |Insns], More);
+peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsl,Imm}},
+	   #move{movop='mov',s=false,dst=Dst,am1={Dst,lsr,Imm}}
+	   |Insns], More) when Imm >= 24, Imm < 32 ->
+  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src
+		 ,am1={(1 bsl (32-Imm))-1,0}} | Insns], More);
+
+%% XXX: Load-after-store optimisation should also be applied to RTL, where it
+%% can be more general, expose opportunities for constant propagation, etc.
+peep_list([#store{stop='strb',src=Src,am2=Mem}=Str,
+	   #load {ldop='ldrb',dst=Dst,am2=Mem} | Insns], More) ->
+  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={16#ff,0}}|Insns],
+	    [Str|More]);
+peep_list([#store{stop='str',src=Src,am2=Mem}=Str,
+	   #load {ldop='ldr',dst=Dst,am2=Mem} | Insns], More) ->
+  peep_list([#move{movop='mov',s=false,dst=Dst,am1=Src}|Insns], [Str|More]);
+
+peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={Mask,0}},
+	   #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={InvMask,0}}
+	   |Insns], More) ->
+  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src
+		 ,am1={Mask band (bnot InvMask),0}} | Insns], More);
+
+%% XXX: The place that generates brain-dead code like the following should be
+%% fixed rather than trying to patch it over here.
+peep_list([#load{ldop='ldrb',dst=Dst,am2=_Mem},
+	   #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={16#ff,0}}
+	   | Insns], More) ->
+  peep_list([#move{movop='mov',s=false,dst=Dst,am1={0,0}}|Insns], More);
+
+peep_list(Insns, [I|More]) ->
+  peep_list([I|Insns], More);
+peep_list(Accum, []) ->
+  Accum.
 
-peep_list([#b_label{'cond'='al',label=Label} | (Insns = [#label{label=Label}|_])], Accum) ->
-  peep_list(Insns, Accum);
-peep_list([I|Insns], Accum) ->
-  peep_list(Insns, [I|Accum]);
-peep_list([], Accum) ->
-  lists:reverse(Accum).
+%% Used as an optimisation instead of tailcalling peep_list/2 when Insns has
+%% already been peeped or is otherwise uninteresting (such as empty).
+peep_list_skip(Insns, [I|More]) ->
+  peep_list([I|Insns], More);
+peep_list_skip(Accum, []) ->
+  Accum.