diff options
author | Magnus Lång <[email protected]> | 2016-04-14 19:04:22 +0200 |
---|---|---|
committer | Magnus Lång <[email protected]> | 2016-08-30 17:18:00 +0200 |
commit | cca2b0a38dd4cbc3dfef026e0d8c2cba57270935 (patch) | |
tree | 342e57fd13a5eed4898f3379b17942e34afd1210 /lib | |
parent | 5ffdaa02d53c26fbc41d4bd16575ef9a6ee3c1d7 (diff) | |
download | otp-cca2b0a38dd4cbc3dfef026e0d8c2cba57270935.tar.gz otp-cca2b0a38dd4cbc3dfef026e0d8c2cba57270935.tar.bz2 otp-cca2b0a38dd4cbc3dfef026e0d8c2cba57270935.zip |
hipe_arm: Improve peephole optimiser
Diffstat (limited to 'lib')
-rw-r--r-- | lib/hipe/arm/hipe_arm_finalise.erl | 85 | ||||
-rw-r--r-- | lib/hipe/arm/hipe_arm_main.erl | 2 |
2 files changed, 73 insertions, 14 deletions
diff --git a/lib/hipe/arm/hipe_arm_finalise.erl b/lib/hipe/arm/hipe_arm_finalise.erl index a4b2f9c73c..55651d7180 100644 --- a/lib/hipe/arm/hipe_arm_finalise.erl +++ b/lib/hipe/arm/hipe_arm_finalise.erl @@ -20,13 +20,17 @@ %% -module(hipe_arm_finalise). --export([finalise/1]). +-export([finalise/2]). -include("hipe_arm.hrl"). -finalise(Defun) -> +finalise(Defun, Options) -> #defun{code=Code0} = Defun, - Code1 = peep(expand(Code0)), - Defun#defun{code=Code1}. + Code1Rev = expand(Code0), + Code2 = case proplists:get_bool(peephole, Options) of + true -> peep(Code1Rev); + false -> lists:reverse(Code1Rev) + end, + Defun#defun{code=Code2}. expand(Insns) -> expand_list(Insns, []). @@ -34,7 +38,7 @@ expand(Insns) -> expand_list([I|Insns], Accum) -> expand_list(Insns, expand_insn(I, Accum)); expand_list([], Accum) -> - lists:reverse(Accum). + Accum. expand_insn(I, Accum) -> case I of @@ -63,12 +67,67 @@ expand_insn(I, Accum) -> [I|Accum] end. -peep(Insns) -> - peep_list(Insns, []). +%% We do peephole "bottom-up" (in reverse, but applying rules to the correctly +%% ordered list). This way, we can do replacements that would take multiple +%% passes with an in-order peephole optimiser. +%% +%% N.B., if a rule wants to produce multiple instructions (even if some of them +%% are unchanged, it should push the additional instructions on the More list, +%% so that only the top instruction on Insns is new or changed, i.e. tl(Insns) +%% should have been peepholed previously. +peep(RevInsns) -> + peep_list_skip([], RevInsns). + +peep_list([#b_label{'cond'='al',label=Label} + | (Insns = [#label{label=Label}|_])], More) -> + peep_list_skip(Insns, More); + +peep_list([#move{movop='mov',s=false,dst=#arm_temp{reg=Dst} + ,am1=#arm_temp{reg=Dst}}|Insns], More) -> + peep_list_skip(Insns, More); + +peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsr,Imm}}, + #move{movop='mov',s=false,dst=Dst,am1={Dst,lsl,Imm}} + |Insns], More) when Imm > 0, Imm =< 8 -> + peep_list([#alu{aluop='bic',s=false,dst=Dst,src=Src,am1={(1 bsl Imm)-1,0}} + |Insns], More); +peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsl,Imm}}, + #move{movop='mov',s=false,dst=Dst,am1={Dst,lsr,Imm}} + |Insns], More) when Imm >= 24, Imm < 32 -> + peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src + ,am1={(1 bsl (32-Imm))-1,0}} | Insns], More); + +%% XXX: Load-after-store optimisation should also be applied to RTL, where it +%% can be more general, expose opportunities for constant propagation, etc. +peep_list([#store{stop='strb',src=Src,am2=Mem}=Str, + #load {ldop='ldrb',dst=Dst,am2=Mem} | Insns], More) -> + peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={16#ff,0}}|Insns], + [Str|More]); +peep_list([#store{stop='str',src=Src,am2=Mem}=Str, + #load {ldop='ldr',dst=Dst,am2=Mem} | Insns], More) -> + peep_list([#move{movop='mov',s=false,dst=Dst,am1=Src}|Insns], [Str|More]); + +peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={Mask,0}}, + #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={InvMask,0}} + |Insns], More) -> + peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src + ,am1={Mask band (bnot InvMask),0}} | Insns], More); + +%% XXX: The place that generates brain-dead code like the following should be +%% fixed rather than trying to patch it over here. +peep_list([#load{ldop='ldrb',dst=Dst,am2=_Mem}, + #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={16#ff,0}} + | Insns], More) -> + peep_list([#move{movop='mov',s=false,dst=Dst,am1={0,0}}|Insns], More); + +peep_list(Insns, [I|More]) -> + peep_list([I|Insns], More); +peep_list(Accum, []) -> + Accum. -peep_list([#b_label{'cond'='al',label=Label} | (Insns = [#label{label=Label}|_])], Accum) -> - peep_list(Insns, Accum); -peep_list([I|Insns], Accum) -> - peep_list(Insns, [I|Accum]); -peep_list([], Accum) -> - lists:reverse(Accum). +%% Used as an optimisation instead of tailcalling peep_list/2 when Insns has +%% already been peeped or is otherwise uninteresting (such as empty). +peep_list_skip(Insns, [I|More]) -> + peep_list([I|Insns], More); +peep_list_skip(Accum, []) -> + Accum. diff --git a/lib/hipe/arm/hipe_arm_main.erl b/lib/hipe/arm/hipe_arm_main.erl index dce1193b24..c81cd92160 100644 --- a/lib/hipe/arm/hipe_arm_main.erl +++ b/lib/hipe/arm/hipe_arm_main.erl @@ -32,7 +32,7 @@ rtl_to_arm(MFA, RTL, Options) -> Defun3 = hipe_arm_frame:frame(Defun2), %% io:format("~w: after frame\n", [?MODULE]), %% hipe_arm_pp:pp(Defun3), - Defun4 = hipe_arm_finalise:finalise(Defun3), + Defun4 = hipe_arm_finalise:finalise(Defun3, Options), %% io:format("~w: after finalise\n", [?MODULE]), pp(Defun4, MFA, Options), {native, arm, {unprofiled, Defun4}}. |