diff options
Diffstat (limited to 'lib/hipe')
93 files changed, 1616 insertions, 1001 deletions
diff --git a/lib/hipe/amd64/Makefile b/lib/hipe/amd64/Makefile index 8dc2af2679..ea3559b7e6 100644 --- a/lib/hipe/amd64/Makefile +++ b/lib/hipe/amd64/Makefile @@ -57,10 +57,10 @@ MODULES=hipe_amd64_assemble \ hipe_amd64_ra_naive \ hipe_amd64_ra_postconditions \ hipe_amd64_ra_sse2_postconditions \ - hipe_amd64_ra_x87_ls \ hipe_amd64_registers \ hipe_amd64_spill_restore \ hipe_amd64_x87 \ + hipe_amd64_sse2 \ hipe_rtl_to_amd64 ERL_FILES=$(MODULES:%=%.erl) @@ -125,10 +125,10 @@ $(EBIN)/hipe_amd64_ra_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_ls.erl $(EBIN)/hipe_amd64_ra_naive.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_naive.erl $(EBIN)/hipe_amd64_ra_postconditions.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_postconditions.erl $(EBIN)/hipe_amd64_ra_sse2_postconditions.beam: ../main/hipe.hrl -$(EBIN)/hipe_amd64_ra_x87_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_x87_ls.erl $(EBIN)/hipe_amd64_registers.beam: ../rtl/hipe_literals.hrl $(EBIN)/hipe_amd64_spill_restore.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../flow/cfg.hrl ../x86/hipe_x86_spill_restore.erl $(EBIN)/hipe_amd64_x87.beam: ../x86/hipe_x86_x87.erl +$(EBIN)/hipe_amd64_sse2.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl $(EBIN)/hipe_rtl_to_amd64.beam: ../x86/hipe_rtl_to_x86.erl ../rtl/hipe_rtl.hrl $(TARGET_FILES): ../x86/hipe_x86.hrl ../misc/hipe_consttab.hrl diff --git a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl index 5451f1fe7d..bbf9170bc3 100644 --- a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl +++ b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl @@ -21,7 +21,7 @@ -module(hipe_amd64_ra_sse2_postconditions). --export([check_and_rewrite/2]). +-export([check_and_rewrite/2, check_and_rewrite/3]). -include("../x86/hipe_x86.hrl"). -define(HIPE_INSTRUMENT_COMPILER, true). @@ -29,40 +29,48 @@ -define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). -check_and_rewrite(AMD64Defun, Coloring) -> +check_and_rewrite(AMD64CFG, Coloring) -> + check_and_rewrite(AMD64CFG, Coloring, 'normal'). + +check_and_rewrite(AMD64CFG, Coloring, Strategy) -> %%io:format("Converting\n"), TempMap = hipe_temp_map:cols2tuple(Coloring,hipe_amd64_specific_sse2), %%io:format("Rewriting\n"), - #defun{code=Code0} = AMD64Defun, - {Code1, DidSpill} = do_insns(Code0, TempMap, [], false), - {AMD64Defun#defun{code=Code1, var_range={0, hipe_gensym:get_var(x86)}}, - DidSpill}. - -do_insns([I|Insns], TempMap, Accum, DidSpill0) -> - {NewIs, DidSpill1} = do_insn(I, TempMap), - do_insns(Insns, TempMap, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); -do_insns([], _TempMap, Accum, DidSpill) -> + do_bbs(hipe_x86_cfg:labels(AMD64CFG), TempMap, Strategy, AMD64CFG, false). + +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_x86_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_x86_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). + +do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> + {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), + do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), + DidSpill0 or DidSpill1); +do_insns([], _TempMap, _Strategy, Accum, DidSpill) -> {lists:reverse(Accum), DidSpill}. -do_insn(I, TempMap) -> % Insn -> {Insn list, DidSpill} +do_insn(I, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill} case I of #fmove{} -> - do_fmove(I, TempMap); + do_fmove(I, TempMap, Strategy); #fp_unop{} -> - do_fp_unop(I, TempMap); + do_fp_unop(I, TempMap, Strategy); #fp_binop{} -> - do_fp_binop(I, TempMap); + do_fp_binop(I, TempMap, Strategy); _ -> %% All non sse2 ops {[I], false} end. %%% Fix an fp_binop. -do_fp_binop(I, TempMap) -> +do_fp_binop(I, TempMap, Strategy) -> #fp_binop{src=Src,dst=Dst} = I, case is_mem_opnd(Dst, TempMap) of true -> - Tmp = clone(Dst), + Tmp = clone(Dst, Strategy), {[#fmove{src=Dst, dst=Tmp}, I#fp_binop{src=Src,dst=Tmp}, #fmove{src=Tmp,dst=Dst}], @@ -71,11 +79,11 @@ do_fp_binop(I, TempMap) -> {[I], false} end. -do_fp_unop(I, TempMap) -> +do_fp_unop(I, TempMap, Strategy) -> #fp_unop{arg=Arg} = I, case is_mem_opnd(Arg, TempMap) of true -> - Tmp = clone(Arg), + Tmp = clone(Arg, Strategy), {[#fmove{src=Arg, dst=Tmp}, I#fp_unop{arg=Tmp}, #fmove{src=Tmp,dst=Arg}], @@ -85,97 +93,66 @@ do_fp_unop(I, TempMap) -> end. %%% Fix an fmove op. -do_fmove(I, TempMap) -> +do_fmove(I, TempMap, Strategy) -> #fmove{src=Src,dst=Dst} = I, - case is_mem_opnd(Dst, TempMap) and is_mem_opnd(Src, TempMap) of + case + (is_mem_opnd(Src, TempMap) andalso is_mem_opnd(Dst, TempMap)) + orelse (is_mem_opnd(Src, TempMap) andalso (not is_float_temp(Dst))) + orelse ((not is_float_temp(Src)) andalso is_mem_opnd(Dst, TempMap)) + of true -> - Tmp = clone(Src), + Tmp = spill_temp(double, Strategy), {[#fmove{src=Src, dst=Tmp},I#fmove{src=Tmp,dst=Dst}], true}; false -> {[I], false} end. +is_float_temp(#x86_temp{type=Type}) -> Type =:= double; +is_float_temp(#x86_mem{}) -> false. + %%% Check if an operand denotes a memory cell (mem or pseudo). is_mem_opnd(Opnd, TempMap) -> - R = - case Opnd of - #x86_mem{} -> true; - #x86_temp{} -> - Reg = hipe_x86:temp_reg(Opnd), - case hipe_x86:temp_is_allocatable(Opnd) of - true -> - case tuple_size(TempMap) > Reg of - true -> - case - hipe_temp_map:is_spilled(Reg, TempMap) of - true -> - ?count_temp(Reg), - true; - false -> false - end; - _ -> false - end; - false -> true - end; - _ -> false - end, - %% io:format("Op ~w mem: ~w\n",[Opnd,R]), - R. - -%%% Check if an operand is a spilled Temp. - -%%src_is_spilled(Src, TempMap) -> -%% case hipe_x86:is_temp(Src) of -%% true -> -%% Reg = hipe_x86:temp_reg(Src), -%% case hipe_x86:temp_is_allocatable(Src) of -%% true -> -%% case tuple_size(TempMap) > Reg of -%% true -> -%% case hipe_temp_map:is_spilled(Reg, TempMap) of -%% true -> -%% ?count_temp(Reg), -%% true; -%% false -> -%% false -%% end; -%% false -> -%% false -%% end; -%% false -> true -%% end; -%% false -> false -%% end. - -%% is_spilled(Temp, TempMap) -> -%% case hipe_x86:temp_is_allocatable(Temp) of -%% true -> -%% Reg = hipe_x86:temp_reg(Temp), -%% case tuple_size(TempMap) > Reg of -%% true -> -%% case hipe_temp_map:is_spilled(Reg, TempMap) of -%% true -> -%% ?count_temp(Reg), -%% true; -%% false -> -%% false -%% end; -%% false -> -%% false -%% end; -%% false -> true -%% end. + case Opnd of + #x86_mem{} -> true; + #x86_temp{type=double} -> + Reg = hipe_x86:temp_reg(Opnd), + case hipe_x86:temp_is_allocatable(Opnd) of + true -> + case tuple_size(TempMap) > Reg of + true -> + case + hipe_temp_map:is_spilled(Reg, TempMap) of + true -> + ?count_temp(Reg), + true; + false -> false + end; + _ -> false + end; + false -> true + end; + _ -> false + end. %%% Make Reg a clone of Dst (attach Dst's type to Reg). -clone(Dst) -> +clone(Dst, Strategy) -> Type = case Dst of #x86_mem{} -> hipe_x86:mem_type(Dst); #x86_temp{} -> hipe_x86:temp_type(Dst) end, + spill_temp(Type, Strategy). + +spill_temp(Type, 'normal') -> + hipe_x86:mk_new_temp(Type); +spill_temp(double, 'linearscan') -> + hipe_x86:mk_temp(hipe_amd64_specific_sse2:temp0(), double); +spill_temp(Type, 'linearscan') when Type =:= tagged; Type =/= untagged -> + %% We can make a new temp here since we have yet to allocate registers for + %% these types hipe_x86:mk_new_temp(Type). %%% Make a certain reg into a clone of Dst diff --git a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl b/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl deleted file mode 100644 index 6da3f44cd3..0000000000 --- a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl +++ /dev/null @@ -1,21 +0,0 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% - --include("../x86/hipe_x86_ra_x87_ls.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_registers.erl b/lib/hipe/amd64/hipe_amd64_registers.erl index 780c2cc547..ada5311453 100644 --- a/lib/hipe/amd64/hipe_amd64_registers.erl +++ b/lib/hipe/amd64/hipe_amd64_registers.erl @@ -52,6 +52,7 @@ tailcall_clobbered/0, temp0/0, temp1/0, + sse2_temp0/0, %% fixed/0, wordsize/0 ]). @@ -107,6 +108,8 @@ heap_limit_offset() -> ?P_HP_LIMIT. -define(TEMP0, ?R14). -define(TEMP1, ?R13). +-define(SSE2_TEMP0, 00). + -define(PROC_POINTER, ?RBP). reg_name(R) -> @@ -204,6 +207,8 @@ allocatable() -> allocatable_sse2() -> [00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15]. %% xmm0 - xmm15 +sse2_temp0() -> ?SSE2_TEMP0. + allocatable_x87() -> [0,1,2,3,4,5,6]. diff --git a/lib/hipe/amd64/hipe_amd64_sse2.erl b/lib/hipe/amd64/hipe_amd64_sse2.erl new file mode 100644 index 0000000000..ea6b6cb9ba --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_sse2.erl @@ -0,0 +1,82 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2016. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% Fix {mem, mem} floating point operations that result from linear scan +%% allocated floats. + +-module(hipe_amd64_sse2). + +-export([map/1]). + +-include("../x86/hipe_x86.hrl"). +-include("../main/hipe.hrl"). + +%%---------------------------------------------------------------------- + +map(CFG) -> + hipe_x86_cfg:map_bbs(fun do_bb/2, CFG). + +do_bb(_Lbl, BB) -> + Code = do_insns(hipe_bb:code(BB), []), + hipe_bb:code_update(BB, Code). + +do_insns([I|Insns], Accum) -> + NewIs = do_insn(I), + do_insns(Insns, lists:reverse(NewIs, Accum)); +do_insns([], Accum) -> + lists:reverse(Accum). + +do_insn(I) -> + case I of + #fp_binop{} -> do_fp_binop(I); + #fmove{} -> do_fmove(I); + _ -> [I] + end. + +do_fp_binop(I = #fp_binop{src=Src0,dst=Dst}) -> + {FixSrc, Src} = fix_binary(Src0, Dst), + FixSrc ++ [I#fp_binop{src=Src}]. + +do_fmove(I = #fmove{src=Src0,dst=Dst}) -> + {FixSrc, Src} = fix_binary(Src0, Dst), + FixSrc ++ [I#fmove{src=Src}]. + +fix_binary(Src0, Dst) -> + case is_mem_opnd(Src0) of + false -> {[], Src0}; + true -> + case is_mem_opnd(Dst) of + false -> {[], Src0}; + true -> + Src1 = spill_temp(), + {[hipe_x86:mk_fmove(Src0, Src1)], Src1} + end + end. + +is_mem_opnd(#x86_fpreg{reg=Reg}) -> + not hipe_amd64_registers:is_precoloured_sse2(Reg); +is_mem_opnd(#x86_temp{type=double, reg=Reg}) -> + not hipe_amd64_registers:is_precoloured_sse2(Reg); +is_mem_opnd(#x86_temp{type=_, reg=Reg}) -> + not hipe_amd64_registers:is_precoloured(Reg); +is_mem_opnd(#x86_mem{}) -> true. + +spill_temp() -> + hipe_x86:mk_temp(hipe_amd64_registers:sse2_temp0(), double). diff --git a/lib/hipe/arm/hipe_arm_cfg.erl b/lib/hipe/arm/hipe_arm_cfg.erl index f2fa0a5164..2fb6675da9 100644 --- a/lib/hipe/arm/hipe_arm_cfg.erl +++ b/lib/hipe/arm/hipe_arm_cfg.erl @@ -24,6 +24,7 @@ -export([init/1, labels/1, start_label/1, succ/2, + map_bbs/2, fold_bbs/3, bb/2, bb_add/3]). -export([postorder/1]). -export([linearise/1]). @@ -35,6 +36,7 @@ -define(BREADTH_ORDER,true). % for linear scan -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -include("hipe_arm.hrl"). -include("../flow/cfg.hrl"). diff --git a/lib/hipe/arm/hipe_arm_finalise.erl b/lib/hipe/arm/hipe_arm_finalise.erl index a4b2f9c73c..55651d7180 100644 --- a/lib/hipe/arm/hipe_arm_finalise.erl +++ b/lib/hipe/arm/hipe_arm_finalise.erl @@ -20,13 +20,17 @@ %% -module(hipe_arm_finalise). --export([finalise/1]). +-export([finalise/2]). -include("hipe_arm.hrl"). -finalise(Defun) -> +finalise(Defun, Options) -> #defun{code=Code0} = Defun, - Code1 = peep(expand(Code0)), - Defun#defun{code=Code1}. + Code1Rev = expand(Code0), + Code2 = case proplists:get_bool(peephole, Options) of + true -> peep(Code1Rev); + false -> lists:reverse(Code1Rev) + end, + Defun#defun{code=Code2}. expand(Insns) -> expand_list(Insns, []). @@ -34,7 +38,7 @@ expand(Insns) -> expand_list([I|Insns], Accum) -> expand_list(Insns, expand_insn(I, Accum)); expand_list([], Accum) -> - lists:reverse(Accum). + Accum. expand_insn(I, Accum) -> case I of @@ -63,12 +67,67 @@ expand_insn(I, Accum) -> [I|Accum] end. -peep(Insns) -> - peep_list(Insns, []). +%% We do peephole "bottom-up" (in reverse, but applying rules to the correctly +%% ordered list). This way, we can do replacements that would take multiple +%% passes with an in-order peephole optimiser. +%% +%% N.B., if a rule wants to produce multiple instructions (even if some of them +%% are unchanged, it should push the additional instructions on the More list, +%% so that only the top instruction on Insns is new or changed, i.e. tl(Insns) +%% should have been peepholed previously. +peep(RevInsns) -> + peep_list_skip([], RevInsns). + +peep_list([#b_label{'cond'='al',label=Label} + | (Insns = [#label{label=Label}|_])], More) -> + peep_list_skip(Insns, More); + +peep_list([#move{movop='mov',s=false,dst=#arm_temp{reg=Dst} + ,am1=#arm_temp{reg=Dst}}|Insns], More) -> + peep_list_skip(Insns, More); + +peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsr,Imm}}, + #move{movop='mov',s=false,dst=Dst,am1={Dst,lsl,Imm}} + |Insns], More) when Imm > 0, Imm =< 8 -> + peep_list([#alu{aluop='bic',s=false,dst=Dst,src=Src,am1={(1 bsl Imm)-1,0}} + |Insns], More); +peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsl,Imm}}, + #move{movop='mov',s=false,dst=Dst,am1={Dst,lsr,Imm}} + |Insns], More) when Imm >= 24, Imm < 32 -> + peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src + ,am1={(1 bsl (32-Imm))-1,0}} | Insns], More); + +%% XXX: Load-after-store optimisation should also be applied to RTL, where it +%% can be more general, expose opportunities for constant propagation, etc. +peep_list([#store{stop='strb',src=Src,am2=Mem}=Str, + #load {ldop='ldrb',dst=Dst,am2=Mem} | Insns], More) -> + peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={16#ff,0}}|Insns], + [Str|More]); +peep_list([#store{stop='str',src=Src,am2=Mem}=Str, + #load {ldop='ldr',dst=Dst,am2=Mem} | Insns], More) -> + peep_list([#move{movop='mov',s=false,dst=Dst,am1=Src}|Insns], [Str|More]); + +peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={Mask,0}}, + #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={InvMask,0}} + |Insns], More) -> + peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src + ,am1={Mask band (bnot InvMask),0}} | Insns], More); + +%% XXX: The place that generates brain-dead code like the following should be +%% fixed rather than trying to patch it over here. +peep_list([#load{ldop='ldrb',dst=Dst,am2=_Mem}, + #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={16#ff,0}} + | Insns], More) -> + peep_list([#move{movop='mov',s=false,dst=Dst,am1={0,0}}|Insns], More); + +peep_list(Insns, [I|More]) -> + peep_list([I|Insns], More); +peep_list(Accum, []) -> + Accum. -peep_list([#b_label{'cond'='al',label=Label} | (Insns = [#label{label=Label}|_])], Accum) -> - peep_list(Insns, Accum); -peep_list([I|Insns], Accum) -> - peep_list(Insns, [I|Accum]); -peep_list([], Accum) -> - lists:reverse(Accum). +%% Used as an optimisation instead of tailcalling peep_list/2 when Insns has +%% already been peeped or is otherwise uninteresting (such as empty). +peep_list_skip(Insns, [I|More]) -> + peep_list([I|Insns], More); +peep_list_skip(Accum, []) -> + Accum. diff --git a/lib/hipe/arm/hipe_arm_frame.erl b/lib/hipe/arm/hipe_arm_frame.erl index e1e441a967..9a349b47d3 100644 --- a/lib/hipe/arm/hipe_arm_frame.erl +++ b/lib/hipe/arm/hipe_arm_frame.erl @@ -27,16 +27,14 @@ -define(LIVENESS_ALL, hipe_arm_liveness_gpr). % since we have no FP yet -frame(Defun) -> - Formals = fix_formals(hipe_arm:defun_formals(Defun)), - Temps0 = all_temps(hipe_arm:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG) -> + Formals = fix_formals(hipe_arm_cfg:params(CFG)), + Temps0 = all_temps(CFG, Formals), + MinFrame = defun_minframe(CFG), Temps = ensure_minframe(MinFrame, Temps0), - ClobbersLR = clobbers_lr(hipe_arm:defun_code(Defun)), - CFG0 = hipe_arm_cfg:init(Defun), - Liveness = ?LIVENESS_ALL:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersLR), - hipe_arm_cfg:linearise(CFG1). + ClobbersLR = clobbers_lr(CFG), + Liveness = ?LIVENESS_ALL:analyse(CFG), + do_body(CFG, Liveness, Formals, Temps, ClobbersLR). fix_formals(Formals) -> fix_formals(hipe_arm_registers:nr_args(), Formals). @@ -51,32 +49,21 @@ do_body(CFG0, Liveness, Formals, Temps, ClobbersLR) -> do_prologue(CFG1, Context). do_blocks(CFG, Context) -> - Labels = hipe_arm_cfg:labels(CFG), - do_blocks(Labels, CFG, Context). + hipe_arm_cfg:map_bbs(fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG). -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) -> Liveness = context_liveness(Context), LiveOut = ?LIVENESS_ALL:liveout(Liveness, Label), - Block = hipe_arm_cfg:bb(CFG, Label), Code = hipe_bb:code(Block), - NewCode = do_block(Code, LiveOut, Context), - NewBlock = hipe_bb:code_update(Block, NewCode), - NewCFG = hipe_arm_cfg:bb_add(CFG, Label, NewBlock), - do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> - CFG. - -do_block(Insns, LiveOut, Context) -> - do_block(Insns, LiveOut, Context, context_framesize(Context), []). + NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), + hipe_bb:code_update(Block, NewCode). do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), do_block(Insns, LiveOut, Context, FPoff1, lists:reverse(NewIs, RevCode)); do_block([], _, Context, FPoff, RevCode) -> FPoff0 = context_framesize(Context), - if FPoff =:= FPoff0 -> []; - true -> exit({?MODULE,do_block,FPoff}) - end, + FPoff0 = FPoff, lists:reverse(RevCode, []). do_insn(I, LiveOut, Context, FPoff) -> @@ -543,39 +530,46 @@ temp_is_pseudo(Temp) -> %%% Detect if a Defun's body clobbers LR. %%% -clobbers_lr(Insns) -> +clobbers_lr(CFG) -> LRreg = hipe_arm_registers:lr(), LRtagged = hipe_arm:mk_temp(LRreg, 'tagged'), LRuntagged = hipe_arm:mk_temp(LRreg, 'untagged'), - clobbers_lr(Insns, LRtagged, LRuntagged). - -clobbers_lr([I|Insns], LRtagged, LRuntagged) -> - Defs = hipe_arm_defuse:insn_def_gpr(I), - case lists:member(LRtagged, Defs) of - true -> true; - false -> - case lists:member(LRuntagged, Defs) of - true -> true; - false -> clobbers_lr(Insns, LRtagged, LRuntagged) - end - end; -clobbers_lr([], _LRtagged, _LRuntagged) -> false. + any_insn(fun(I) -> + Defs = hipe_arm_defuse:insn_def_gpr(I), + lists:member(LRtagged, Defs) + orelse lists:member(LRuntagged, Defs) + end, CFG). + +any_insn(Pred, CFG) -> + %% Abuse fold to do an efficient "any"-operation using nonlocal control flow + FoundSatisfying = make_ref(), + try fold_insns(fun (I, _) -> + case Pred(I) of + true -> throw(FoundSatisfying); + false -> false + end + end, false, CFG) + of _ -> false + catch FoundSatisfying -> true + end. %%% %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_arm_defuse:insn_def_all(I)), - S2 = tset_add_list(S1, hipe_arm_defuse:insn_use_all(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_arm_defuse:insn_use_all(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_arm_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). tset_empty() -> gb_sets:new(). @@ -604,16 +598,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_arm:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_arm:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_arm_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/arm/hipe_arm_main.erl b/lib/hipe/arm/hipe_arm_main.erl index dce1193b24..8a7fa86394 100644 --- a/lib/hipe/arm/hipe_arm_main.erl +++ b/lib/hipe/arm/hipe_arm_main.erl @@ -24,15 +24,17 @@ rtl_to_arm(MFA, RTL, Options) -> Defun1 = hipe_rtl_to_arm:translate(RTL), + CFG1 = hipe_arm_cfg:init(Defun1), %% io:format("~w: after translate\n", [?MODULE]), %% hipe_arm_pp:pp(Defun1), - Defun2 = hipe_arm_ra:ra(Defun1, Options), + CFG2 = hipe_arm_ra:ra(CFG1, Options), %% io:format("~w: after regalloc\n", [?MODULE]), - %% hipe_arm_pp:pp(Defun2), - Defun3 = hipe_arm_frame:frame(Defun2), + %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG2)), + CFG3 = hipe_arm_frame:frame(CFG2), + Defun3 = hipe_arm_cfg:linearise(CFG3), %% io:format("~w: after frame\n", [?MODULE]), %% hipe_arm_pp:pp(Defun3), - Defun4 = hipe_arm_finalise:finalise(Defun3), + Defun4 = hipe_arm_finalise:finalise(Defun3, Options), %% io:format("~w: after finalise\n", [?MODULE]), pp(Defun4, MFA, Options), {native, arm, {unprofiled, Defun4}}. diff --git a/lib/hipe/arm/hipe_arm_ra.erl b/lib/hipe/arm/hipe_arm_ra.erl index 2f65e864fd..5a7884b63c 100644 --- a/lib/hipe/arm/hipe_arm_ra.erl +++ b/lib/hipe/arm/hipe_arm_ra.erl @@ -22,36 +22,36 @@ -module(hipe_arm_ra). -export([ra/2]). -ra(Defun0, Options) -> - %% hipe_arm_pp:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} +ra(CFG0, Options) -> + %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG0)), + {CFG1, Coloring_fp, SpillIndex} = case proplists:get_bool(inline_fp, Options) of %% true -> -%% hipe_regalloc_loop:ra_fp(Defun0, Options, +%% hipe_regalloc_loop:ra_fp(CFG0, Options, %% hipe_coalescing_regalloc, %% hipe_arm_specific_fp); false -> - {Defun0,[],0} + {CFG0,[],0} end, - %% hipe_arm_pp:pp(Defun1), - {Defun2, Coloring} + %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG1)), + {CFG2, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, SpillIndex, Options, hipe_graph_coloring_regalloc); linear_scan -> - hipe_arm_ra_ls:ra(Defun1, SpillIndex, Options); + hipe_arm_ra_ls:ra(CFG1, SpillIndex, Options); naive -> - hipe_arm_ra_naive:ra(Defun1, Coloring_fp, Options); + hipe_arm_ra_naive:ra(CFG1, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, - %% hipe_arm_pp:pp(Defun2), - hipe_arm_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). + %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG2)), + hipe_arm_ra_finalise:finalise(CFG2, Coloring, Coloring_fp). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_arm_specific). +ra(CFG, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, SpillIndex, Options, RegAllocMod, hipe_arm_specific). diff --git a/lib/hipe/arm/hipe_arm_ra_finalise.erl b/lib/hipe/arm/hipe_arm_ra_finalise.erl index 4faeadcd7f..2a3fded147 100644 --- a/lib/hipe/arm/hipe_arm_ra_finalise.erl +++ b/lib/hipe/arm/hipe_arm_ra_finalise.erl @@ -23,12 +23,13 @@ -export([finalise/3]). -include("hipe_arm.hrl"). -finalise(Defun, TempMap, _FPMap0=[]) -> - Code = hipe_arm:defun_code(Defun), - {_, SpillLimit} = hipe_arm:defun_var_range(Defun), +finalise(CFG, TempMap, _FPMap0=[]) -> + {_, SpillLimit} = hipe_gensym:var_range(arm), Map = mk_ra_map(TempMap, SpillLimit), - NewCode = ra_code(Code, Map, []), - Defun#defun{code=NewCode}. + hipe_arm_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map) end, CFG). + +ra_bb(BB, Map) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, [])). ra_code([I|Insns], Map, Accum) -> ra_code(Insns, Map, [ra_insn(I, Map) | Accum]); diff --git a/lib/hipe/arm/hipe_arm_ra_ls.erl b/lib/hipe/arm/hipe_arm_ra_ls.erl index d9a360d00c..f9193ca9e0 100644 --- a/lib/hipe/arm/hipe_arm_ra_ls.erl +++ b/lib/hipe/arm/hipe_arm_ra_ls.erl @@ -23,15 +23,12 @@ -module(hipe_arm_ra_ls). -export([ra/3]). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_arm_cfg:init(NewDefun), +ra(CFG, SpillIndex, Options) -> SpillLimit = hipe_arm_specific:number_of_temporaries(CFG), - alloc(NewDefun, SpillIndex, SpillLimit, Options). + alloc(CFG, SpillIndex, SpillLimit, Options). -alloc(Defun, SpillIndex, SpillLimit, Options) -> - CFG = hipe_arm_cfg:init(Defun), - {Coloring, _NewSpillIndex} = +alloc(CFG, SpillIndex, SpillLimit, Options) -> + {Coloring, _NewSpillIndex, Liveness} = regalloc( CFG, hipe_arm_registers:allocatable_gpr()-- @@ -41,16 +38,16 @@ alloc(Defun, SpillIndex, SpillLimit, Options) -> [hipe_arm_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, hipe_arm_specific), - {NewDefun, _DidSpill} = + {NewCFG, _DidSpill} = hipe_arm_ra_postconditions:check_and_rewrite( - Defun, Coloring, 'linearscan'), + CFG, Coloring, 'linearscan'), TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific), {SpillMap, _NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, hipe_arm_specific, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), SpillMap), - {NewDefun, Coloring2}. + {NewCFG, Coloring2}. regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> hipe_ls_regalloc:regalloc( diff --git a/lib/hipe/arm/hipe_arm_ra_naive.erl b/lib/hipe/arm/hipe_arm_ra_naive.erl index 6201269f44..0ea4b04092 100644 --- a/lib/hipe/arm/hipe_arm_ra_naive.erl +++ b/lib/hipe/arm/hipe_arm_ra_naive.erl @@ -24,7 +24,7 @@ -include("hipe_arm.hrl"). -ra(Defun, _Coloring_fp, _Options) -> % -> {Defun, Coloring} - {NewDefun,_DidSpill} = - hipe_arm_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), - {NewDefun, []}. +ra(CFG, _Coloring_fp, _Options) -> % -> {CFG, Coloring} + {NewCFG,_DidSpill} = + hipe_arm_ra_postconditions:check_and_rewrite2(CFG, [], 'naive'), + {NewCFG, []}. diff --git a/lib/hipe/arm/hipe_arm_ra_postconditions.erl b/lib/hipe/arm/hipe_arm_ra_postconditions.erl index 40978e65f6..04365f29b0 100644 --- a/lib/hipe/arm/hipe_arm_ra_postconditions.erl +++ b/lib/hipe/arm/hipe_arm_ra_postconditions.erl @@ -25,17 +25,13 @@ -include("hipe_arm.hrl"). -check_and_rewrite(Defun, Coloring, Allocator) -> +check_and_rewrite(CFG, Coloring, Allocator) -> TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific), - check_and_rewrite2(Defun, TempMap, Allocator). + check_and_rewrite2(CFG, TempMap, Allocator). -check_and_rewrite2(Defun, TempMap, Allocator) -> +check_and_rewrite2(CFG, TempMap, Allocator) -> Strategy = strategy(Allocator), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - VarRange = {0, hipe_gensym:get_var(arm)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. + do_bbs(hipe_arm_cfg:labels(CFG), TempMap, Strategy, CFG, false). strategy(Allocator) -> case Allocator of @@ -44,6 +40,13 @@ strategy(Allocator) -> 'naive' -> 'fixed' end. +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_arm_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_arm_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). + do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); diff --git a/lib/hipe/arm/hipe_arm_registers.erl b/lib/hipe/arm/hipe_arm_registers.erl index 24cd929d41..dcf039676b 100644 --- a/lib/hipe/arm/hipe_arm_registers.erl +++ b/lib/hipe/arm/hipe_arm_registers.erl @@ -67,6 +67,8 @@ -define(R15, 15). -define(LAST_PRECOLOURED, 15). % must handle both GPR and FPR ranges +-define(LR, ?R14). + -define(ARG0, ?R1). -define(ARG1, ?R2). -define(ARG2, ?R3). @@ -114,7 +116,7 @@ stack_pointer() -> ?STACK_POINTER. proc_pointer() -> ?PROC_POINTER. -lr() -> ?R14. +lr() -> ?LR. pc() -> ?R15. @@ -198,7 +200,9 @@ call_clobbered() -> % does the RA strip the type or not? ]. tailcall_clobbered() -> % tailcall crapola needs one temp - [{?TEMP1,tagged},{?TEMP1,untagged}]. + [{?TEMP1,tagged},{?TEMP1,untagged} + ,{?LR,tagged},{?LR,untagged} + ]. live_at_return() -> [%%{?LR,untagged}, diff --git a/lib/hipe/arm/hipe_rtl_to_arm.erl b/lib/hipe/arm/hipe_rtl_to_arm.erl index ad5a559995..2f9181d517 100644 --- a/lib/hipe/arm/hipe_rtl_to_arm.erl +++ b/lib/hipe/arm/hipe_rtl_to_arm.erl @@ -138,7 +138,6 @@ mk_shift(S, Dst, Src1, ShiftOp, Src2) -> end. mk_shift_ii(S, Dst, Src1, ShiftOp, Src2) -> - io:format("~w: RTL alu with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Src1, mk_shift_ri(S, Dst, Tmp, ShiftOp, Src2)). @@ -148,10 +147,11 @@ mk_shift_ir(S, Dst, Src1, ShiftOp, Src2) -> mk_li(Tmp, Src1, mk_shift_rr(S, Dst, Tmp, ShiftOp, Src2)). -mk_shift_ri(S, Dst, Src1, ShiftOp, Src2) when is_integer(Src2) -> - if Src2 >= 0, Src2 < 32 -> ok; - true -> io:format("~w: excessive immediate shift ~w\n", [?MODULE,Src2]) - end, +mk_shift_ri(S, Dst, Src1, ShiftOp, 0) + when ShiftOp =:= lsl; ShiftOp =:= lsr; ShiftOp =:= asr -> + [hipe_arm:mk_move(S, Dst, Src1)]; +mk_shift_ri(S, Dst, Src1, ShiftOp, Src2) + when is_integer(Src2), Src2 > 0, Src2 < 32 -> Am1 = {Src1,ShiftOp,Src2}, [hipe_arm:mk_move(S, Dst, Am1)]. @@ -178,7 +178,6 @@ mk_arith(S, Dst, Src1, ArithOp, Src2) -> end. mk_arith_ii(S, Dst, Src1, ArithOp, Src2) -> - io:format("~w: RTL alu with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Src1, mk_arith_ri(S, Dst, Tmp, ArithOp, Src2)). @@ -276,7 +275,6 @@ mk_branch(Src1, Cond, Src2, TrueLab, FalseLab, Pred) -> end. mk_branch_ii(Imm1, Cond, Imm2, TrueLab, FalseLab, Pred) -> - io:format("~w: RTL branch with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Imm1, mk_branch_ri(Tmp, Cond, Imm2, @@ -471,7 +469,6 @@ mk_load(Dst, Base1, Base2, LoadSize, LoadSign) -> end. mk_load_ii(Dst, Base1, Base2, LdOp) -> - io:format("~w: RTL load with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Base1, mk_load_ri(Dst, Tmp, Base2, LdOp)). @@ -484,7 +481,6 @@ mk_load_rr(Dst, Base1, Base2, LdOp) -> [hipe_arm:mk_load(LdOp, Dst, Am2)]. mk_ldrsb_ii(Dst, Base1, Base2) -> - io:format("~w: RTL load signed byte with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Base1, mk_ldrsb_ri(Dst, Tmp, Base2)). @@ -542,7 +538,7 @@ conv_return(I, Map, Data) -> {I2, Map0, Data}. conv_store(I, Map, Data) -> - {Base, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {Base, Map0} = conv_src(hipe_rtl:store_base(I), Map), {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {Offset, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), StoreSize = hipe_rtl:store_size(I), @@ -566,13 +562,28 @@ mk_store(Src, Base, Offset, StoreSize) -> end. mk_store2(Src, Base, Offset, StOp) -> - case hipe_arm:is_temp(Offset) of + case hipe_arm:is_temp(Base) of true -> - mk_store_rr(Src, Base, Offset, StOp); - _ -> - mk_store_ri(Src, Base, Offset, StOp) + case hipe_arm:is_temp(Offset) of + true -> + mk_store_rr(Src, Base, Offset, StOp); + _ -> + mk_store_ri(Src, Base, Offset, StOp) + end; + false -> + case hipe_arm:is_temp(Offset) of + true -> + mk_store_ri(Src, Offset, Base, StOp); + _ -> + mk_store_ii(Src, Base, Offset, StOp) + end end. - + +mk_store_ii(Src, Base, Offset, StOp) -> + Tmp = new_untagged_temp(), + mk_li(Tmp, Base, + mk_store_ri(Src, Tmp, Offset, StOp)). + mk_store_ri(Src, Base, Offset, StOp) -> hipe_arm:mk_store(StOp, Src, Base, Offset, 'new', []). diff --git a/lib/hipe/cerl/erl_bif_types.erl b/lib/hipe/cerl/erl_bif_types.erl index 9453ca6c6f..32d5d39460 100644 --- a/lib/hipe/cerl/erl_bif_types.erl +++ b/lib/hipe/cerl/erl_bif_types.erl @@ -560,6 +560,9 @@ type(erlang, byte_size, 1, Xs, Opaques) -> strict(erlang, byte_size, 1, Xs, fun (_) -> t_non_neg_integer() end, Opaques); %% Guard bif, needs to be here. +type(erlang, ceil, 1, Xs, Opaques) -> + strict(erlang, ceil, 1, Xs, fun (_) -> t_integer() end, Opaques); +%% Guard bif, needs to be here. %% Also much more expressive than anything you could write in a spec... type(erlang, element, 2, Xs, Opaques) -> strict(erlang, element, 2, Xs, @@ -588,6 +591,9 @@ type(erlang, element, 2, Xs, Opaques) -> type(erlang, float, 1, Xs, Opaques) -> strict(erlang, float, 1, Xs, fun (_) -> t_float() end, Opaques); %% Guard bif, needs to be here. +type(erlang, floor, 1, Xs, Opaques) -> + strict(erlang, floor, 1, Xs, fun (_) -> t_integer() end, Opaques); +%% Guard bif, needs to be here. type(erlang, hd, 1, Xs, Opaques) -> strict(erlang, hd, 1, Xs, fun ([X]) -> t_cons_hd(X) end, Opaques); type(erlang, info, 1, Xs, _) -> type(erlang, system_info, 1, Xs); % alias @@ -2341,6 +2347,9 @@ arg_types(erlang, bit_size, 1) -> %% Guard bif, needs to be here. arg_types(erlang, byte_size, 1) -> [t_bitstr()]; +%% Guard bif, needs to be here. +arg_types(erlang, ceil, 1) -> + [t_number()]; arg_types(erlang, halt, 0) -> []; arg_types(erlang, halt, 1) -> @@ -2361,6 +2370,9 @@ arg_types(erlang, element, 2) -> arg_types(erlang, float, 1) -> [t_number()]; %% Guard bif, needs to be here. +arg_types(erlang, floor, 1) -> + [t_number()]; +%% Guard bif, needs to be here. arg_types(erlang, hd, 1) -> [t_cons()]; arg_types(erlang, info, 1) -> diff --git a/lib/hipe/cerl/erl_types.erl b/lib/hipe/cerl/erl_types.erl index 243bb6e25d..c9dd1051f3 100644 --- a/lib/hipe/cerl/erl_types.erl +++ b/lib/hipe/cerl/erl_types.erl @@ -4749,7 +4749,7 @@ type_from_form1(Name, Args, ArgsLen, R, TypeName, TypeNames, S, D, L, C) -> {Rep, L2, C2} = recur_limit(Fun, D, L1, TypeName, TypeNames), Rep1 = choose_opaque_type(Rep, Type), Rep2 = case cannot_have_opaque(Rep1, TypeName, TypeNames) of - true -> Rep1; + true -> Rep; false -> ArgTypes2 = subst_all_vars_to_any_list(ArgTypes), t_opaque(Module, Name, ArgTypes2, Rep1) @@ -4821,7 +4821,7 @@ remote_from_form1(RemMod, Name, Args, ArgsLen, RemDict, RemType, TypeNames, NewRep1 = choose_opaque_type(NewRep, Type), NewRep2 = case cannot_have_opaque(NewRep1, RemType, TypeNames) of - true -> NewRep1; + true -> NewRep; false -> ArgTypes2 = subst_all_vars_to_any_list(ArgTypes), t_opaque(Mod, Name, ArgTypes2, NewRep1) diff --git a/lib/hipe/flow/cfg.inc b/lib/hipe/flow/cfg.inc index 0bad2a8dd7..cb5f397f64 100644 --- a/lib/hipe/flow/cfg.inc +++ b/lib/hipe/flow/cfg.inc @@ -32,6 +32,8 @@ %% bb(CFG, Label) - returns the basic block named 'Label' from the CFG. %% bb_add(CFG, Label, NewBB) - makes NewBB the basic block associated %% with Label. +%% map_bbs(Fun, CFG) - map over all code without changing control flow. +%% fold_bbs(Fun, Acc, CFG) - fold over the basic blocks in a CFG. %% succ(Map, Label) - returns a list of successors of basic block 'Label'. %% pred(Map, Label) - returns the predecessors of basic block 'Label'. %% fallthrough(CFG, Label) - returns fall-through successor of basic @@ -89,6 +91,7 @@ -define(BREADTH_ORDER,true). % for linear scan -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -endif. %%===================================================================== @@ -307,11 +310,7 @@ redirect_phis([I|Rest], OldPred, NewPred, Acc) -> %% @doc Adds a new basic block to a CFG (or updates an existing block). bb_add(CFG, Label, NewBB) -> %% Asserting that the NewBB is a legal basic block - Last = hipe_bb:last(NewBB), - case is_branch(Last) of - true -> ok; - false -> throw({?MODULE, {"Basic block ends without branch", Last}}) - end, + Last = assert_bb(NewBB), %% The order of the elements from branch_successors/1 is %% significant. It determines the basic block order when the CFG is %% converted to linear form. That order may have been tuned for @@ -339,11 +338,53 @@ bb_add(CFG, Label, NewBB) -> HT2, OldSucc -- Succ), CFG#cfg{table = HT3}. +-ifdef(MAP_FOLD_NEEDED). +-spec map_bbs(fun((cfg_lbl(), hipe_bb:bb()) -> hipe_bb:bb()), cfg()) -> cfg(). +%% @doc Map over the code in a CFG without changing any control flow. +map_bbs(Fun, CFG = #cfg{table=HT0}) -> + HT = gb_trees:map( + fun(Lbl, {OldBB, OldSucc, OldPred}) -> + NewBB = Fun(Lbl, OldBB), + %% Assert preconditions + NewLast = assert_bb(NewBB), + OldSucc = remove_duplicates(branch_successors(NewLast)), + {NewBB, OldSucc, OldPred} + end, HT0), + CFG#cfg{table=HT}. + +-spec fold_bbs(fun((cfg_lbl(), hipe_bb:bb(), Acc) -> Acc), Acc, cfg()) -> Acc. +%% @doc Fold over the basic blocks in a CFG in unspecified order. +fold_bbs(Fun, InitAcc, #cfg{table=HT}) -> + gb_trees_fold(fun(Lbl, {BB, _, _}, Acc) -> Fun(Lbl, BB, Acc) end, + InitAcc, HT). + +gb_trees_fold(Fun, InitAcc, Tree) -> + gb_trees_fold_1(Fun, InitAcc, gb_trees:iterator(Tree)). + +gb_trees_fold_1(Fun, InitAcc, Iter0) -> + case gb_trees:next(Iter0) of + none -> InitAcc; + {Key, Value, Iter} -> + gb_trees_fold_1(Fun, Fun(Key, Value, InitAcc), Iter) + end. +-endif. % MAP_FOLD_NEEDED + +assert_bb(BB) -> + assert_bb_is(hipe_bb:code(BB)). + +assert_bb_is([Last]) -> + true = is_branch(Last), + Last; +assert_bb_is([I|Is]) -> + false = is_branch(I), + false = is_label(I), + assert_bb_is(Is). + remove_pred(HT, FromL, PredL) -> case gb_trees:lookup(FromL, HT) of {value, {Block, Succ, Preds}} -> Code = hipe_bb:code(Block), - NewCode = remove_pred_from_phis(Code, PredL, []), + NewCode = remove_pred_from_phis(PredL, Code), NewBlock = hipe_bb:code_update(Block, NewCode), gb_trees:update(FromL, {NewBlock,Succ,lists:delete(PredL,Preds)}, HT); none -> @@ -374,20 +415,20 @@ add_pred(HT, ToL, PredL) -> -ifdef(CFG_CAN_HAVE_PHI_NODES). %% phi-instructions in a removed block's successors must be aware of %% the change. -remove_pred_from_phis(List = [I|Left], Label, Acc) -> +remove_pred_from_phis(Label, List = [I|Left]) -> case is_phi(I) of - true -> - NewAcc = [phi_remove_pred(I, Label)|Acc], - remove_pred_from_phis(Left, Label, NewAcc); + true -> + NewI = phi_remove_pred(I, Label), + [NewI | remove_pred_from_phis(Label, Left)]; false -> - lists:reverse(Acc) ++ List + List end; -remove_pred_from_phis([], _Label, Acc) -> - lists:reverse(Acc). +remove_pred_from_phis(_Label, []) -> + []. -else. %% this is used for code representations like those of back-ends which %% do not have phi-nodes. -remove_pred_from_phis(Code, _Label, _Acc) -> +remove_pred_from_phis(_Label, Code) -> Code. -endif. @@ -927,24 +968,52 @@ merge(BB, BB2, BB2_Label) -> remove_unreachable_code(CFG) -> Start = start_label(CFG), - Reachable = find_reachable([Start], CFG, gb_sets:from_list([Start])), - %% Reachable is an ordset: it comes from gb_sets:to_list/1. - %% So use ordset:subtract instead of '--' below. - Labels = ordsets:from_list(labels(CFG)), - case ordsets:subtract(Labels, Reachable) of - [] -> - CFG; + %% No unreachable block will make another block reachable, so no fixpoint + %% looping is required + Reachable = find_reachable([], [Start], CFG, #{Start=>[]}), + case [L || L <- labels(CFG), not maps:is_key(L, Reachable)] of + [] -> CFG; Remove -> - NewCFG = lists:foldl(fun(X, Acc) -> bb_remove(Acc, X) end, CFG, Remove), - remove_unreachable_code(NewCFG) + HT0 = CFG#cfg.table, + HT1 = lists:foldl(fun gb_trees:delete/2, HT0, Remove), + ReachableP = fun(Lbl) -> maps:is_key(Lbl, Reachable) end, + HT = gb_trees:map(fun(_,B)->prune_preds(B, ReachableP)end, HT1), + CFG#cfg{table=HT} end. -find_reachable([Label|Left], CFG, Acc) -> - NewAcc = gb_sets:add(Label, Acc), - Succ = succ(CFG, Label), - find_reachable([X || X <- Succ, not gb_sets:is_member(X, Acc)] ++ Left, - CFG, NewAcc); -find_reachable([], _CFG, Acc) -> - gb_sets:to_list(Acc). +find_reachable([], [], _CFG, Acc) -> Acc; +find_reachable([Succ|Succs], Left, CFG, Acc) -> + case Acc of + #{Succ := _} -> find_reachable(Succs, Left, CFG, Acc); + #{} -> find_reachable(Succs, [Succ|Left], CFG, Acc#{Succ => []}) + end; +find_reachable([], [Label|Left], CFG, Acc) -> + find_reachable(succ(CFG, Label), Left, CFG, Acc). + +%% Batch prune unreachable predecessors. Asymptotically faster than deleting +%% unreachable blocks one at a time with bb_remove, at least when +%% CFG_CAN_HAVE_PHI_NODES is undefined. Otherwise a phi_remove_preds might be +%% needed to achieve that. +prune_preds(B={Block, Succ, Preds}, ReachableP) -> + case lists:partition(ReachableP, Preds) of + {_, []} -> B; + {NewPreds, Unreach} -> + NewCode = remove_preds_from_phis(Unreach, hipe_bb:code(Block)), + {hipe_bb:code_update(Block, NewCode), Succ, NewPreds} + end. +-ifdef(CFG_CAN_HAVE_PHI_NODES). +remove_preds_from_phis(_, []) -> []; +remove_preds_from_phis(Preds, List=[I|Left]) -> + case is_phi(I) of + false -> List; + true -> + NewI = lists:foldl(fun(L,IA)->phi_remove_pred(IA,L)end, + I, Preds), + [NewI | remove_preds_from_phis(Preds, Left)] + end. +-else. +remove_preds_from_phis(_, Code) -> Code. -endif. + +-endif. %% -ifdef(REMOVE_UNREACHABLE_CODE) diff --git a/lib/hipe/flow/hipe_bb.erl b/lib/hipe/flow/hipe_bb.erl index 2da3a6dc99..08f5e0a0cb 100644 --- a/lib/hipe/flow/hipe_bb.erl +++ b/lib/hipe/flow/hipe_bb.erl @@ -41,6 +41,8 @@ -include("hipe_bb.hrl"). +-export_type([bb/0]). + %% %% Constructs a basic block. %% Returns a basic block: {bb, Code} diff --git a/lib/hipe/flow/liveness.inc b/lib/hipe/flow/liveness.inc index a1caa3e0ad..bffaa4e3df 100644 --- a/lib/hipe/flow/liveness.inc +++ b/lib/hipe/flow/liveness.inc @@ -49,6 +49,10 @@ -endif. -include("../flow/cfg.hrl"). +-include("../main/hipe.hrl"). + +-opaque liveness() :: map(). +-export_type([liveness/0]). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -72,7 +76,7 @@ %% The generic liveness analysis %% --spec analyze(cfg()) -> gb_trees:tree(). +-spec analyze(cfg()) -> liveness(). -ifdef(HIPE_LIVENESS_CALC_LARGEST_LIVESET). analyze(CFG) -> @@ -188,6 +192,7 @@ update_livein(Label, NewLiveIn, Liveness) -> %% %% LiveOut for a block is the union of the successors LiveIn %% +-spec liveout(liveness(), _) -> [_]. liveout(Liveness, L) -> Succ = successors(L, Liveness), @@ -210,7 +215,7 @@ successors(L, Liveness) -> {_GK, _LiveIn, Successors} = liveness_lookup(L, Liveness), Successors. --spec livein(gb_trees:tree(), _) -> [_]. +-spec livein(liveness(), _) -> [_]. livein(Liveness, L) -> {_GK, LiveIn, _Successors} = liveness_lookup(L, Liveness), @@ -292,18 +297,15 @@ strip([{_,Y}|Xs]) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% +-compile({inline, [liveness_lookup/2, liveness_update/3]}). + liveness_init(List) -> - liveness_init(List, gb_trees:empty()). + maps:from_list(List). -liveness_init([{Lbl, Data}|Left], Acc) -> - liveness_init(Left, gb_trees:insert(Lbl, Data, Acc)); -liveness_init([], Acc) -> - Acc. - liveness_lookup(Label, Liveness) -> - gb_trees:get(Label, Liveness). + maps:get(Label, Liveness). liveness_update(Label, Val, Liveness) -> - gb_trees:update(Label, Val, Liveness). + maps:update(Label, Val, Liveness). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/lib/hipe/icode/hipe_icode_bincomp.erl b/lib/hipe/icode/hipe_icode_bincomp.erl index 5a27519141..5ee6fe2c87 100644 --- a/lib/hipe/icode/hipe_icode_bincomp.erl +++ b/lib/hipe/icode/hipe_icode_bincomp.erl @@ -40,8 +40,8 @@ -spec cfg(cfg()) -> cfg(). cfg(Cfg1) -> - StartLbls = ordsets:from_list([hipe_icode_cfg:start_label(Cfg1)]), - find_bs_get_integer(StartLbls, Cfg1, StartLbls). + StartLbl = hipe_icode_cfg:start_label(Cfg1), + find_bs_get_integer([StartLbl], Cfg1, set_from_list([StartLbl])). find_bs_get_integer([Lbl|Rest], Cfg, Visited) -> BB = hipe_icode_cfg:bb(Cfg, Lbl), @@ -55,10 +55,10 @@ find_bs_get_integer([Lbl|Rest], Cfg, Visited) -> not_ok -> Cfg end, - Succs = ordsets:from_list(hipe_icode_cfg:succ(NewCfg, Lbl)), - NewSuccs = ordsets:subtract(Succs, Visited), - NewLbls = ordsets:union(NewSuccs, Rest), - NewVisited = ordsets:union(NewSuccs, Visited), + Succs = hipe_icode_cfg:succ(NewCfg, Lbl), + NewSuccs = not_visited(Succs, Visited), + NewLbls = NewSuccs ++ Rest, + NewVisited = set_union(set_from_list(NewSuccs), Visited), find_bs_get_integer(NewLbls, NewCfg, NewVisited); find_bs_get_integer([], Cfg, _) -> Cfg. @@ -177,3 +177,19 @@ make_butlast([{Res, Size}|Rest], Var) -> [Var, hipe_icode:mk_const((1 bsl Size)-1)]), hipe_icode:mk_primop([NewVar], 'bsr', [Var, hipe_icode:mk_const(Size)]) |make_butlast(Rest, NewVar)]. + +%%-------------------------------------------------------------------- +%% Sets + +set_from_list([]) -> #{}; +set_from_list(L) -> + maps:from_list([{E, []} || E <- L]). + +not_visited([], _) -> []; +not_visited([E|T], M) -> + case M of + #{E := _} -> not_visited(T, M); + _ -> [E|not_visited(T, M)] + end. + +set_union(A, B) -> maps:merge(A, B). diff --git a/lib/hipe/icode/hipe_icode_coordinator.erl b/lib/hipe/icode/hipe_icode_coordinator.erl index d2f8748535..b073954ce7 100644 --- a/lib/hipe/icode/hipe_icode_coordinator.erl +++ b/lib/hipe/icode/hipe_icode_coordinator.erl @@ -106,12 +106,29 @@ handle_no_change_done(MFA, {Queue, Busy}) -> {Queue, Busy -- [MFA]}. last_action(PM, ServerPid, Mod, All) -> - lists:foreach(fun (MFA) -> - gb_trees:get(MFA, PM) ! {done, final_funs(ServerPid, Mod)}, - receive - {done_rewrite, MFA} -> ok - end - end, All). + last_action(PM, ServerPid, Mod, All, []). + +last_action(_, _, _, [], []) -> ok; +last_action(PM, ServerPid, Mod, [], [MFA|Busy]) -> + receive + {done_rewrite, MFA} -> + last_action(PM, ServerPid, Mod, [], Busy) + end; +last_action(PM, ServerPid, Mod, All0, Busy) -> + receive + {done_rewrite, MFA} -> + last_action(PM, ServerPid, Mod, All0, Busy -- [MFA]) + after 0 -> + case ?MAX_CONCURRENT - length(Busy) of + X when is_integer(X), X > 0 -> + [MFA|All1] = All0, + gb_trees:get(MFA, PM) ! {done, final_funs(ServerPid, Mod)}, + last_action(PM, ServerPid, Mod, All1, [MFA|Busy]); + X when is_integer(X) -> + Busy1 = receive {done_rewrite, MFA} -> Busy -- [MFA] end, + last_action(PM, ServerPid, Mod, All0, Busy1) + end + end. restart_funs({Queue, Busy} = QB, PM, All, ServerPid) -> case ?MAX_CONCURRENT - length(Busy) of diff --git a/lib/hipe/icode/hipe_icode_range.erl b/lib/hipe/icode/hipe_icode_range.erl index 12ed796690..af160769a1 100644 --- a/lib/hipe/icode/hipe_icode_range.erl +++ b/lib/hipe/icode/hipe_icode_range.erl @@ -73,8 +73,8 @@ -type final_fun() :: fun((mfa(), [range()]) -> 'ok'). -type data() :: {mfa(), args_fun(), call_fun(), final_fun()}. -type label() :: non_neg_integer(). --type info() :: gb_trees:tree(). --type work_list() :: {[label()], [label()], sets:set()}. +-type info() :: map(). +-type work_list() :: {[label()], [label()], set(label())}. -type variable() :: #icode_variable{}. -type annotated_variable() :: #icode_variable{}. -type argument() :: #icode_const{} | variable(). @@ -82,10 +82,9 @@ -type instr_split_info() :: {icode_instr(), [{label(),info()}]}. -type last_instr_return() :: {instr_split_info(), range()}. --record(state, {info_map = gb_trees:empty() :: info(), - counter = dict:new() :: dict:dict(), - cfg :: cfg(), - liveness = gb_trees:empty() :: gb_trees:tree(), +-record(state, {info_map = #{} :: info(), + cfg :: cfg(), + liveness :: hipe_icode_ssa:liveness(), ret_type :: range(), lookup_fun :: call_fun(), result_action :: final_fun()}). @@ -187,17 +186,16 @@ safe_analyse(CFG, Data={MFA,_,_,_}) -> rewrite_blocks(State) -> CFG = state__cfg(State), Start = hipe_icode_cfg:start_label(CFG), - rewrite_blocks([Start], State, [Start]). + rewrite_blocks([Start], State, set_from_list([Start])). --spec rewrite_blocks([label()], state(), [label()]) -> state(). +-spec rewrite_blocks([label()], state(), set(label())) -> state(). rewrite_blocks([Next|Rest], State, Visited) -> Info = state__info_in(State, Next), {NewState, NewLabels} = analyse_block(Next, Info, State, true), - NewLabelsSet = ordsets:from_list(NewLabels), - RealNew = ordsets:subtract(NewLabelsSet, Visited), - NewVisited = ordsets:union([RealNew, Visited, [Next]]), - NewWork = ordsets:union([RealNew, Rest]), + RealNew = not_visited(NewLabels, Visited), + NewVisited = set_union(set_from_list(RealNew), Visited), + NewWork = RealNew ++ Rest, rewrite_blocks(NewWork, NewState, NewVisited); rewrite_blocks([], State, _) -> State. @@ -1661,8 +1659,8 @@ state__init(Cfg, {MFA, ArgsFun, CallFun, FinalFun}) -> false -> NewParams = lists:zipwith(fun update_info/2, Params, Ranges), NewCfg = hipe_icode_cfg:params_update(Cfg, NewParams), - Info = enter_defines(NewParams, gb_trees:empty()), - InfoMap = gb_trees:insert({Start, in}, Info, gb_trees:empty()), + Info = enter_defines(NewParams, #{}), + InfoMap = #{{Start, in} => Info}, #state{info_map=InfoMap, cfg=NewCfg, liveness=Liveness, ret_type=none_type(), lookup_fun=CallFun, result_action=FinalFun} @@ -1700,7 +1698,7 @@ state__info_in(S, Label) -> state__info(S, {Label, in}). state__info(#state{info_map=IM}, Key) -> - gb_trees:get(Key, IM). + maps:get(Key, IM). state__update_info(State, LabelInfo, Rewrite) -> update_info(LabelInfo, State, [], Rewrite). @@ -1721,60 +1719,58 @@ update_info([], State, LabelAcc, _Rewrite) -> state__info_in_update(S=#state{info_map=IM,liveness=Liveness}, Label, Info) -> LabelIn = {Label, in}, - case gb_trees:lookup(LabelIn, IM) of - none -> + case IM of + #{LabelIn := OldInfo} -> + OldVars = maps:keys(OldInfo), + case join_info_in(OldVars, OldInfo, Info) of + fixpoint -> + fixpoint; + NewInfo -> + S#state{info_map=IM#{LabelIn := NewInfo}} + end; + _ -> LiveIn = hipe_icode_ssa:ssa_liveness__livein(Liveness, Label), NamesLiveIn = [hipe_icode:var_name(Var) || Var <- LiveIn, hipe_icode:is_var(Var)], - OldInfo = gb_trees:empty(), + OldInfo = #{}, case join_info_in(NamesLiveIn, OldInfo, Info) of fixpoint -> - S#state{info_map=gb_trees:insert(LabelIn, OldInfo, IM)}; - NewInfo -> - S#state{info_map=gb_trees:enter(LabelIn, NewInfo, IM)} - end; - {value, OldInfo} -> - OldVars = gb_trees:keys(OldInfo), - case join_info_in(OldVars, OldInfo, Info) of - fixpoint -> - fixpoint; + S#state{info_map=IM#{LabelIn => OldInfo}}; NewInfo -> - S#state{info_map=gb_trees:update(LabelIn, NewInfo, IM)} + S#state{info_map=IM#{LabelIn => NewInfo}} end end. join_info_in(Vars, OldInfo, NewInfo) -> - case join_info_in(Vars, OldInfo, NewInfo, gb_trees:empty(), false) of + case join_info_in(Vars, OldInfo, NewInfo, #{}, false) of {Res, true} -> Res; {_, false} -> fixpoint end. join_info_in([Var|Left], Info1, Info2, Acc, Changed) -> - Type1 = gb_trees:lookup(Var, Info1), - Type2 = gb_trees:lookup(Var, Info2), - case {Type1, Type2} of - {none, none} -> - NewTree = gb_trees:insert(Var, none_type(), Acc), - join_info_in(Left, Info1, Info2, NewTree, true); - {none, {value, Val}} -> - NewTree = gb_trees:insert(Var, Val, Acc), - join_info_in(Left, Info1, Info2, NewTree, true); - {{value, Val}, none} -> - NewTree = gb_trees:insert(Var, Val, Acc), + case {Info1, Info2} of + {#{Var := Val}, #{Var := Val}} -> + NewTree = Acc#{Var => Val}, join_info_in(Left, Info1, Info2, NewTree, Changed); - {{value, Val}, {value, Val}} -> - NewTree = gb_trees:insert(Var, Val, Acc), - join_info_in(Left, Info1, Info2, NewTree, Changed); - {{value, Val1}, {value, Val2}} -> - {NewChanged, NewVal} = + {#{Var := Val1}, #{Var := Val2}} -> + {NewChanged, NewVal} = case sup(Val1, Val2) of Val1 -> {Changed, Val1}; Val -> {true, Val} end, - NewTree = gb_trees:insert(Var, NewVal, Acc), - join_info_in(Left, Info1, Info2, NewTree, NewChanged) + NewTree = Acc#{Var => NewVal}, + join_info_in(Left, Info1, Info2, NewTree, NewChanged); + {_, #{Var := Val}} -> + NewTree = Acc#{Var => Val}, + join_info_in(Left, Info1, Info2, NewTree, true); + {#{Var := Val}, _} -> + NewTree = Acc#{Var => Val}, + join_info_in(Left, Info1, Info2, NewTree, Changed); + {_, _} -> + NewTree = Acc#{Var => none_type()}, + join_info_in(Left, Info1, Info2, NewTree, true) end; join_info_in([], _Info1, _Info2, Acc, NewChanged) -> {Acc, NewChanged}. @@ -1786,7 +1782,7 @@ enter_defines([], Info) -> Info. enter_define({PossibleVar, Range = #range{}}, Info) -> case hipe_icode:is_var(PossibleVar) of true -> - gb_trees:enter(hipe_icode:var_name(PossibleVar), Range, Info); + Info#{hipe_icode:var_name(PossibleVar) => Range}; false -> Info end; @@ -1795,7 +1791,7 @@ enter_define(PossibleVar, Info) -> true -> case hipe_icode:variable_annotation(PossibleVar) of {range_anno, #ann{range=Range}, _} -> - gb_trees:enter(hipe_icode:var_name(PossibleVar), Range, Info); + Info#{hipe_icode:var_name(PossibleVar) => Range}; _ -> Info end; @@ -1810,11 +1806,10 @@ enter_vals(Ins, Info) -> lookup(PossibleVar, Info) -> case hipe_icode:is_var(PossibleVar) of true -> - case gb_trees:lookup(hipe_icode:var_name(PossibleVar), Info) of - none -> - none_type(); - {value, Val} -> - Val + PossibleVarName = hipe_icode:var_name(PossibleVar), + case Info of + #{PossibleVarName := Val} -> Val; + _ -> none_type() end; false -> none_type() @@ -1828,10 +1823,10 @@ lookup(PossibleVar, Info) -> init_work(State) -> %% Labels = hipe_icode_cfg:reverse_postorder(state__cfg(State)), Labels = [hipe_icode_cfg:start_label(state__cfg(State))], - {Labels, [], sets:from_list(Labels)}. + {Labels, [], set_from_list(Labels)}. get_work({[Label|Left], List, Set}) -> - NewWork = {Left, List, sets:del_element(Label, Set)}, + NewWork = {Left, List, maps:remove(Label, Set)}, {Label, NewWork}; get_work({[], [], _Set}) -> fixpoint; @@ -1839,12 +1834,12 @@ get_work({[], List, Set}) -> get_work({lists:reverse(List), [], Set}). add_work(Work = {List1, List2, Set}, [Label|Left]) -> - case sets:is_element(Label, Set) of - true -> + case Set of + #{Label := _} -> add_work(Work, Left); - false -> + _ -> %% io:format("Adding work: ~w\n", [Label]), - add_work({List1, [Label|List2], sets:add_element(Label, Set)}, Left) + add_work({List1, [Label|List2], Set#{Label => []}}, Left) end; add_work(Work, []) -> Work. @@ -1959,3 +1954,21 @@ next_down_limit(X) when is_integer(X), X > -16#8000000 -> -16#8000000; next_down_limit(X) when is_integer(X), X > -16#80000000 -> -16#80000000; next_down_limit(X) when is_integer(X), X > -16#800000000000000 -> -16#800000000000000; next_down_limit(_X) -> neg_inf. + +%%-------------------------------------------------------------------- +%% Sets + +-type set(E) :: #{E => []}. + +set_from_list([]) -> #{}; +set_from_list(L) -> + maps:from_list([{E, []} || E <- L]). + +not_visited([], _) -> []; +not_visited([E|T], M) -> + case M of + #{E := []} -> not_visited(T, M); + _ -> [E|not_visited(T, M)] + end. + +set_union(A, B) -> maps:merge(A, B). diff --git a/lib/hipe/icode/hipe_icode_ssa.erl b/lib/hipe/icode/hipe_icode_ssa.erl index b222fbc7d2..aca13a2ff0 100644 --- a/lib/hipe/icode/hipe_icode_ssa.erl +++ b/lib/hipe/icode/hipe_icode_ssa.erl @@ -34,13 +34,16 @@ -define(LIVENESS, hipe_icode_liveness). -define(LIVENESS_NEEDED, true). +-export_type([liveness/0]). + -include("hipe_icode.hrl"). -include("../ssa/hipe_ssa.inc"). %% Declarations for exported functions which are Icode-specific. --spec ssa_liveness__analyze(#cfg{}) -> gb_trees:tree(). --spec ssa_liveness__livein(_, icode_lbl()) -> [#icode_variable{}]. -%% -spec ssa_liveness__livein(_, icode_lbl(), _) -> [#icode_var{}]. +-opaque liveness() :: liveness(icode_lbl(), #icode_variable{}). +-spec ssa_liveness__analyze(#cfg{}) -> liveness(). +-spec ssa_liveness__livein(liveness(), icode_lbl()) -> [#icode_variable{}]. +%% -spec ssa_liveness__livein(liveness(), icode_lbl(), _) -> [#icode_var{}]. %%---------------------------------------------------------------------- %% Auxiliary operations which seriously differ between Icode and RTL. diff --git a/lib/hipe/icode/hipe_icode_type.erl b/lib/hipe/icode/hipe_icode_type.erl index 794c27ebcc..3f0e2998f1 100644 --- a/lib/hipe/icode/hipe_icode_type.erl +++ b/lib/hipe/icode/hipe_icode_type.erl @@ -100,7 +100,7 @@ -record(state, {info_map = gb_trees:empty() :: gb_trees:tree(), cfg :: cfg(), - liveness = gb_trees:empty() :: gb_trees:tree(), + liveness :: hipe_icode_ssa:liveness(), arg_types :: [erl_types:erl_type()], ret_type = [t_none()] :: [erl_types:erl_type()], lookupfun :: call_fun(), diff --git a/lib/hipe/main/hipe.app.src b/lib/hipe/main/hipe.app.src index f8487151d7..6c3a2741b3 100644 --- a/lib/hipe/main/hipe.app.src +++ b/lib/hipe/main/hipe.app.src @@ -49,12 +49,12 @@ hipe_amd64_ra_naive, hipe_amd64_ra_postconditions, hipe_amd64_ra_sse2_postconditions, - hipe_amd64_ra_x87_ls, hipe_amd64_registers, hipe_amd64_specific, hipe_amd64_specific_sse2, hipe_amd64_specific_x87, hipe_amd64_spill_restore, + hipe_amd64_sse2, hipe_amd64_x87, hipe_arm, hipe_arm_assemble, @@ -171,6 +171,7 @@ hipe_rtl_to_sparc, hipe_rtl_to_x86, hipe_rtl_varmap, + hipe_segment_trees, hipe_sdi, hipe_sparc, hipe_sparc_assemble, @@ -216,7 +217,6 @@ hipe_x86_ra_ls, hipe_x86_ra_naive, hipe_x86_ra_postconditions, - hipe_x86_ra_x87_ls, hipe_x86_registers, hipe_x86_specific, hipe_x86_specific_x87, diff --git a/lib/hipe/main/hipe.erl b/lib/hipe/main/hipe.erl index 6c525dd143..4e6de2e0dc 100644 --- a/lib/hipe/main/hipe.erl +++ b/lib/hipe/main/hipe.erl @@ -1118,9 +1118,10 @@ help_hiper() -> help_options() -> HostArch = erlang:system_info(hipe_architecture), - O1 = expand_options([o1], HostArch), - O2 = expand_options([o2], HostArch), - O3 = expand_options([o3], HostArch), + O0 = expand_options([o0] ++ ?COMPILE_DEFAULTS, HostArch), + O1 = expand_options([o1] ++ ?COMPILE_DEFAULTS, HostArch), + O2 = expand_options([o2] ++ ?COMPILE_DEFAULTS, HostArch), + O3 = expand_options([o3] ++ ?COMPILE_DEFAULTS, HostArch), io:format("HiPE Compiler Options\n" ++ " Boolean-valued options generally have corresponding " ++ "aliases `no_...',\n" ++ @@ -1139,15 +1140,16 @@ help_options() -> " pp_x86 = pp_native,\n" ++ " pp_amd64 = pp_native,\n" ++ " pp_ppc = pp_native,\n" ++ - " o0,\n" ++ - " o1 = ~p,\n" ++ + " o0 = ~p,\n" ++ + " o1 = ~p ++ o0,\n" ++ " o2 = ~p ++ o1,\n" ++ " o3 = ~p ++ o2.\n", [ordsets:from_list([verbose, debug, time, load, pp_beam, pp_icode, pp_rtl, pp_native, pp_asm, timeout]), expand_options([pp_all], HostArch), - O1 -- [o1], + O0 -- [o0], + (O1 -- O0) -- [o1], (O2 -- O1) -- [o2], (O3 -- O2) -- [o3]]), ok. @@ -1382,8 +1384,15 @@ opt_keys() -> %% Definitions: +o0_opts(_TargetArch) -> + [concurrent_comp, {regalloc,linear_scan}]. + o1_opts(TargetArch) -> - Common = [inline_fp, pmatch, peephole], + Common = [inline_fp, pmatch, peephole, + icode_ssa_const_prop, icode_ssa_copy_prop, icode_inline_bifs, + rtl_ssa, rtl_ssa_const_prop, rtl_ssapre, + spillmin_color, use_indexing, remove_comments, + binary_opt, {regalloc,coalescing} | o0_opts(TargetArch)], case TargetArch of ultrasparc -> Common; @@ -1402,11 +1411,8 @@ o1_opts(TargetArch) -> end. o2_opts(TargetArch) -> - Common = [icode_ssa_const_prop, icode_ssa_copy_prop, % icode_ssa_struct_reuse, - icode_type, icode_inline_bifs, icode_call_elim, rtl_lcm, - rtl_ssa, rtl_ssa_const_prop, - spillmin_color, use_indexing, remove_comments, - concurrent_comp, binary_opt | o1_opts(TargetArch)], + Common = [icode_type, icode_call_elim, % icode_ssa_struct_reuse, + rtl_lcm | (o1_opts(TargetArch) -- [rtl_ssapre])], case TargetArch of T when T =:= amd64 orelse T =:= ppc64 -> % 64-bit targets [icode_range | Common]; @@ -1416,7 +1422,7 @@ o2_opts(TargetArch) -> o3_opts(TargetArch) -> %% no point checking for target architecture since this is checked in 'o1' - [icode_range, {regalloc,coalescing} | o2_opts(TargetArch)]. + [icode_range | o2_opts(TargetArch)]. %% Note that in general, the normal form for options should be positive. %% This is a good programming convention, so that tests in the code say @@ -1481,7 +1487,8 @@ opt_basic_expansions() -> [{pp_all, [pp_beam, pp_icode, pp_rtl, pp_native]}]. opt_expansions(TargetArch) -> - [{o1, o1_opts(TargetArch)}, + [{o0, o0_opts(TargetArch)}, + {o1, o1_opts(TargetArch)}, {o2, o2_opts(TargetArch)}, {o3, o3_opts(TargetArch)}, {to_llvm, llvm_opts(o3, TargetArch)}, @@ -1528,13 +1535,21 @@ expand_kt2(Opts) -> -spec expand_options(comp_options(), hipe_architecture()) -> comp_options(). -expand_options(Opts, TargetArch) -> +expand_options(Opts0, TargetArch) -> + Opts1 = proplists:normalize(Opts0, [{aliases, opt_aliases()}]), + Opts = normalise_opt_options(Opts1), proplists:normalize(Opts, [{negations, opt_negations()}, - {aliases, opt_aliases()}, {expand, opt_basic_expansions()}, {expand, opt_expansions(TargetArch)}, {negations, opt_negations()}]). +normalise_opt_options([o0|Opts]) -> [o0] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([o1|Opts]) -> [o1] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([o2|Opts]) -> [o2] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([o3|Opts]) -> [o3] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([O|Opts]) -> [O|normalise_opt_options(Opts)]; +normalise_opt_options([]) -> []. + -spec check_options(comp_options()) -> 'ok'. check_options(Opts) -> diff --git a/lib/hipe/misc/Makefile b/lib/hipe/misc/Makefile index 72cfff21a8..e5033e444b 100644 --- a/lib/hipe/misc/Makefile +++ b/lib/hipe/misc/Makefile @@ -44,7 +44,7 @@ RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) # Target Specs # ---------------------------------------------------- ifdef HIPE_ENABLED -HIPE_MODULES = hipe_data_pp hipe_pack_constants hipe_sdi +HIPE_MODULES = hipe_data_pp hipe_pack_constants hipe_sdi hipe_segment_trees else HIPE_MODULES = endif diff --git a/lib/hipe/misc/hipe_sdi.erl b/lib/hipe/misc/hipe_sdi.erl index fbb4b105f6..5ca64bc669 100644 --- a/lib/hipe/misc/hipe_sdi.erl +++ b/lib/hipe/misc/hipe_sdi.erl @@ -36,10 +36,13 @@ %%------------------------------------------------------------------------ -type hipe_array() :: integer(). % declare this in hipe.hrl or builtin? +-type hipe_vector(E) :: {} | {E} | {E, E} | {E, E, E} | tuple(). -type label() :: non_neg_integer(). -type address() :: non_neg_integer(). +-type parents() :: {hipe_vector(_ :: integer()), hipe_segment_trees:tree()}. + %%------------------------------------------------------------------------ -record(label_data, {address :: address(), @@ -168,9 +171,11 @@ mk_long(N) -> %%% - Since the graph is traversed from child to parent nodes in %%% Step 3, the edges are represented by a vector PARENTS[0..n-1] %%% such that PARENTS[j] = { i | i is a parent of j }. -%%% - An explicit PARENTS graph would have size O(n^2). Instead we -%%% compute PARENTS[j] from the SDI vector when needed. This -%%% reduces memory overheads, and may reduce time overheads too. +%%% - An explicit PARENTS graph would have size O(n^2). Instead, we +%%% observe that (i is a parent of j) iff (j \in range(i)), where +%%% range(i) is a constant function. We can thus precompute all the +%%% ranges i and insert them into a data structure built for such +%%% queries. In this case, we use a segment tree. -spec mk_span(non_neg_integer(), tuple()) -> hipe_array(). mk_span(N, SDIS) -> @@ -188,7 +193,29 @@ initSPAN(SdiNr, N, SDIS, SPAN) -> initSPAN(SdiNr+1, N, SDIS, SPAN) end. -mk_parents(N, SDIS) -> {N,SDIS}. +-spec mk_parents(non_neg_integer(), tuple()) -> parents(). +mk_parents(N, SDIS) -> + PrevSDIS = vector_from_list(select_prev_sdis(N-1, SDIS, [])), + Ranges = parents_generate_ranges(N-1, PrevSDIS, []), + {PrevSDIS, hipe_segment_trees:build(Ranges)}. + +select_prev_sdis(-1, _SDIS, Acc) -> Acc; +select_prev_sdis(SdiNr, SDIS, Acc) -> + #sdi_data{prevSdi=PrevSdi} = vector_sub(SDIS, SdiNr), + select_prev_sdis(SdiNr-1, SDIS, [PrevSdi|Acc]). + +parents_generate_ranges(-1, _PrevSDIS, Acc) -> Acc; +parents_generate_ranges(SdiNr, PrevSDIS, Acc) -> + %% inclusive + {LO,HI} = parents_generate_range(SdiNr, PrevSDIS), + parents_generate_ranges(SdiNr-1, PrevSDIS, [{LO,HI}|Acc]). + +-compile({inline, parents_generate_range/2}). +parents_generate_range(SdiNr, PrevSDIS) -> + PrevSdi = vector_sub(PrevSDIS, SdiNr), + if SdiNr =< PrevSdi -> {SdiNr+1, PrevSdi}; % forwards + true -> {PrevSdi+1, SdiNr-1} % backwards + end. %%% "After the structure is built we process it as follows. %%% For any node i whose listed span exceeds the architectural @@ -209,7 +236,7 @@ mk_parents(N, SDIS) -> {N,SDIS}. %%% and PARENTS are no longer useful. -spec update_long(non_neg_integer(), tuple(), hipe_array(), - {non_neg_integer(),tuple()},hipe_array()) -> 'ok'. + parents(),hipe_array()) -> 'ok'. update_long(N, SDIS, SPAN, PARENTS, LONG) -> WKL = initWKL(N-1, SDIS, SPAN, []), processWKL(WKL, SDIS, SPAN, PARENTS, LONG). @@ -225,46 +252,32 @@ initWKL(SdiNr, SDIS, SPAN, WKL) -> end. -spec processWKL([non_neg_integer()], tuple(), hipe_array(), - {non_neg_integer(), tuple()}, hipe_array()) -> 'ok'. + parents(), hipe_array()) -> 'ok'. processWKL([], _SDIS, _SPAN, _PARENTS, _LONG) -> ok; -processWKL([Child|WKL], SDIS, SPAN, PARENTS, LONG) -> - WKL2 = updateChild(Child, WKL, SDIS, SPAN, PARENTS, LONG), +processWKL([Child|WKL], SDIS, SPAN, PARENTS0, LONG) -> + {WKL2, PARENTS} = + case array_sub(SPAN, Child) of + 0 -> {WKL, PARENTS0}; % removed + _ -> + SdiData = vector_sub(SDIS, Child), + Incr = sdiLongIncr(SdiData), + array_update(LONG, Child, Incr), + array_update(SPAN, Child, 0), % remove child + PARENTS1 = deleteParent(PARENTS0, Child), + PS = parentsOfChild(PARENTS1, Child), + {updateParents(PS, Child, Incr, SDIS, SPAN, WKL), PARENTS1} + end, processWKL(WKL2, SDIS, SPAN, PARENTS, LONG). --spec updateChild(non_neg_integer(), [non_neg_integer()], tuple(), hipe_array(), - {non_neg_integer(),tuple()}, hipe_array()) -> [non_neg_integer()]. -updateChild(Child, WKL, SDIS, SPAN, PARENTS, LONG) -> - case array_sub(SPAN, Child) of - 0 -> WKL; % removed - _ -> - SdiData = vector_sub(SDIS, Child), - Incr = sdiLongIncr(SdiData), - array_update(LONG, Child, Incr), - array_update(SPAN, Child, 0), % remove child - PS = parentsOfChild(PARENTS, Child), - updateParents(PS, Child, Incr, SDIS, SPAN, WKL) - end. +-spec parentsOfChild(parents(), non_neg_integer()) -> [non_neg_integer()]. +parentsOfChild({_PrevSDIS, SegTree}, Child) -> + hipe_segment_trees:intersect(Child, SegTree). --spec parentsOfChild({non_neg_integer(),tuple()}, - non_neg_integer()) -> [non_neg_integer()]. -parentsOfChild({N,SDIS}, Child) -> - parentsOfChild(N-1, SDIS, Child, []). - --spec parentsOfChild(integer(), tuple(), non_neg_integer(), - [non_neg_integer()]) -> [non_neg_integer()]. -parentsOfChild(-1, _SDIS, _Child, PS) -> PS; -parentsOfChild(SdiNr, SDIS, Child, PS) -> - SdiData = vector_sub(SDIS, SdiNr), - #sdi_data{prevSdi=PrevSdi} = SdiData, - {LO,HI} = % inclusive - if SdiNr =< PrevSdi -> {SdiNr+1, PrevSdi}; % forwards - true -> {PrevSdi+1, SdiNr-1} % backwards - end, - NewPS = - if LO =< Child, Child =< HI -> [SdiNr | PS]; - true -> PS - end, - parentsOfChild(SdiNr-1, SDIS, Child, NewPS). +-spec deleteParent(parents(), non_neg_integer()) -> parents(). +deleteParent({PrevSDIS, SegTree0}, Parent) -> + {LO,HI} = parents_generate_range(Parent, PrevSDIS), + SegTree = hipe_segment_trees:delete(Parent, LO, HI, SegTree0), + {PrevSDIS, SegTree}. -spec updateParents([non_neg_integer()], non_neg_integer(), byte(), tuple(), hipe_array(), @@ -297,10 +310,12 @@ updateWKL(SdiNr, SDIS, SdiSpan, WKL) -> false -> [SdiNr|WKL] end. +-compile({inline, sdiSpanIsShort/2}). %% Only called once -spec sdiSpanIsShort(#sdi_data{}, integer()) -> boolean(). sdiSpanIsShort(#sdi_data{si = #sdi_info{lb = LB, ub = UB}}, SdiSpan) -> SdiSpan >= LB andalso SdiSpan =< UB. +-compile({inline, sdiLongIncr/1}). %% Only called once -spec sdiLongIncr(#sdi_data{}) -> byte(). sdiLongIncr(#sdi_data{si = #sdi_info{incr = Incr}}) -> Incr. @@ -361,9 +376,11 @@ applyIncr([{Label,LabelData}|List], INCREMENT, LabelMap) -> %%% Currently implemented as tuples. %%% Used for the 'SDIS' and 'PARENTS' vectors. --spec vector_from_list([#sdi_data{}]) -> tuple(). +-spec vector_from_list([E]) -> hipe_vector(E). vector_from_list(Values) -> list_to_tuple(Values). +-compile({inline, vector_sub/2}). +-spec vector_sub(hipe_vector(E), non_neg_integer()) -> V when V :: E. vector_sub(Vec, I) -> element(I+1, Vec). %%% ADT for mutable integer arrays, indexed from 0 to N-1. @@ -373,8 +390,10 @@ vector_sub(Vec, I) -> element(I+1, Vec). -spec mk_array_of_zeros(non_neg_integer()) -> hipe_array(). mk_array_of_zeros(N) -> hipe_bifs:array(N, 0). +-compile({inline, array_update/3}). -spec array_update(hipe_array(), non_neg_integer(), integer()) -> hipe_array(). array_update(A, I, V) -> hipe_bifs:array_update(A, I, V). +-compile({inline, array_sub/2}). -spec array_sub(hipe_array(), non_neg_integer()) -> integer(). array_sub(A, I) -> hipe_bifs:array_sub(A, I). diff --git a/lib/hipe/misc/hipe_segment_trees.erl b/lib/hipe/misc/hipe_segment_trees.erl new file mode 100644 index 0000000000..22146396c3 --- /dev/null +++ b/lib/hipe/misc/hipe_segment_trees.erl @@ -0,0 +1,181 @@ +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2016. All Rights Reserved. +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% Segment trees, with a delete operation. +%%% +%%% Keys are the (0-based) indices into the list passed to build/1. +%%% +%%% Range bounds are inclusive. +%%% + +-module(hipe_segment_trees). + +-export([build/1, intersect/2, delete/4]). + +-record(segment_tree, { + lo :: integer(), + hi :: integer(), + root :: tnode() + }). + +%% X =< Mid belongs in Left +-define(NODE(Left, Right, Mid, Segments), {Left, Right, Mid, Segments}). + +-define(POINT_LEAF(Val), Val). +-define(RANGE_LEAF(Lo, Hi), {Lo, Hi}). + +-type segments() :: [non_neg_integer()]. +-type leaf() :: segments(). +-type tnode() :: ?NODE(tnode(), tnode(), integer(), segments()) | leaf(). + +-opaque tree() :: #segment_tree{} | nil. +-export_type([tree/0]). + +%% @doc Builds a segment tree of the given intervals. +-spec build([{integer(), integer()}]) -> tree(). +build(ListOfIntervals) -> + case + lists:usort( + lists:append( + [[Lo, Hi] || {Lo, Hi} <- ListOfIntervals, Lo =< Hi])) + of + [] -> nil; + Endpoints -> + Tree0 = empty_tree_from_endpoints(Endpoints), + [Lo|_] = Endpoints, + Hi = lists:last(Endpoints), + Tree1 = insert_intervals(0, ListOfIntervals, Lo, Hi, Tree0), + Tree = squash_empty_subtrees(Tree1), + #segment_tree{lo=Lo, hi=Hi, root=Tree} + end. + +empty_tree_from_endpoints(Endpoints) -> + Leaves = leaves(Endpoints), + {T, [], _, _} = balanced_bst(Leaves, length(Leaves)), + T. + +leaves([Endpoint]) -> [?POINT_LEAF(Endpoint)]; +leaves([A | [B|_] = Tail]) -> + %% We omit the range leaf if it's empty + case A<B-1 of + true -> [?POINT_LEAF(A),?RANGE_LEAF(A+1,B-1) | leaves(Tail)]; + false -> [?POINT_LEAF(A) | leaves(Tail)] + end. + +balanced_bst(L, S) when S > 1 -> + Sm = S, %% - 1 + S2 = Sm div 2, + S1 = Sm - S2, + {Left, L1, LeftLo, LeftHi} = balanced_bst(L, S1), + {Right, L2, _, RightHi} = balanced_bst(L1, S2), + T = ?NODE(Left, Right, LeftHi, []), + {T, L2, LeftLo, RightHi}; +balanced_bst([?RANGE_LEAF(Lo, Hi) | L], 1) -> + {[], L, Lo, Hi}; +balanced_bst([?POINT_LEAF(Val) | L], 1) -> + {[], L, Val, Val}. + +insert_intervals(_Ix, [], _Lo, _Hi, Tree) -> Tree; +insert_intervals(Ix, [Int|Ints], Lo, Hi, Tree) -> + insert_intervals(Ix + 1, Ints, Lo, Hi, + insert_interval(Ix, Int, Lo, Hi, Tree)). + +insert_interval(_, {Lo, Hi}, _, _, Node) when Lo > Hi -> Node; +insert_interval(I, Int={Lo,Hi}, NLo, NHi, + ?NODE(Left0, Right0, Mid, Segments)) -> + if Lo =< NLo, NHi =< Hi -> + ?NODE(Left0, Right0, Mid, [I|Segments]); + true -> + Left = case intervals_intersect(Lo, Hi, NLo, Mid) of + true -> insert_interval(I, Int, NLo, Mid, Left0); + false -> Left0 + end, + Right = case intervals_intersect(Lo, Hi, Mid+1, NHi) of + true -> insert_interval(I, Int, Mid+1, NHi, Right0); + false -> Right0 + end, + ?NODE(Left, Right, Mid, Segments) + end; +insert_interval(I, {_Lo,_Hi}, _NLo, _NHi, Leaf) -> [I|Leaf]. + +intervals_intersect(ALo, AHi, BLo, BHi) -> + (ALo =< AHi) andalso (BLo =< BHi) %% both nonempty + andalso nonempty_intervals_intersect(ALo, AHi, BLo, BHi). + +%% Purely optional optimisation +squash_empty_subtrees(?NODE(Left0, Right0, Mid, Segs)) -> + build_squash_node(squash_empty_subtrees(Left0), + squash_empty_subtrees(Right0), + Mid, Segs); +squash_empty_subtrees(Leaf) -> Leaf. + +build_squash_node([], [], _, Segs) -> Segs; +build_squash_node(Left, Right, Mid, Segs) -> + ?NODE(Left, Right, Mid, Segs). + +%% @doc Returns the indices of the intervals in the tree that contains Point. +-spec intersect(integer(), tree()) -> [non_neg_integer()]. +intersect(Point, nil) when is_integer(Point) -> []; +intersect(Point, #segment_tree{lo=Lo, hi=Hi, root=Root}) + when is_integer(Point) -> + case Lo =< Point andalso Point =< Hi of + false -> []; + true -> intersect_1(Point, Root, []) + end. + +intersect_1(Point, ?NODE(Left, Right, Mid, Segs), Acc0) -> + Child = if Point =< Mid -> Left; true -> Right end, + intersect_1(Point, Child, Segs ++ Acc0); +intersect_1(_, LeafSegs, Acc) -> LeafSegs ++ Acc. + +%% @doc Deletes the interval {Lo, Hi}, which had index Index in the list passed +%% to build/1. +-spec delete(non_neg_integer(), integer(), integer(), tree()) -> tree(). +delete(_, _, _, nil) -> nil; +delete(_, Lo, Hi, Tree) when Lo > Hi -> Tree; +delete(_, Lo, Hi, Tree = #segment_tree{lo=TLo, hi=THi}) + when Hi < TLo; Lo > THi -> Tree; +delete(Index, Lo, Hi, Tree = #segment_tree{lo=TLo, hi=THi, root=Root0}) + when is_integer(Lo), is_integer(Hi) -> + Root = delete_1(Index, Lo, Hi, TLo, THi, Root0), + Tree#segment_tree{root=Root}. + +delete_1(I, Lo, Hi, NLo, NHi, ?NODE(Left0, Right0, Mid, Segments)) -> + if Lo =< NLo, NHi =< Hi -> + ?NODE(Left0, Right0, Mid, delete_2(Segments, I)); + true -> + Left = case nonempty_intervals_intersect(Lo, Hi, NLo, Mid) of + true -> delete_1(I, Lo, Hi, NLo, Mid, Left0); + false -> Left0 + end, + Right = case nonempty_intervals_intersect(Lo, Hi, Mid+1, NHi) of + true -> delete_1(I, Lo, Hi, Mid+1, NHi, Right0); + false -> Right0 + end, + %% We could do build_squash_node here, is it worth it? + ?NODE(Left, Right, Mid, Segments) + end; +delete_1(I, _Lo, _Hi, _NLo, _NHi, Leaf) -> delete_2(Leaf, I). + +delete_2([I|Segs], I) -> Segs; +delete_2([S|Segs], I) -> [S|delete_2(Segs,I)]. + +-compile({inline,nonempty_intervals_intersect/4}). +nonempty_intervals_intersect(ALo, AHi, BLo, BHi) -> + (BLo =< AHi) andalso (ALo =< BHi). diff --git a/lib/hipe/opt/hipe_spillmin.erl b/lib/hipe/opt/hipe_spillmin.erl index 4eeb1d71db..6bb6980ad5 100644 --- a/lib/hipe/opt/hipe_spillmin.erl +++ b/lib/hipe/opt/hipe_spillmin.erl @@ -49,7 +49,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -module(hipe_spillmin). --export([stackalloc/6, mapmerge/2]). +-export([stackalloc/6, stackalloc/7, mapmerge/2]). %%-define(DEBUG, 1). -define(HIPE_INSTRUMENT_COMPILER, true). @@ -69,16 +69,24 @@ -spec stackalloc(#cfg{}, [_], non_neg_integer(), comp_options(), module(), hipe_temp_map()) -> - {hipe_spill_map(), non_neg_integer()}. + {hipe_spill_map(), non_neg_integer()}. stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> + Liveness = Target:analyze(CFG), + stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, Target, TempMap). + +-spec stackalloc(#cfg{}, _, [_], non_neg_integer(), + comp_options(), module(), hipe_temp_map()) -> + {hipe_spill_map(), non_neg_integer()}. + +stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, Target, TempMap) -> case proplists:get_bool(spillmin_color, Options) of false -> - ?option_time(hipe_spillmin_scan:stackalloc(CFG, StackSlots, SpillIndex, + ?option_time(hipe_spillmin_scan:stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, Target, TempMap), "Spill minimize, linear scan", Options); true -> - ?option_time(hipe_spillmin_color:stackalloc(CFG, StackSlots, SpillIndex, + ?option_time(hipe_spillmin_color:stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, Target, TempMap), "Spill minimize, graph coloring", Options) end. diff --git a/lib/hipe/opt/hipe_spillmin_color.erl b/lib/hipe/opt/hipe_spillmin_color.erl index 7c23de44b4..9c62fdf11a 100644 --- a/lib/hipe/opt/hipe_spillmin_color.erl +++ b/lib/hipe/opt/hipe_spillmin_color.erl @@ -41,7 +41,7 @@ -module(hipe_spillmin_color). --export([stackalloc/6]). +-export([stackalloc/7]). %%-ifndef(DO_ASSERT). %%-define(DO_ASSERT, true). @@ -66,13 +66,13 @@ %% where Location is {spill,M}. %% {spill,M} denotes the Mth spilled node --spec stackalloc(#cfg{}, [_], non_neg_integer(), +-spec stackalloc(#cfg{}, _, [_], non_neg_integer(), comp_options(), module(), hipe_temp_map()) -> {hipe_spill_map(), non_neg_integer()}. -stackalloc(CFG, _StackSlots, SpillIndex, _Options, Target, TempMap) -> +stackalloc(CFG, Live, _StackSlots, SpillIndex, _Options, Target, TempMap) -> ?report2("building IG~n", []), - {IG, NumNodes} = build_ig(CFG, Target, TempMap), + {IG, NumNodes} = build_ig(CFG, Live, Target, TempMap), {Cols, MaxColors} = color_heuristic(IG, 0, NumNodes, NumNodes, NumNodes, Target, 1), SortedCols = lists:sort(Cols), @@ -167,8 +167,8 @@ remap_temp_map0(Cols, [_Y|Ys], SpillIndex) -> %% Returns {Interference_graph, Number_Of_Nodes} %% -build_ig(CFG, Target, TempMap) -> - try build_ig0(CFG, Target, TempMap) +build_ig(CFG, Live, Target, TempMap) -> + try build_ig0(CFG, Live, Target, TempMap) catch error:Rsn -> exit({regalloc, build_ig, Rsn}) end. @@ -185,8 +185,7 @@ setup_ets0([X|Xs], Table, N) -> ets:insert(Table, {X, N}), setup_ets0(Xs, Table, N+1). -build_ig0(CFG, Target, TempMap) -> - Live = Target:analyze(CFG), +build_ig0(CFG, Live, Target, TempMap) -> TempMapping = map_spilled_temporaries(TempMap), TempMappingTable = setup_ets(TempMapping), NumSpilled = length(TempMapping), diff --git a/lib/hipe/opt/hipe_spillmin_scan.erl b/lib/hipe/opt/hipe_spillmin_scan.erl index 06b68e1934..53cbc0014b 100644 --- a/lib/hipe/opt/hipe_spillmin_scan.erl +++ b/lib/hipe/opt/hipe_spillmin_scan.erl @@ -60,7 +60,7 @@ -module(hipe_spillmin_scan). --export([stackalloc/6]). +-export([stackalloc/7]). %%-define(DEBUG, 1). -define(HIPE_INSTRUMENT_COMPILER, true). @@ -85,15 +85,12 @@ %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% --spec stackalloc(#cfg{}, [_], non_neg_integer(), +-spec stackalloc(#cfg{}, _, [_], non_neg_integer(), comp_options(), module(), hipe_temp_map()) -> {hipe_spill_map(), non_neg_integer()}. -stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> +stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, Target, TempMap) -> ?debug_msg("LinearScan: ~w\n", [erlang:statistics(runtime)]), - %% Step 1: Calculate liveness (Call external implementation.) - Liveness = liveness(CFG, Target), - ?debug_msg("liveness (done)~w\n", [erlang:statistics(runtime)]), USIntervals = calculate_intervals(CFG, Liveness, Options, Target, TempMap), %% ?debug_msg("intervals (done) ~w\n", [erlang:statistics(runtime)]), @@ -538,9 +535,6 @@ extend_interval(Pos, {Beginning, End}) %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -liveness(CFG, Target) -> - Target:analyze(CFG). - bb(CFG, L, Target) -> Target:bb(CFG, L). diff --git a/lib/hipe/ppc/hipe_ppc.erl b/lib/hipe/ppc/hipe_ppc.erl index 0fa96162f6..380e791bc1 100644 --- a/lib/hipe/ppc/hipe_ppc.erl +++ b/lib/hipe/ppc/hipe_ppc.erl @@ -167,8 +167,10 @@ temp_is_precoloured(#ppc_temp{reg=Reg,type=Type}) -> _ -> hipe_ppc_registers:is_precoloured_gpr(Reg) end. -mk_simm16(Value) -> #ppc_simm16{value=Value}. -mk_uimm16(Value) -> #ppc_uimm16{value=Value}. +mk_simm16(Value) when Value >= -(1 bsl 15), Value < (1 bsl 15) -> + #ppc_simm16{value=Value}. +mk_uimm16(Value) when Value >= 0, Value < (1 bsl 16) -> + #ppc_uimm16{value=Value}. mk_mfa(M, F, A) -> #ppc_mfa{m=M, f=F, a=A}. @@ -240,7 +242,11 @@ mk_li(Dst, Value, Tail) -> % Dst can be R0 Value =< 16#7FFFFFFF -> mk_li32(Dst, R0, Value, Tail); true -> - Highest = (Value bsr 48), % Value@highest + Highest = case (Value bsr 48) of % Value@highest + TopBitSet when TopBitSet >= (1 bsl 15) -> + TopBitSet - (1 bsl 16); % encoder needs it to be negative + FitsSimm16 -> FitsSimm16 + end, Higher = (Value bsr 32) band 16#FFFF, % Value@higher High = (Value bsr 16) band 16#FFFF, % Value@h Low = Value band 16#FFFF, % Value@l diff --git a/lib/hipe/ppc/hipe_ppc_assemble.erl b/lib/hipe/ppc/hipe_ppc_assemble.erl index ff9da01b11..d89ff6235c 100644 --- a/lib/hipe/ppc/hipe_ppc_assemble.erl +++ b/lib/hipe/ppc/hipe_ppc_assemble.erl @@ -175,7 +175,8 @@ do_slwi_opnds(Dst, Src1, {uimm,N}) when is_integer(N), 0 =< N, N < 32 -> {Dst, Src1, {sh,N}, {mb,0}, {me,31-N}}. do_srwi_opnds(Dst, Src1, {uimm,N}) when is_integer(N), 0 =< N, N < 32 -> - {Dst, Src1, {sh,32-N}, {mb,N}, {me,31}}. + %% SH should be 0 (not 32) when N is 0 + {Dst, Src1, {sh,(32-N) band 31}, {mb,N}, {me,31}}. do_srawi_src2({uimm,N}) when is_integer(N), 0 =< N, N < 32 -> {sh,N}. @@ -184,7 +185,8 @@ do_sldi_opnds(Dst, Src1, {uimm,N}) when is_integer(N), 0 =< N, N < 64 -> {Dst, Src1, {sh6,N}, {me6,63-N}}. do_srdi_opnds(Dst, Src1, {uimm,N}) when is_integer(N), 0 =< N, N < 64 -> - {Dst, Src1, {sh6,64-N}, {mb6,N}}. + %% SH should be 0 (not 64) when N is 0 + {Dst, Src1, {sh6,(64-N) band 63}, {mb6,N}}. do_sradi_src2({uimm,N}) when is_integer(N), 0 =< N, N < 64 -> {sh6,N}. @@ -246,6 +248,7 @@ do_load(I) -> case LdOp of 'ld' -> do_disp_ds(Disp); 'ldu' -> do_disp_ds(Disp); + 'lwa' -> do_disp_ds(Disp); _ -> do_disp(Disp) end, NewBase = do_reg(Base), diff --git a/lib/hipe/ppc/hipe_ppc_cfg.erl b/lib/hipe/ppc/hipe_ppc_cfg.erl index 34d4bf54c5..ee9b4432e0 100644 --- a/lib/hipe/ppc/hipe_ppc_cfg.erl +++ b/lib/hipe/ppc/hipe_ppc_cfg.erl @@ -24,6 +24,7 @@ -export([init/1, labels/1, start_label/1, succ/2, + map_bbs/2, fold_bbs/3, bb/2, bb_add/3]). -export([postorder/1]). -export([linearise/1, params/1, reverse_postorder/1]). @@ -34,6 +35,7 @@ -define(BREADTH_ORDER,true). -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -include("hipe_ppc.hrl"). -include("../flow/cfg.hrl"). diff --git a/lib/hipe/ppc/hipe_ppc_frame.erl b/lib/hipe/ppc/hipe_ppc_frame.erl index ff0450270f..8d37159ad8 100644 --- a/lib/hipe/ppc/hipe_ppc_frame.erl +++ b/lib/hipe/ppc/hipe_ppc_frame.erl @@ -24,16 +24,14 @@ -include("hipe_ppc.hrl"). -include("../rtl/hipe_literals.hrl"). -frame(Defun) -> - Formals = fix_formals(hipe_ppc:defun_formals(Defun)), - Temps0 = all_temps(hipe_ppc:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG) -> + Formals = fix_formals(hipe_ppc_cfg:params(CFG)), + Temps0 = all_temps(CFG, Formals), + MinFrame = defun_minframe(CFG), Temps = ensure_minframe(MinFrame, Temps0), - ClobbersLR = clobbers_lr(hipe_ppc:defun_code(Defun)), - CFG0 = hipe_ppc_cfg:init(Defun), - Liveness = hipe_ppc_liveness_all:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersLR), - hipe_ppc_cfg:linearise(CFG1). + ClobbersLR = clobbers_lr(CFG), + Liveness = hipe_ppc_liveness_all:analyse(CFG), + do_body(CFG, Liveness, Formals, Temps, ClobbersLR). fix_formals(Formals) -> fix_formals(hipe_ppc_registers:nr_args(), Formals). @@ -44,27 +42,16 @@ fix_formals(_, []) -> []. do_body(CFG0, Liveness, Formals, Temps, ClobbersLR) -> Context = mk_context(Liveness, Formals, Temps, ClobbersLR), - CFG1 = do_blocks(CFG0, Context), + CFG1 = hipe_ppc_cfg:map_bbs( + fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG0), do_prologue(CFG1, Context). -do_blocks(CFG, Context) -> - Labels = hipe_ppc_cfg:labels(CFG), - do_blocks(Labels, CFG, Context). - -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) -> Liveness = context_liveness(Context), LiveOut = hipe_ppc_liveness_all:liveout(Liveness, Label), - Block = hipe_ppc_cfg:bb(CFG, Label), Code = hipe_bb:code(Block), - NewCode = do_block(Code, LiveOut, Context), - NewBlock = hipe_bb:code_update(Block, NewCode), - NewCFG = hipe_ppc_cfg:bb_add(CFG, Label, NewBlock), - do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> - CFG. - -do_block(Insns, LiveOut, Context) -> - do_block(Insns, LiveOut, Context, context_framesize(Context), []). + NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), + hipe_bb:code_update(Block, NewCode). do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), @@ -573,29 +560,41 @@ temp_is_pseudo(Temp) -> %%% Detect if a Defun's body clobbers LR. %%% -clobbers_lr([I|Insns]) -> - case I of - #pseudo_call{} -> true; - %% mtspr to lr cannot occur yet - _ -> clobbers_lr(Insns) - end; -clobbers_lr([]) -> false. +clobbers_lr(CFG) -> + any_insn(fun(#pseudo_call{}) -> true; + (_) -> false + end, CFG). + +any_insn(Pred, CFG) -> + %% Abuse fold to do an efficient "any"-operation using nonlocal control flow + FoundSatisfying = make_ref(), + try fold_insns(fun (I, _) -> + case Pred(I) of + true -> throw(FoundSatisfying); + false -> false + end + end, false, CFG) + of _ -> false + catch FoundSatisfying -> true + end. %%% %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_ppc_defuse:insn_def_all(I)), - S2 = tset_add_list(S1, hipe_ppc_defuse:insn_use_all(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_ppc_defuse:insn_use_all(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_ppc_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). tset_empty() -> gb_sets:new(). @@ -624,16 +623,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_ppc:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_ppc:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_ppc_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/ppc/hipe_ppc_main.erl b/lib/hipe/ppc/hipe_ppc_main.erl index fd5cc2befb..5d1b0d0305 100644 --- a/lib/hipe/ppc/hipe_ppc_main.erl +++ b/lib/hipe/ppc/hipe_ppc_main.erl @@ -24,8 +24,10 @@ rtl_to_ppc(MFA, RTL, Options) -> PPC1 = hipe_rtl_to_ppc:translate(RTL), - PPC2 = hipe_ppc_ra:ra(PPC1, Options), - PPC3 = hipe_ppc_frame:frame(PPC2), + PPC1CFG = hipe_ppc_cfg:init(PPC1), + PPC2CFG = hipe_ppc_ra:ra(PPC1CFG, Options), + PPC3CFG = hipe_ppc_frame:frame(PPC2CFG), + PPC3 = hipe_ppc_cfg:linearise(PPC3CFG), PPC4 = hipe_ppc_finalise:finalise(PPC3), ppc_pp(PPC4, MFA, Options), {native, powerpc, {unprofiled, PPC4}}. diff --git a/lib/hipe/ppc/hipe_ppc_ra.erl b/lib/hipe/ppc/hipe_ppc_ra.erl index 87c776f5d1..bfb4d35139 100644 --- a/lib/hipe/ppc/hipe_ppc_ra.erl +++ b/lib/hipe/ppc/hipe_ppc_ra.erl @@ -22,36 +22,36 @@ -module(hipe_ppc_ra). -export([ra/2]). -ra(Defun0, Options) -> - %% hipe_ppc_pp:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} +ra(CFG0, Options) -> + %% hipe_ppc_pp:pp(hipe_ppc_cfg:linearise(CFG0)), + {CFG1, Coloring_fp, SpillIndex} = case proplists:get_bool(inline_fp, Options) of true -> - hipe_regalloc_loop:ra_fp(Defun0, Options, + hipe_regalloc_loop:ra_fp(CFG0, Options, hipe_coalescing_regalloc, hipe_ppc_specific_fp); false -> - {Defun0,[],0} + {CFG0,[],0} end, - %% hipe_ppc_pp:pp(Defun1), - {Defun2, Coloring} + %% hipe_ppc_pp:pp(hipe_ppc_cfg:linearise(CFG1)), + {CFG2, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, SpillIndex, Options, hipe_graph_coloring_regalloc); linear_scan -> - hipe_ppc_ra_ls:ra(Defun1, SpillIndex, Options); + hipe_ppc_ra_ls:ra(CFG1, SpillIndex, Options); naive -> - hipe_ppc_ra_naive:ra(Defun1, Coloring_fp, Options); + hipe_ppc_ra_naive:ra(CFG1, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, - %% hipe_ppc_pp:pp(Defun2), - hipe_ppc_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). + %% hipe_ppc_pp:pp(hipe_ppc_cfg:linearise(CFG2)), + hipe_ppc_ra_finalise:finalise(CFG2, Coloring, Coloring_fp). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_ppc_specific). +ra(CFG, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, SpillIndex, Options, RegAllocMod, hipe_ppc_specific). diff --git a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl index ea163221c2..78f123116e 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl @@ -23,13 +23,14 @@ -export([finalise/3]). -include("hipe_ppc.hrl"). -finalise(Defun, TempMap, FPMap0) -> - Code = hipe_ppc:defun_code(Defun), - {_, SpillLimit} = hipe_ppc:defun_var_range(Defun), +finalise(CFG, TempMap, FPMap0) -> + {_, SpillLimit} = hipe_gensym:var_range(ppc), Map = mk_ra_map(TempMap, SpillLimit), FPMap1 = mk_ra_map_fp(FPMap0, SpillLimit), - NewCode = ra_code(Code, Map, FPMap1, []), - Defun#defun{code=NewCode}. + hipe_ppc_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FPMap1) end, CFG). + +ra_bb(BB, Map, FpMap) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap, [])). ra_code([I|Insns], Map, FPMap, Accum) -> ra_code(Insns, Map, FPMap, [ra_insn(I, Map, FPMap) | Accum]); diff --git a/lib/hipe/ppc/hipe_ppc_ra_ls.erl b/lib/hipe/ppc/hipe_ppc_ra_ls.erl index 6e8304467e..52562fc321 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_ls.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_ls.erl @@ -23,15 +23,12 @@ -module(hipe_ppc_ra_ls). -export([ra/3]). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_ppc_cfg:init(NewDefun), +ra(CFG, SpillIndex, Options) -> SpillLimit = hipe_ppc_specific:number_of_temporaries(CFG), - alloc(NewDefun, SpillIndex, SpillLimit, Options). + alloc(CFG, SpillIndex, SpillLimit, Options). -alloc(Defun, SpillIndex, SpillLimit, Options) -> - CFG = hipe_ppc_cfg:init(Defun), - {Coloring, _NewSpillIndex} = +alloc(CFG, SpillIndex, SpillLimit, Options) -> + {Coloring, _NewSpillIndex, Liveness} = regalloc( CFG, hipe_ppc_registers:allocatable_gpr()-- @@ -41,16 +38,16 @@ alloc(Defun, SpillIndex, SpillLimit, Options) -> [hipe_ppc_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, hipe_ppc_specific), - {NewDefun, _DidSpill} = + {NewCFG, _DidSpill} = hipe_ppc_ra_postconditions:check_and_rewrite( - Defun, Coloring, 'linearscan'), + CFG, Coloring, 'linearscan'), TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific), {TempMap2,_NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, hipe_ppc_specific, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), - {NewDefun, Coloring2}. + {NewCFG, Coloring2}. regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> hipe_ls_regalloc:regalloc( diff --git a/lib/hipe/ppc/hipe_ppc_ra_naive.erl b/lib/hipe/ppc/hipe_ppc_ra_naive.erl index 24995be252..ba269ae981 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_naive.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_naive.erl @@ -24,7 +24,7 @@ -include("hipe_ppc.hrl"). -ra(Defun, _Coloring_fp, _Options) -> % -> {Defun, Coloring} - {NewDefun,_DidSpill} = - hipe_ppc_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), - {NewDefun, []}. +ra(CFG, _Coloring_fp, _Options) -> % -> {CFG, Coloring} + {NewCFG,_DidSpill} = + hipe_ppc_ra_postconditions:check_and_rewrite2(CFG, [], 'naive'), + {NewCFG, []}. diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl index 0b16ec3891..412aeeeba6 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl @@ -25,17 +25,13 @@ -include("hipe_ppc.hrl"). -check_and_rewrite(Defun, Coloring, Allocator) -> +check_and_rewrite(CFG, Coloring, Allocator) -> TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific), - check_and_rewrite2(Defun, TempMap, Allocator). + check_and_rewrite2(CFG, TempMap, Allocator). -check_and_rewrite2(Defun, TempMap, Allocator) -> +check_and_rewrite2(CFG, TempMap, Allocator) -> Strategy = strategy(Allocator), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - VarRange = {0, hipe_gensym:get_var(ppc)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. + do_bbs(hipe_ppc_cfg:labels(CFG), TempMap, Strategy, CFG, false). strategy(Allocator) -> case Allocator of @@ -44,6 +40,13 @@ strategy(Allocator) -> 'naive' -> 'fixed' end. +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_ppc_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_ppc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). + do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl index 821aa66c11..553fac882b 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl @@ -23,13 +23,16 @@ -export([check_and_rewrite/2]). -include("hipe_ppc.hrl"). -check_and_rewrite(Defun, Coloring) -> +check_and_rewrite(CFG, Coloring) -> TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific_fp), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, [], false), - VarRange = {0, hipe_gensym:get_var(ppc)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. + do_bbs(hipe_ppc_cfg:labels(CFG), TempMap, CFG, false). + +do_bbs([], _TempMap, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_ppc_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, [], DidSpill0), + CFG = hipe_ppc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, CFG, DidSpill). do_insns([I|Insns], TempMap, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap), diff --git a/lib/hipe/ppc/hipe_rtl_to_ppc.erl b/lib/hipe/ppc/hipe_rtl_to_ppc.erl index a994659616..a01e67a789 100644 --- a/lib/hipe/ppc/hipe_rtl_to_ppc.erl +++ b/lib/hipe/ppc/hipe_rtl_to_ppc.erl @@ -1031,7 +1031,7 @@ conv_return(I, Map, Data) -> {I2, Map0, Data}. conv_store(I, Map, Data) -> - {Base1, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {Base1, Map0} = conv_src(hipe_rtl:store_base(I), Map), {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {Base2, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), StoreSize = hipe_rtl:store_size(I), @@ -1056,13 +1056,28 @@ mk_store(Src, Base1, Base2, StoreSize) -> end. mk_store2(Src, Base1, Base2, StOp) -> - case hipe_ppc:is_temp(Base2) of + case hipe_ppc:is_temp(Base1) of true -> - mk_store_rr(Src, Base1, Base2, StOp); + case hipe_ppc:is_temp(Base2) of + true -> + mk_store_rr(Src, Base1, Base2, StOp); + _ -> + mk_store_ri(Src, Base1, Base2, StOp) + end; _ -> - mk_store_ri(Src, Base1, Base2, StOp) + case hipe_ppc:is_temp(Base2) of + true -> + mk_store_ri(Src, Base2, Base1, StOp); + _ -> + mk_store_ii(Src, Base1, Base2, StOp) + end end. - + +mk_store_ii(Src, Base, Disp, StOp) -> + Tmp = new_untagged_temp(), + mk_li(Tmp, Base, + mk_store_ri(Src, Tmp, Disp, StOp)). + mk_store_ri(Src, Base, Disp, StOp) -> hipe_ppc:mk_store(StOp, Src, Disp, Base, 'new', []). diff --git a/lib/hipe/regalloc/Makefile b/lib/hipe/regalloc/Makefile index aaa4418f37..ceb535f1c7 100644 --- a/lib/hipe/regalloc/Makefile +++ b/lib/hipe/regalloc/Makefile @@ -123,7 +123,6 @@ $(EBIN)/hipe_amd64_specific_x87.beam: hipe_x86_specific_x87.erl $(EBIN)/hipe_coalescing_regalloc.beam: ../main/hipe.hrl $(EBIN)/hipe_graph_coloring_regalloc.beam: ../main/hipe.hrl $(EBIN)/hipe_ig.beam: ../main/hipe.hrl ../flow/cfg.hrl hipe_spillcost.hrl -$(EBIN)/hipe_ig_moves.beam: ../util/hipe_vectors.hrl $(EBIN)/hipe_ls_regalloc.beam: ../main/hipe.hrl $(EBIN)/hipe_optimistic_regalloc.beam: ../main/hipe.hrl $(EBIN)/hipe_regalloc_loop.beam: ../main/hipe.hrl diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl index 50e5869d45..2e5804337d 100644 --- a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl +++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl @@ -42,7 +42,9 @@ reg_nr/1, non_alloc/1, allocatable/0, - physical_name/1, + allocatable/1, + temp0/0, + physical_name/1, all_precoloured/0, new_spill_index/1, %% used by hipe_ls_regalloc var_range/1, @@ -52,7 +54,8 @@ %% callbacks for hipe_regalloc_loop -export([defun_to_cfg/1, - check_and_rewrite/2]). + check_and_rewrite/2, + check_and_rewrite/3]). %%---------------------------------------------------------------------------- @@ -60,11 +63,15 @@ %%---------------------------------------------------------------------------- -defun_to_cfg(Defun) -> - hipe_x86_cfg:init(Defun). +defun_to_cfg(AlreadyACFG) -> + AlreadyACFG. -check_and_rewrite(Defun, Coloring) -> - hipe_amd64_ra_sse2_postconditions:check_and_rewrite(Defun, Coloring). +check_and_rewrite(CFG, Coloring) -> + hipe_amd64_ra_sse2_postconditions:check_and_rewrite(CFG, Coloring). + +check_and_rewrite(CFG, Coloring, Strategy) -> + hipe_amd64_ra_sse2_postconditions:check_and_rewrite( + CFG, Coloring, Strategy). reverse_postorder(CFG) -> hipe_x86_cfg:reverse_postorder(CFG). @@ -75,8 +82,8 @@ breadthorder(CFG) -> postorder(CFG) -> hipe_x86_cfg:postorder(CFG). -is_global(_Reg) -> - false. +is_global(Reg) -> + hipe_amd64_registers:sse2_temp0() =:= Reg. is_fixed(_Reg) -> false. @@ -109,7 +116,16 @@ liveout(BB_in_out_liveness, Label) -> %% Registers stuff allocatable() -> - hipe_amd64_registers:allocatable_sse2(). + allocatable('normal'). + +allocatable('normal') -> + hipe_amd64_registers:allocatable_sse2(); +allocatable('linearscan') -> + hipe_amd64_registers:allocatable_sse2() -- + [hipe_amd64_registers:sse2_temp0()]. + +temp0() -> + hipe_amd64_registers:sse2_temp0(). all_precoloured() -> allocatable(). diff --git a/lib/hipe/regalloc/hipe_arm_specific.erl b/lib/hipe/regalloc/hipe_arm_specific.erl index 4e34cb1d99..e04d80f690 100644 --- a/lib/hipe/regalloc/hipe_arm_specific.erl +++ b/lib/hipe/regalloc/hipe_arm_specific.erl @@ -53,11 +53,11 @@ -export([defun_to_cfg/1, check_and_rewrite/2]). -defun_to_cfg(Defun) -> - hipe_arm_cfg:init(Defun). +defun_to_cfg(AlreadyACFG) -> + AlreadyACFG. -check_and_rewrite(Defun, Coloring) -> - hipe_arm_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring) -> + hipe_arm_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). reverse_postorder(CFG) -> hipe_arm_cfg:reverse_postorder(CFG). diff --git a/lib/hipe/regalloc/hipe_coalescing_regalloc.erl b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl index e2f817d369..bbb2e3ecf0 100644 --- a/lib/hipe/regalloc/hipe_coalescing_regalloc.erl +++ b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl @@ -51,13 +51,14 @@ %% %% Returns: %% Coloring -- A coloring for specified CFG -%% SpillIndex0 -- A new spill index +%% SpillIndex2 -- A new spill index %%----------------------------------------------------------------------- regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> %% Build interference graph ?debug_msg("Build IG\n", []), - IG = hipe_ig:build(CFG, Target), + Liveness = Target:analyze(CFG), + IG = hipe_ig:build(CFG, Liveness, Target), %% io:format("IG: ~p\n", [IG]), ?debug_msg("Init\n", []), @@ -94,9 +95,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> %% io:format("color0:~w\nColor1:~w\nNodes:~w\nNodes2:~w\nNum_Temps:~w\n",[Color0,Color1,Node_sets,Node_sets2,Num_Temps]), ?debug_msg("Build mapping ~p\n", [Node_sets2]), - Coloring = build_namelist(Node_sets2, SpillIndex, Alias0, Color1), + {Coloring, SpillIndex2} = + build_namelist(Node_sets2, SpillIndex, Alias0, Color1), ?debug_msg("Coloring ~p\n", [Coloring]), - Coloring. + {Coloring, SpillIndex2, Liveness}. %%---------------------------------------------------------------------- %% Function: do_coloring diff --git a/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl index bc6e442236..1a3ea90c05 100644 --- a/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl +++ b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl @@ -82,7 +82,7 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> PhysRegs = Target:allocatable(), ?report2("building IG~n", []), - {IG, Spill} = build_ig(CFG, Target), + {IG, Spill, Live} = build_ig(CFG, Target), %% check_ig(IG), ?report3("graph: ~p~nphysical regs: ~p~n", [list_ig(IG), PhysRegs]), @@ -102,7 +102,7 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> Coloring = [{X, {reg, X}} || X <- NotAllocatable] ++ Cols, ?ASSERT(check_coloring(Coloring, IG, Target)), - {Coloring, NewSpillIndex}. + {Coloring, NewSpillIndex, Live}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -126,7 +126,7 @@ build_ig0(CFG, Target) -> empty_ig(NumN), empty_spill(NumN), Target), - {normalize_ig(IG), Spill}. + {normalize_ig(IG), Spill, Live}. build_ig_bbs([], _CFG, _Live, IG, Spill, _Target) -> {IG, Spill}; diff --git a/lib/hipe/regalloc/hipe_ig.erl b/lib/hipe/regalloc/hipe_ig.erl index 8fd5d0df1f..47f8f6d08d 100644 --- a/lib/hipe/regalloc/hipe_ig.erl +++ b/lib/hipe/regalloc/hipe_ig.erl @@ -28,7 +28,7 @@ -module(hipe_ig). --export([build/2, +-export([build/3, nodes_are_adjacent/3, node_spill_cost/2, node_adj_list/2, @@ -368,10 +368,9 @@ initial_ig(NumTemps, Target) -> %% An interference graph for the given CFG. %%---------------------------------------------------------------------- --spec build(#cfg{}, atom()) -> #igraph{}. +-spec build(#cfg{}, Liveness::_, atom()) -> #igraph{}. -build(CFG, Target) -> - BBs_in_out_liveness = Target:analyze(CFG), +build(CFG, BBs_in_out_liveness, Target) -> Labels = Target:labels(CFG), %% How many temporaries exist? NumTemps = Target:number_of_temporaries(CFG), diff --git a/lib/hipe/regalloc/hipe_ig_moves.erl b/lib/hipe/regalloc/hipe_ig_moves.erl index b679453de0..2a70606dab 100644 --- a/lib/hipe/regalloc/hipe_ig_moves.erl +++ b/lib/hipe/regalloc/hipe_ig_moves.erl @@ -25,8 +25,6 @@ new_move/3, get_moves/1]). --include("../util/hipe_vectors.hrl"). - %%----------------------------------------------------------------------------- %% The main data structure; its fields are: %% - movelist : mapping from temp to set of associated move numbers @@ -34,11 +32,13 @@ %% - moveinsns : list of move instructions, in descending move number order %% - moveset : set of move instructions --record(ig_moves, {movelist :: hipe_vector(), +-record(ig_moves, {movelist :: movelist(), nrmoves = 0 :: non_neg_integer(), moveinsns = [] :: [{_,_}], moveset = gb_sets:empty() :: gb_sets:set()}). +-type movelist() :: hipe_vectors:vector(ordsets:ordset(non_neg_integer())). + %%----------------------------------------------------------------------------- -spec new(non_neg_integer()) -> #ig_moves{}. @@ -66,7 +66,8 @@ new_move(Dst, Src, IG_moves) -> moveset = gb_sets:insert(MoveInsn, MoveSet)} end. --spec add_movelist(non_neg_integer(), non_neg_integer(), hipe_vector()) -> hipe_vector(). +-spec add_movelist(non_neg_integer(), non_neg_integer(), movelist()) + -> movelist(). add_movelist(MoveNr, Temp, MoveList) -> AssocMoves = hipe_vectors:get(MoveList, Temp), @@ -74,7 +75,7 @@ add_movelist(MoveNr, Temp, MoveList) -> %% ordset due to the ordsets:union in hipe_coalescing_regalloc:combine(). hipe_vectors:set(MoveList, Temp, ordsets:add_element(MoveNr, AssocMoves)). --spec get_moves(#ig_moves{}) -> {hipe_vector(), non_neg_integer(), tuple()}. +-spec get_moves(#ig_moves{}) -> {movelist(), non_neg_integer(), tuple()}. get_moves(IG_moves) -> % -> {MoveList, NrMoves, MoveInsns} {IG_moves#ig_moves.movelist, diff --git a/lib/hipe/regalloc/hipe_ls_regalloc.erl b/lib/hipe/regalloc/hipe_ls_regalloc.erl index d24b803524..c318927077 100644 --- a/lib/hipe/regalloc/hipe_ls_regalloc.erl +++ b/lib/hipe/regalloc/hipe_ls_regalloc.erl @@ -56,7 +56,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -module(hipe_ls_regalloc). --export([regalloc/7]). +-export([regalloc/7, regalloc/8]). %%-define(DEBUG,1). -define(HIPE_INSTRUMENT_COMPILER, true). @@ -96,10 +96,18 @@ %% @end %%- - - - - - - - - - - - - - - - - - - - - - - - regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> + regalloc(CFG, undefined, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target). + +regalloc(CFG, Liveness0, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> ?debug_msg("LinearScan: ~w\n", [erlang:statistics(runtime)]), %% Step 1: Calculate liveness (Call external implementation.) - Liveness = liveness(CFG, Target), - ?debug_msg("liveness (done)~w\n", [erlang:statistics(runtime)]), + Liveness = case Liveness0 of + undefined -> + L=liveness(CFG, Target), + ?debug_msg("liveness (done)~w\n", [erlang:statistics(runtime)]), + L; + _ -> Liveness0 + end, USIntervals = calculate_intervals(CFG, Liveness, Entrypoints, Options, Target), ?debug_msg("intervals (done) ~w\n", [erlang:statistics(runtime)]), @@ -108,10 +116,10 @@ regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> %% ?debug_msg("Intervals ~w\n", [Intervals]), ?debug_msg("No intervals: ~w\n",[length(Intervals)]), ?debug_msg("count intervals (done) ~w\n", [erlang:statistics(runtime)]), - Allocation = allocate(Intervals, PhysRegs, SpillIndex, DontSpill, Target), + {Coloring, NewSpillIndex} + = allocate(Intervals, PhysRegs, SpillIndex, DontSpill, Target), ?debug_msg("allocation (done) ~w\n", [erlang:statistics(runtime)]), - Allocation. - + {Coloring, NewSpillIndex, Liveness}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% diff --git a/lib/hipe/regalloc/hipe_optimistic_regalloc.erl b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl index 2ed9ec3b45..67674be14c 100644 --- a/lib/hipe/regalloc/hipe_optimistic_regalloc.erl +++ b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl @@ -78,7 +78,7 @@ %% %% Returns: %% Coloring -- A coloring for specified CFG -%% SpillIndex0 -- A new spill index +%% SpillIndex2 -- A new spill index %%----------------------------------------------------------------------- -ifdef(COMPARE_ITERATED_OPTIMISTIC). regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> @@ -86,8 +86,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("CFG: ~p\n",[CFG]), %% Build interference graph ?debug_msg("Build IG\n",[]), - IG_O = hipe_ig:build(CFG, Target), - IG = hipe_ig:build(CFG, Target), + Liveness = Target:analyze(CFG), + IG_O = hipe_ig:build(CFG, Liveness, Target), + IG = hipe_ig:build(CFG, Liveness, Target), ?debug_msg("adjlist: ~p\n",[hipe_ig:adj_list(IG)]), ?debug_msg("IG:\n",[]), ?print_adjacent(IG), @@ -199,9 +200,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("Nodes:~w\nNodes2:~w\nNo_temporaries:~w\n",[Node_sets,Node_sets2,No_temporaries]), ?debug_msg("Build mapping _N ~w\n",[Node_sets2]), - Coloring = build_namelist(Node_sets2,SpillIndex,Alias2,Color1), + {Coloring,SpillIndex2} = + build_namelist(Node_sets2,SpillIndex,Alias2,Color1), ?debug_msg("Coloring ~p\n",[Coloring]), - SortedColoring = { sort_stack(element(1, Coloring)), element(2, Coloring)}, + SortedColoring = {sort_stack(Coloring), SpillIndex2}, ?debug_msg("SortedColoring ~p\n",[SortedColoring]), %%Coloring. ?debug_msg("----------------------Assign colors _O\n",[]), @@ -217,14 +219,15 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> SortedColoring_O = {sort_stack(element(1, Coloring_O)), element(2, Coloring_O)}, ?debug_msg("SortedColoring_O ~p\n",[SortedColoring_O]), sanity_compare(SortedColoring_O, SortedColoring), - Coloring. + {Coloring,SpillIndex2,Liveness}. -else. regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("optimistic ~w\n",[Target]), ?debug_msg("CFG: ~p\n",[CFG]), %% Build interference graph ?debug_msg("Build IG\n",[]), - IG = hipe_ig:build(CFG, Target), + Liveness = Target:analyze(CFG), + IG = hipe_ig:build(CFG, Liveness, Target), ?debug_msg("adjlist: ~p\n",[hipe_ig:adj_list(IG)]), ?debug_msg("IG:\n",[]), ?print_adjacent(IG), @@ -316,9 +319,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("Nodes:~w\nNodes2:~w\nNo_temporaries:~w\n",[Node_sets,Node_sets2,No_temporaries]), ?debug_msg("Build mapping _N ~w\n",[Node_sets2]), - Coloring = build_namelist(Node_sets2,SpillIndex,Alias2,Color1), + {Coloring, SpillIndex2} = build_namelist(Node_sets2,SpillIndex,Alias2,Color1), ?debug_msg("Coloring ~p\n",[Coloring]), - Coloring. + {Coloring,SpillIndex2,Liveness}. -endif. %%---------------------------------------------------------------------- diff --git a/lib/hipe/regalloc/hipe_ppc_specific.erl b/lib/hipe/regalloc/hipe_ppc_specific.erl index c49b1e510f..988501c96f 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific.erl @@ -53,11 +53,11 @@ -export([defun_to_cfg/1, check_and_rewrite/2]). -defun_to_cfg(Defun) -> - hipe_ppc_cfg:init(Defun). +defun_to_cfg(AlreadyACFG) -> + AlreadyACFG. -check_and_rewrite(Defun, Coloring) -> - hipe_ppc_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring) -> + hipe_ppc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). reverse_postorder(CFG) -> hipe_ppc_cfg:reverse_postorder(CFG). diff --git a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl index 454aa4c686..6f00111777 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl @@ -53,11 +53,11 @@ -export([defun_to_cfg/1, check_and_rewrite/2]). -defun_to_cfg(Defun) -> - hipe_ppc_cfg:init(Defun). +defun_to_cfg(AlreadyACFG) -> + AlreadyACFG. -check_and_rewrite(Defun, Coloring) -> - hipe_ppc_ra_postconditions_fp:check_and_rewrite(Defun, Coloring). +check_and_rewrite(CFG, Coloring) -> + hipe_ppc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring). reverse_postorder(CFG) -> hipe_ppc_cfg:reverse_postorder(CFG). diff --git a/lib/hipe/regalloc/hipe_regalloc_loop.erl b/lib/hipe/regalloc/hipe_regalloc_loop.erl index d29615a3a0..fa42cdd0fb 100644 --- a/lib/hipe/regalloc/hipe_regalloc_loop.erl +++ b/lib/hipe/regalloc/hipe_regalloc_loop.erl @@ -38,20 +38,19 @@ ra_common(Defun, SpillIndex, Options, RegAllocMod, TargetMod) -> ?inc_counter(ra_calls_counter, 1), CFG = TargetMod:defun_to_cfg(Defun), SpillLimit = TargetMod:number_of_temporaries(CFG), - alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod). + alloc(Defun, CFG, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod). -alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) -> +alloc(Defun, CFG, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) -> ?inc_counter(ra_iteration_counter, 1), - CFG = TargetMod:defun_to_cfg(Defun), - {Coloring, _NewSpillIndex} = - RegAllocMod:regalloc(CFG, SpillIndex, SpillLimit, TargetMod, Options), + {Coloring, _NewSpillIndex, Liveness} = + RegAllocMod:regalloc(CFG, SpillIndex, SpillLimit,TargetMod, Options), {NewDefun, DidSpill} = TargetMod:check_and_rewrite(Defun, Coloring), case DidSpill of false -> %% No new temps, we are done. ?add_spills(Options, _NewSpillIndex), TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod), - {TempMap2, NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, + {TempMap2, NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, TargetMod, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), @@ -63,7 +62,8 @@ alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) -> %% end, {NewDefun, Coloring2, NewSpillIndex2}; _ -> + NewCFG = TargetMod:defun_to_cfg(NewDefun), %% Since SpillLimit is used as a low-water-mark %% the list of temps not to spill is uninteresting. - alloc(NewDefun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) + alloc(NewDefun, NewCFG, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) end. diff --git a/lib/hipe/regalloc/hipe_sparc_specific.erl b/lib/hipe/regalloc/hipe_sparc_specific.erl index 8d34604f84..29d0908caf 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific.erl @@ -53,11 +53,11 @@ -export([defun_to_cfg/1, check_and_rewrite/2]). -defun_to_cfg(Defun) -> - hipe_sparc_cfg:init(Defun). +defun_to_cfg(AlreadyACFG) -> + AlreadyACFG. -check_and_rewrite(Defun, Coloring) -> - hipe_sparc_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring) -> + hipe_sparc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). reverse_postorder(CFG) -> hipe_sparc_cfg:reverse_postorder(CFG). diff --git a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl index 2edd3cb47e..08c2541b41 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl @@ -53,11 +53,11 @@ -export([defun_to_cfg/1, check_and_rewrite/2]). -defun_to_cfg(Defun) -> - hipe_sparc_cfg:init(Defun). +defun_to_cfg(AlreadyACFG) -> + AlreadyACFG. -check_and_rewrite(Defun, Coloring) -> - hipe_sparc_ra_postconditions_fp:check_and_rewrite(Defun, Coloring). +check_and_rewrite(CFG, Coloring) -> + hipe_sparc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring). reverse_postorder(CFG) -> hipe_sparc_cfg:reverse_postorder(CFG). diff --git a/lib/hipe/regalloc/hipe_x86_specific.erl b/lib/hipe/regalloc/hipe_x86_specific.erl index 4edf8674b7..f1007c95ed 100644 --- a/lib/hipe/regalloc/hipe_x86_specific.erl +++ b/lib/hipe/regalloc/hipe_x86_specific.erl @@ -68,11 +68,11 @@ -export([defun_to_cfg/1, check_and_rewrite/2]). -defun_to_cfg(Defun) -> - hipe_x86_cfg:init(Defun). +defun_to_cfg(AlreadyACFG) -> + AlreadyACFG. -check_and_rewrite(Defun, Coloring) -> - ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring) -> + ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(CFG, Coloring, 'normal'). reverse_postorder(CFG) -> hipe_x86_cfg:reverse_postorder(CFG). diff --git a/lib/hipe/regalloc/hipe_x86_specific_x87.erl b/lib/hipe/regalloc/hipe_x86_specific_x87.erl index ece07cb2f9..0c022d5a27 100644 --- a/lib/hipe/regalloc/hipe_x86_specific_x87.erl +++ b/lib/hipe/regalloc/hipe_x86_specific_x87.erl @@ -32,7 +32,7 @@ -endif. -module(?HIPE_X86_SPECIFIC_X87). --export([allocatable/0, +-export([allocatable/1, is_precoloured/1, %% var_range/1, %% def_use/1, @@ -58,7 +58,14 @@ physical_name/1, breadthorder/1, postorder/1, - reverse_postorder/1]). + reverse_postorder/1]). + +%% callbacks for hipe_x86_ra_ls +-export([check_and_rewrite/3]). + +%% Rewrite happens in hipe_x86_ra_finalise:finalise/4 +check_and_rewrite(CFG, _Coloring, 'linearscan') -> + {CFG, false}. breadthorder(CFG) -> hipe_x86_cfg:breadthorder(CFG). @@ -103,7 +110,7 @@ liveout(BB_in_out_liveness,Label) -> %% Registers stuff -allocatable() -> +allocatable('linearscan') -> ?HIPE_X86_REGISTERS:allocatable_x87(). is_precoloured(Reg) -> diff --git a/lib/hipe/rtl/hipe_rtl_binary.erl b/lib/hipe/rtl/hipe_rtl_binary.erl index fb9c0c196d..ad23df80d2 100644 --- a/lib/hipe/rtl/hipe_rtl_binary.erl +++ b/lib/hipe/rtl/hipe_rtl_binary.erl @@ -106,10 +106,20 @@ create_lbls(0) -> %%------------------------------------------------------------------------------ get_word_integer(Var, Register, SystemLimitLblName, FalseLblName) -> - [EndLbl] = create_lbls(1), - EndName = hipe_rtl:label_name(EndLbl), - get_word_integer(Var, Register,SystemLimitLblName, FalseLblName, EndName, EndName, - [EndLbl]). + case hipe_rtl:is_imm(Var) of + true -> + TaggedVal = hipe_rtl:imm_value(Var), + true = hipe_tagscheme:is_fixnum(TaggedVal), + Val = hipe_tagscheme:fixnum_val(TaggedVal), + if Val < 0 -> [hipe_rtl:mk_goto(FalseLblName)]; + true -> [hipe_rtl:mk_move(Register, hipe_rtl:mk_imm(Val))] + end; + false -> + [EndLbl] = create_lbls(1), + EndName = hipe_rtl:label_name(EndLbl), + get_word_integer(Var, Register,SystemLimitLblName, FalseLblName, + EndName, EndName, [EndLbl]) + end. get_word_integer(Var, Register, SystemLimitLblName, FalseLblName, TrueLblName, BigLblName, Tail) -> diff --git a/lib/hipe/sparc/hipe_rtl_to_sparc.erl b/lib/hipe/sparc/hipe_rtl_to_sparc.erl index eef5ba8d96..e170fec3d6 100644 --- a/lib/hipe/sparc/hipe_rtl_to_sparc.erl +++ b/lib/hipe/sparc/hipe_rtl_to_sparc.erl @@ -625,7 +625,7 @@ conv_return(I, Map, Data) -> {I2, Map0, Data}. conv_store(I, Map, Data) -> - {Base1, Map0} = conv_dst(hipe_rtl:store_base(I), Map), % no immediates allowed + {Base1, Map0} = conv_src(hipe_rtl:store_base(I), Map), {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {Base2, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), StOp = conv_stop(hipe_rtl:store_size(I)), @@ -649,13 +649,27 @@ mk_store(StOp, Src, Base1, Base2) -> end. mk_store2(StOp, Src, Base1, Base2) -> - case hipe_sparc:is_temp(Base2) of + case hipe_sparc:is_temp(Base1) of true -> - mk_store_rr(StOp, Src, Base1, Base2); + case hipe_sparc:is_temp(Base2) of + true -> + mk_store_rr(StOp, Src, Base1, Base2); + _ -> + mk_store_ri(StOp, Src, Base1, Base2) + end; _ -> - mk_store_ri(StOp, Src, Base1, Base2) + case hipe_sparc:is_temp(Base2) of + true -> + mk_store_ri(StOp, Src, Base2, Base1); + _ -> + mk_store_ii(StOp, Src, Base1, Base2) + end end. +mk_store_ii(StOp, Src, Base, Disp) -> + Tmp = new_untagged_temp(), + mk_set(Base, Tmp, mk_store_ri(StOp, Src, Tmp, Disp)). + mk_store_ri(StOp, Src, Base, Disp) -> hipe_sparc:mk_store(StOp, Src, Base, Disp, 'new', []). @@ -750,13 +764,25 @@ xaluop_commutes(XAluOp) -> xaluop_is_shift(XAluOp) -> case XAluOp of + 'add' -> false; + 'addcc' -> false; + 'and' -> false; + 'andcc' -> false; + 'cmpcc' -> false; + 'ldsb' -> false; + 'ldub' -> false; + 'lduw' -> false; + 'or' -> false; 'sll' -> true; - 'srl' -> true; + %% 'sllx' -> true; + 'smul' -> false; 'sra' -> true; - 'sllx' -> true; - 'srlx' -> true; - 'srax' -> true; - _ -> false + %% 'srax' -> true; + 'srl' -> true; + %% 'srlx' -> true; + 'sub' -> false; + 'subcc' -> false; + 'xor' -> false end. %%% Convert an extended SPARC AluOp back to a plain AluOp. @@ -764,9 +790,23 @@ xaluop_is_shift(XAluOp) -> xaluop_normalise(XAluOp) -> case XAluOp of - 'cmp' -> 'sub'; + 'add' -> 'add'; + 'addcc' -> 'addcc'; + 'and' -> 'and'; + 'andcc' -> 'andcc'; + %% 'cmp' -> 'sub'; 'cmpcc' -> 'subcc'; - _ -> XAluOp + 'ldsb' -> 'ldsb'; + 'ldub' -> 'ldub'; + 'lduw' -> 'lduw'; + 'or' -> 'or'; + 'sll' -> 'sll'; + 'smul' -> 'smul'; + 'sra' -> 'sra'; + 'srl' -> 'srl'; + 'sub' -> 'sub'; + 'subcc' -> 'subcc'; + 'xor' -> 'xor' end. %%% Convert an RTL condition code. diff --git a/lib/hipe/sparc/hipe_sparc_cfg.erl b/lib/hipe/sparc/hipe_sparc_cfg.erl index 0b2c77f27b..957c8a0d24 100644 --- a/lib/hipe/sparc/hipe_sparc_cfg.erl +++ b/lib/hipe/sparc/hipe_sparc_cfg.erl @@ -24,6 +24,7 @@ -export([init/1, labels/1, start_label/1, succ/2, + map_bbs/2, fold_bbs/3, bb/2, bb_add/3]). -export([postorder/1, reverse_postorder/1]). -export([linearise/1]). diff --git a/lib/hipe/sparc/hipe_sparc_frame.erl b/lib/hipe/sparc/hipe_sparc_frame.erl index a42c1983f4..37f29e660a 100644 --- a/lib/hipe/sparc/hipe_sparc_frame.erl +++ b/lib/hipe/sparc/hipe_sparc_frame.erl @@ -25,16 +25,14 @@ -include("hipe_sparc.hrl"). -include("../rtl/hipe_literals.hrl"). -frame(Defun) -> - Formals = fix_formals(hipe_sparc:defun_formals(Defun)), - Temps0 = all_temps(hipe_sparc:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG) -> + Formals = fix_formals(hipe_sparc_cfg:params(CFG)), + Temps0 = all_temps(CFG, Formals), + MinFrame = defun_minframe(CFG), Temps = ensure_minframe(MinFrame, Temps0), - ClobbersRA = clobbers_ra(hipe_sparc:defun_code(Defun)), - CFG0 = hipe_sparc_cfg:init(Defun), - Liveness = hipe_sparc_liveness_all:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersRA), - hipe_sparc_cfg:linearise(CFG1). + ClobbersRA = clobbers_ra(CFG), + Liveness = hipe_sparc_liveness_all:analyse(CFG), + do_body(CFG, Liveness, Formals, Temps, ClobbersRA). fix_formals(Formals) -> fix_formals(hipe_sparc_registers:nr_args(), Formals). @@ -550,29 +548,41 @@ temp_is_pseudo(Temp) -> %%% Detect if a Defun's body clobbers RA. %%% -clobbers_ra(Insns) -> - case Insns of - [#pseudo_call{}|_] -> true; - %% moves to RA cannot occur yet - [_|Rest] -> clobbers_ra(Rest); - [] -> false +clobbers_ra(CFG) -> + any_insn(fun(#pseudo_call{}) -> true; + (_) -> false + end, CFG). + +any_insn(Pred, CFG) -> + %% Abuse fold to do an efficient "any"-operation using nonlocal control flow + FoundSatisfying = make_ref(), + try fold_insns(fun (I, _) -> + case Pred(I) of + true -> throw(FoundSatisfying); + false -> false + end + end, false, CFG) + of _ -> false + catch FoundSatisfying -> true end. %%% %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_sparc_defuse:insn_def_all(I)), - S2 = tset_add_list(S1, hipe_sparc_defuse:insn_use_all(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_sparc_defuse:insn_use_all(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_sparc_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). tset_empty() -> gb_sets:new(). @@ -601,16 +611,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_sparc:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_sparc:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_sparc_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/sparc/hipe_sparc_main.erl b/lib/hipe/sparc/hipe_sparc_main.erl index c16751c7bd..8e9c560bb2 100644 --- a/lib/hipe/sparc/hipe_sparc_main.erl +++ b/lib/hipe/sparc/hipe_sparc_main.erl @@ -24,12 +24,14 @@ rtl_to_sparc(MFA, RTL, Options) -> Defun1 = hipe_rtl_to_sparc:translate(RTL), + CFG1 = hipe_sparc_cfg:init(Defun1), %% io:format("~w: after translate\n", [?MODULE]), %% hipe_sparc_pp:pp(Defun1), - Defun2 = hipe_sparc_ra:ra(Defun1, Options), + CFG2 = hipe_sparc_ra:ra(CFG1, Options), %% io:format("~w: after regalloc\n", [?MODULE]), - %% hipe_sparc_pp:pp(Defun2), - Defun3 = hipe_sparc_frame:frame(Defun2), + %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG2)), + CFG3 = hipe_sparc_frame:frame(CFG2), + Defun3 = hipe_sparc_cfg:linearise(CFG3), %% io:format("~w: after frame\n", [?MODULE]), %% hipe_sparc_pp:pp(Defun3), Defun4 = hipe_sparc_finalise:finalise(Defun3), diff --git a/lib/hipe/sparc/hipe_sparc_ra.erl b/lib/hipe/sparc/hipe_sparc_ra.erl index afea8c9b4c..5f955c2058 100644 --- a/lib/hipe/sparc/hipe_sparc_ra.erl +++ b/lib/hipe/sparc/hipe_sparc_ra.erl @@ -22,36 +22,36 @@ -module(hipe_sparc_ra). -export([ra/2]). -ra(Defun0, Options) -> - %% hipe_sparc_pp:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} +ra(CFG0, Options) -> + %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG0)), + {CFG1, Coloring_fp, SpillIndex} = case proplists:get_bool(inline_fp, Options) of true -> - hipe_regalloc_loop:ra_fp(Defun0, Options, + hipe_regalloc_loop:ra_fp(CFG0, Options, hipe_coalescing_regalloc, hipe_sparc_specific_fp); false -> - {Defun0,[],0} + {CFG0,[],0} end, - %% hipe_sparc_pp:pp(Defun1), - {Defun2, Coloring} + %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG1)), + {CFG2, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, SpillIndex, Options, hipe_graph_coloring_regalloc); linear_scan -> - hipe_sparc_ra_ls:ra(Defun1, SpillIndex, Options); + hipe_sparc_ra_ls:ra(CFG1, SpillIndex, Options); naive -> - hipe_sparc_ra_naive:ra(Defun1, Coloring_fp, Options); + hipe_sparc_ra_naive:ra(CFG1, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, - %% hipe_sparc_pp:pp(Defun2), - hipe_sparc_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). + %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG2)), + hipe_sparc_ra_finalise:finalise(CFG2, Coloring, Coloring_fp). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_sparc_specific). +ra(CFG, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, SpillIndex, Options, RegAllocMod, hipe_sparc_specific). diff --git a/lib/hipe/sparc/hipe_sparc_ra_finalise.erl b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl index dc1e69c101..5d6056071c 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_finalise.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl @@ -23,13 +23,14 @@ -export([finalise/3]). -include("hipe_sparc.hrl"). -finalise(Defun, TempMap, FPMap0) -> - Code = hipe_sparc:defun_code(Defun), - {_, SpillLimit} = hipe_sparc:defun_var_range(Defun), +finalise(CFG, TempMap, FPMap0) -> + {_, SpillLimit} = hipe_gensym:var_range(sparc), Map = mk_ra_map(TempMap, SpillLimit), FPMap1 = mk_ra_map_fp(FPMap0, SpillLimit), - NewCode = ra_code(Code, Map, FPMap1, []), - Defun#defun{code=NewCode}. + hipe_sparc_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FPMap1) end, CFG). + +ra_bb(BB, Map, FpMap) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap, [])). ra_code([I|Insns], Map, FPMap, Accum) -> ra_code(Insns, Map, FPMap, [ra_insn(I, Map, FPMap) | Accum]); diff --git a/lib/hipe/sparc/hipe_sparc_ra_ls.erl b/lib/hipe/sparc/hipe_sparc_ra_ls.erl index 19e7c92d2f..ced9addd31 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_ls.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_ls.erl @@ -23,15 +23,12 @@ -module(hipe_sparc_ra_ls). -export([ra/3]). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_sparc_cfg:init(NewDefun), +ra(CFG, SpillIndex, Options) -> SpillLimit = hipe_sparc_specific:number_of_temporaries(CFG), - alloc(NewDefun, SpillIndex, SpillLimit, Options). + alloc(CFG, SpillIndex, SpillLimit, Options). -alloc(Defun, SpillIndex, SpillLimit, Options) -> - CFG = hipe_sparc_cfg:init(Defun), - {Coloring, _NewSpillIndex} = +alloc(CFG, SpillIndex, SpillLimit, Options) -> + {Coloring, _NewSpillIndex, Liveness} = regalloc( CFG, hipe_sparc_registers:allocatable_gpr()-- @@ -41,16 +38,16 @@ alloc(Defun, SpillIndex, SpillLimit, Options) -> [hipe_sparc_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, hipe_sparc_specific), - {NewDefun, _DidSpill} = + {NewCFG, _DidSpill} = hipe_sparc_ra_postconditions:check_and_rewrite( - Defun, Coloring, 'linearscan'), + CFG, Coloring, 'linearscan'), TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific), {TempMap2,_NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, hipe_sparc_specific, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), - {NewDefun, Coloring2}. + {NewCFG, Coloring2}. regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> hipe_ls_regalloc:regalloc( diff --git a/lib/hipe/sparc/hipe_sparc_ra_naive.erl b/lib/hipe/sparc/hipe_sparc_ra_naive.erl index b6c33dec6c..f621d94553 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_naive.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_naive.erl @@ -24,7 +24,7 @@ -include("hipe_sparc.hrl"). -ra(Defun, _Coloring_fp, _Options) -> % -> {Defun, Coloring} - {NewDefun,_DidSpill} = - hipe_sparc_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), - {NewDefun, []}. +ra(CFG, _Coloring_fp, _Options) -> % -> {CFG, Coloring} + {NewCFG,_DidSpill} = + hipe_sparc_ra_postconditions:check_and_rewrite2(CFG, [], 'naive'), + {NewCFG, []}. diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl index ab31b3c8d9..0336a6c59f 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl @@ -25,17 +25,13 @@ -include("hipe_sparc.hrl"). -check_and_rewrite(Defun, Coloring, Allocator) -> +check_and_rewrite(CFG, Coloring, Allocator) -> TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific), - check_and_rewrite2(Defun, TempMap, Allocator). + check_and_rewrite2(CFG, TempMap, Allocator). -check_and_rewrite2(Defun, TempMap, Allocator) -> +check_and_rewrite2(CFG, TempMap, Allocator) -> Strategy = strategy(Allocator), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - VarRange = {0, hipe_gensym:get_var(sparc)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. + do_bbs(hipe_sparc_cfg:labels(CFG), TempMap, Strategy, CFG, false). strategy(Allocator) -> case Allocator of @@ -44,6 +40,13 @@ strategy(Allocator) -> 'naive' -> 'fixed' end. +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_sparc_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_sparc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). + do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl index d893ac26e9..d3e1d8cf93 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl @@ -25,13 +25,16 @@ -include("hipe_sparc.hrl"). -check_and_rewrite(Defun, Coloring) -> +check_and_rewrite(CFG, Coloring) -> TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific_fp), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, [], false), - VarRange = {0, hipe_gensym:get_var(sparc)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. + do_bbs(hipe_sparc_cfg:labels(CFG), TempMap, CFG, false). + +do_bbs([], _TempMap, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_sparc_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, [], DidSpill0), + CFG = hipe_sparc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, CFG, DidSpill). do_insns([I|Insns], TempMap, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap), diff --git a/lib/hipe/sparc/hipe_sparc_registers.erl b/lib/hipe/sparc/hipe_sparc_registers.erl index 884215702b..6681a10070 100644 --- a/lib/hipe/sparc/hipe_sparc_registers.erl +++ b/lib/hipe/sparc/hipe_sparc_registers.erl @@ -86,6 +86,8 @@ -define(I7, 31). -define(LAST_PRECOLOURED,31). % must handle both GRP and FPR ranges +-define(RA, ?O7). + -define(ARG0, ?O1). -define(ARG1, ?O2). -define(ARG2, ?O3). @@ -174,7 +176,7 @@ stack_pointer() -> ?STACK_POINTER. proc_pointer() -> ?PROC_POINTER. -return_address() -> ?O7. +return_address() -> ?RA. g0() -> ?G0. @@ -283,7 +285,9 @@ call_clobbered() -> % does the RA strip the type or not? ]. tailcall_clobbered() -> % tailcall crapola needs one temp - [{?TEMP1,tagged},{?TEMP1,untagged}]. + [{?TEMP1,tagged},{?TEMP1,untagged} + ,{?RA,tagged},{?RA,untagged} + ]. live_at_return() -> [{?HEAP_POINTER,untagged}, diff --git a/lib/hipe/ssa/hipe_ssa_liveness.inc b/lib/hipe/ssa/hipe_ssa_liveness.inc index 78488c65fc..46df8b66ad 100644 --- a/lib/hipe/ssa/hipe_ssa_liveness.inc +++ b/lib/hipe/ssa/hipe_ssa_liveness.inc @@ -40,6 +40,15 @@ ssa_liveness__livein/2]). %% ssa_liveness__livein/3], %% ssa_liveness__liveout/2]). +-type set(E) :: gb_sets:set(E). +-type liveness(Label, Var) :: + #{Label => {{Gen :: set(Var), + Kill :: set(Var), + {TotalDirGen :: set(Var), + DirGen :: gb_trees:tree(Label, set(Var))}}, + LiveIn :: set(Var), + LiveOut :: set(Var), + Successors :: [Label]}}. -endif. %% -ifdef(DEBUG_LIVENESS). %% -export([pp_liveness/1]). @@ -262,21 +271,15 @@ update_directed_gen({Pred, Var}, Map)-> %% %% liveness %% +-compile({inline, [liveness_lookup/2, liveness_update/3]}). liveness_init(List) -> - liveness_init1(List, gb_trees:empty()). + maps:from_list(List). -liveness_init1([{Label, Info}|Left], Map) -> - liveness_init1(Left, gb_trees:insert(Label, Info, Map)); -liveness_init1([], Map) -> - Map. - -liveness_lookup(Label, Map) -> - {value, Info} = gb_trees:lookup(Label, Map), - Info. - -liveness_update(Label, NewInfo, Map) -> - gb_trees:update(Label, NewInfo, Map). +liveness_lookup(Label, Liveness) -> + maps:get(Label, Liveness). +liveness_update(Label, Val, Liveness) -> + maps:update(Label, Val, Liveness). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/lib/hipe/test/hipe_testsuite_driver.erl b/lib/hipe/test/hipe_testsuite_driver.erl index 03ec7adfd0..88576775ca 100644 --- a/lib/hipe/test/hipe_testsuite_driver.erl +++ b/lib/hipe/test/hipe_testsuite_driver.erl @@ -99,7 +99,7 @@ write_suite(Suite) -> write_header(#suite{suitename = SuiteName, outputfile = OutputFile, testcases = TestCases}) -> Exports = format_export(TestCases), - TimeLimit = 3, %% with 1 or 2 it fails on some slow machines... + TimeLimit = 6, %% with 1, 2, or 3 it fails on some slow machines... io:format(OutputFile, "%% ATTENTION!\n" "%% This is an automatically generated file. Do not edit.\n\n" @@ -168,6 +168,10 @@ run(TestCase, Dir, _OutDir) -> HiPEOpts = try TestCase:hipe_options() catch error:undef -> [] end, {ok, TestCase} = hipe:c(TestCase, HiPEOpts), ok = TestCase:test(), + {ok, TestCase} = hipe:c(TestCase, [o1|HiPEOpts]), + ok = TestCase:test(), + {ok, TestCase} = hipe:c(TestCase, [o0|HiPEOpts]), + ok = TestCase:test(), ToLLVM = try TestCase:to_llvm() catch error:undef -> true end, case ToLLVM andalso hipe:llvm_support_available() of true -> diff --git a/lib/hipe/util/Makefile b/lib/hipe/util/Makefile index 66e9421c25..04de7f7823 100644 --- a/lib/hipe/util/Makefile +++ b/lib/hipe/util/Makefile @@ -113,4 +113,3 @@ release_docs_spec: $(EBIN)/hipe_timing.beam: ../main/hipe.hrl -$(EBIN)/hipe_vectors.beam: hipe_vectors.hrl diff --git a/lib/hipe/util/hipe_vectors.erl b/lib/hipe/util/hipe_vectors.erl index 7f6c8e91c2..90d736d02c 100644 --- a/lib/hipe/util/hipe_vectors.erl +++ b/lib/hipe/util/hipe_vectors.erl @@ -33,11 +33,25 @@ %% list_to_vector/1, list/1]). --include("hipe_vectors.hrl"). +%%-define(USE_TUPLES, true). +%%-define(USE_GBTREES, true). +-define(USE_ARRAYS, true). + +-type vector() :: vector(_). +-export_type([vector/0, vector/1]). + +-spec new(non_neg_integer(), V) -> vector(E) when V :: E. +-spec set(vector(E), non_neg_integer(), V :: E) -> vector(E). +-spec get(vector(E), non_neg_integer()) -> E. +-spec size(vector(_)) -> non_neg_integer(). +-spec vector_to_list(vector(E)) -> [E]. +%% -spec list_to_vector([E]) -> vector(E). +-spec list(vector(E)) -> [{non_neg_integer(), E}]. %% --------------------------------------------------------------------- -ifdef(USE_TUPLES). +-opaque vector(_) :: tuple(). new(N, V) -> erlang:make_tuple(N, V). @@ -68,8 +82,8 @@ get(Vec, Ix) -> element(Ix+1, Vec). %% --------------------------------------------------------------------- -ifdef(USE_GBTREES). +-opaque vector(E) :: gb_trees:tree(non_neg_integer(), E). --spec new(non_neg_integer(), _) -> hipe_vector(). new(N, V) when is_integer(N), N >= 0 -> gb_trees:from_orddict(mklist(N, V)). @@ -81,14 +95,11 @@ mklist(M, N, V) when M < N -> mklist(_, _, _) -> []. --spec size(hipe_vector()) -> non_neg_integer(). size(V) -> gb_trees:size(V). --spec list(hipe_vector()) -> [{_, _}]. list(Vec) -> gb_trees:to_list(Vec). -%% -spec list_to_vector([_]) -> hipe_vector(). %% list_to_vector(Xs) -> %% gb_trees:from_orddict(index(Xs, 0)). %% @@ -97,16 +108,29 @@ list(Vec) -> %% index([],_) -> %% []. --spec vector_to_list(hipe_vector()) -> [_]. vector_to_list(V) -> gb_trees:values(V). --spec set(hipe_vector(), non_neg_integer(), _) -> hipe_vector(). set(Vec, Ix, V) -> gb_trees:update(Ix, V, Vec). --spec get(hipe_vector(), non_neg_integer()) -> any(). get(Vec, Ix) -> gb_trees:get(Ix, Vec). -endif. %% ifdef USE_GBTREES + +%% --------------------------------------------------------------------- + +-ifdef(USE_ARRAYS). +%%-opaque vector(E) :: array:array(E). +-type vector(E) :: array:array(E). % Work around dialyzer bug + +new(N, V) -> array:new(N, {default, V}). +size(V) -> array:size(V). +list(Vec) -> array:to_orddict(Vec). +%% list_to_vector(Xs) -> array:from_list(Xs). +vector_to_list(V) -> array:to_list(V). +set(Vec, Ix, V) -> array:set(Ix, V, Vec). +get(Vec, Ix) -> array:get(Ix, Vec). + +-endif. %% ifdef USE_ARRAYS diff --git a/lib/hipe/util/hipe_vectors.hrl b/lib/hipe/util/hipe_vectors.hrl deleted file mode 100644 index d4556e9dc4..0000000000 --- a/lib/hipe/util/hipe_vectors.hrl +++ /dev/null @@ -1,29 +0,0 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% -%%-define(USE_TUPLES, true). --define(USE_GBTREES, true). - --ifdef(USE_TUPLES). --type hipe_vector() :: tuple(). --endif. - --ifdef(USE_GBTREES). --type hipe_vector() :: gb_trees:tree(). --endif. diff --git a/lib/hipe/x86/Makefile b/lib/hipe/x86/Makefile index 93f8b955dd..9b21270426 100644 --- a/lib/hipe/x86/Makefile +++ b/lib/hipe/x86/Makefile @@ -60,7 +60,6 @@ MODULES=hipe_rtl_to_x86 \ hipe_x86_ra_ls \ hipe_x86_ra_naive \ hipe_x86_ra_postconditions \ - hipe_x86_ra_x87_ls \ hipe_x86_registers \ hipe_x86_spill_restore \ hipe_x86_x87 @@ -133,7 +132,6 @@ $(EBIN)/hipe_x86_ra: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_dummy.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_ls.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_postconditions.beam: ../main/hipe.hrl -$(EBIN)/hipe_x86_ra_x87_ls.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_registers.beam: ../rtl/hipe_literals.hrl $(EBIN)/hipe_x86_spill_restore.beam: ../main/hipe.hrl ../flow/cfg.hrl $(EBIN)/hipe_x86_x87.beam: ../main/hipe.hrl diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl index d13f63b1d9..4c8c98551c 100644 --- a/lib/hipe/x86/hipe_rtl_to_x86.erl +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -85,7 +85,7 @@ conv_insn(I, Map, Data) -> true -> conv_shift(Dst, Src1, BinOp, Src2); false -> - conv_alu(Dst, Src1, BinOp, Src2, []) + conv_alu_nocc(Dst, Src1, BinOp, Src2, []) end, {FixSrc1++FixSrc2++I2, Map2, Data}; #alub{} -> @@ -144,7 +144,7 @@ conv_insn(I, Map, Data) -> {I2, Map, Data}; #load{} -> {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map), - {FixSrc, Src, Map1} = conv_src(hipe_rtl:load_src(I), Map0), + {FixSrc, Src, Map1} = conv_src_noimm(hipe_rtl:load_src(I), Map0), {FixOff, Off, Map2} = conv_src(hipe_rtl:load_offset(I), Map1), I2 = case {hipe_rtl:load_size(I), hipe_rtl:load_sign(I)} of {byte, signed} -> @@ -171,6 +171,7 @@ conv_insn(I, Map, Data) -> Src = hipe_x86:mk_imm_from_atom(hipe_rtl:load_atom_atom(I)), I2 = [hipe_x86:mk_move(Src, Dst)], {I2, Map0, Data}; + #move{src=Dst, dst=Dst} -> {[], Map, Data}; #move{} -> {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map), {FixSrc, Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0), @@ -182,11 +183,11 @@ conv_insn(I, Map, Data) -> I2 = move_retvals(Args, [hipe_x86:mk_ret(-1)]), {FixArgs++I2, Map0, Data}; #store{} -> - {Ptr, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {FixPtr, Ptr, Map0} = conv_src_noimm(hipe_rtl:store_base(I), Map), {FixSrc, Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {FixOff, Off, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), I2 = mk_store(hipe_rtl:store_size(I), Src, Ptr, Off), - {FixSrc++FixOff++I2, Map2, Data}; + {FixPtr++FixSrc++FixOff++I2, Map2, Data}; #switch{} -> % this one also updates Data :-( %% from hipe_rtl2sparc, but we use a hairy addressing mode %% instead of doing the arithmetic manually @@ -206,7 +207,7 @@ conv_insn(I, Map, Data) -> {I2, Map1, NewData}; #fload{} -> {Dst, Map0} = conv_dst(hipe_rtl:fload_dst(I), Map), - {[], Src, Map1} = conv_src(hipe_rtl:fload_src(I), Map0), + {[], Src, Map1} = conv_src_noimm(hipe_rtl:fload_src(I), Map0), {[], Off, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1), I2 = [hipe_x86:mk_fmove(hipe_x86:mk_mem(Src, Off, 'double'),Dst)], {I2, Map2, Data}; @@ -249,6 +250,22 @@ conv_insn(I, Map, Data) -> %%% Finalise the conversion of a 3-address ALU operation, taking %%% care to not introduce more temps and moves than necessary. +conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) -> + case (not same_opnd(Dst, Src1)) andalso (not same_opnd(Dst, Src2)) + andalso (hipe_x86:is_temp(Src1) orelse hipe_x86:is_temp(Src2)) + of + false -> conv_alu(Dst, Src1, 'add', Src2, Tail); + true -> % Use LEA + Type = typeof_dst(Dst), + Mem = case hipe_x86:is_temp(Src1) of + true -> hipe_x86:mk_mem(Src1, Src2, Type); + false -> hipe_x86:mk_mem(Src2, Src1, Type) + end, + [hipe_x86:mk_lea(Mem, Dst) | Tail] + end; +conv_alu_nocc(Dst, Src1, BinOp, Src2, Tail) -> + conv_alu(Dst, Src1, BinOp, Src2, Tail). + conv_alu(Dst, Src1, 'imul', Src2, Tail) -> mk_imul(Src1, Src2, Dst, Tail); conv_alu(Dst, Src1, BinOp, Src2, Tail) -> @@ -572,6 +589,16 @@ conv_fun(Fun, Map) -> end end. +conv_src_noimm(Opnd, Map) -> + R={FixSrc0, Src, NewMap} = conv_src(Opnd, Map), + case hipe_x86:is_imm(Src) of + false -> R; + true -> + Tmp = new_untagged_temp(), + {FixSrc0 ++ [hipe_x86:mk_move(Src, Tmp)], + Tmp, NewMap} + end. + %%% Convert an RTL source operand (imm/var/reg). conv_src(Opnd, Map) -> diff --git a/lib/hipe/x86/hipe_x86_cfg.erl b/lib/hipe/x86/hipe_x86_cfg.erl index ab40b68580..b9f9c711f3 100644 --- a/lib/hipe/x86/hipe_x86_cfg.erl +++ b/lib/hipe/x86/hipe_x86_cfg.erl @@ -24,7 +24,7 @@ -export([init/1, labels/1, start_label/1, succ/2, pred/2, - bb/2, bb_add/3]). + bb/2, bb_add/3, map_bbs/2, fold_bbs/3]). -export([postorder/1, reverse_postorder/1]). -export([linearise/1, params/1, arity/1, redirect_jmp/3]). @@ -33,6 +33,7 @@ -define(BREADTH_ORDER,true). -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -include("hipe_x86.hrl"). -include("../flow/cfg.hrl"). @@ -107,7 +108,7 @@ mk_goto(Label) -> hipe_x86:mk_jmp_label(Label). is_label(I) -> - hipe_x86:is_label(I). + case I of #label{} -> true; _ -> false end. label_name(Label) -> hipe_x86:label_label(Label). diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl index 8851ead250..fc782571bf 100644 --- a/lib/hipe/x86/hipe_x86_frame.erl +++ b/lib/hipe/x86/hipe_x86_frame.erl @@ -46,15 +46,13 @@ -include("../x86/hipe_x86.hrl"). -include("../rtl/hipe_literals.hrl"). -frame(Defun, _Options) -> - Formals = fix_formals(hipe_x86:defun_formals(Defun)), - Temps0 = all_temps(hipe_x86:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG0, _Options) -> + Formals = fix_formals(hipe_x86_cfg:params(CFG0)), + Temps0 = all_temps(CFG0, Formals), + MinFrame = defun_minframe(CFG0), Temps = ensure_minframe(MinFrame, Temps0), - CFG0 = hipe_x86_cfg:init(Defun), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps), - hipe_x86_cfg:linearise(CFG1). + do_body(CFG0, Liveness, Formals, Temps). fix_formals(Formals) -> fix_formals(?HIPE_X86_REGISTERS:nr_args(), Formals). @@ -69,23 +67,14 @@ do_body(CFG0, Liveness, Formals, Temps) -> do_prologue(CFG1, Context). do_blocks(CFG, Context) -> - Labels = hipe_x86_cfg:labels(CFG), - do_blocks(Labels, CFG, Context). + hipe_x86_cfg:map_bbs(fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG). -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) -> Liveness = context_liveness(Context), LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label), - Block = hipe_x86_cfg:bb(CFG, Label), Code = hipe_bb:code(Block), - NewCode = do_block(Code, LiveOut, Context), - NewBlock = hipe_bb:code_update(Block, NewCode), - NewCFG = hipe_x86_cfg:bb_add(CFG, Label, NewBlock), - do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> - CFG. - -do_block(Insns, LiveOut, Context) -> - do_block(Insns, LiveOut, Context, context_framesize(Context), []). + NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), + hipe_bb:code_update(Block, NewCode). do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), @@ -609,39 +598,46 @@ temp_is_pseudo(Temp) -> %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), S2 = tset_filter(S1, fun(T) -> temp_is_pseudo(T) end), S2. -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_x86_defuse:insn_def(I)), - S2 = tset_add_list(S1, hipe_x86_defuse:insn_use(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_x86_defuse:insn_use(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_x86_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). + +-compile({inline, [tset_empty/0, tset_size/1, tset_insert/2, + tset_filter/2, tset_to_list/1]}). tset_empty() -> - gb_sets:new(). + #{}. tset_size(S) -> - gb_sets:size(S). + map_size(S). tset_insert(S, T) -> - gb_sets:add_element(T, S). + S#{T => []}. -tset_add_list(S, Ts) -> - gb_sets:union(S, gb_sets:from_list(Ts)). +tset_add_list(S, []) -> S; +tset_add_list(S, [T|Ts]) -> + tset_add_list(S#{T => []}, Ts). -tset_del_list(S, Ts) -> - gb_sets:subtract(S, gb_sets:from_list(Ts)). +tset_del_list(S, []) -> S; +tset_del_list(S, [T|Ts]) -> + tset_del_list(maps:remove(T,S), Ts). tset_filter(S, F) -> - gb_sets:filter(F, S). + maps:filter(fun(K, _V) -> F(K) end, S). tset_to_list(S) -> - gb_sets:to_list(S). + maps:keys(S). %%% %%% Compute minimum permissible frame size, ignoring spilled temps. @@ -649,16 +645,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_x86:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_x86:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_x86_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/x86/hipe_x86_main.erl b/lib/hipe/x86/hipe_x86_main.erl index 13b0bb6b28..341269b698 100644 --- a/lib/hipe/x86/hipe_x86_main.erl +++ b/lib/hipe/x86/hipe_x86_main.erl @@ -53,19 +53,23 @@ ?RTL_TO_X86(MFA, RTL, Options) -> Translated = ?option_time(?HIPE_RTL_TO_X86:translate(RTL), "RTL-to-"?X86STR, Options), - SpillRest = + TransCFG = ?option_time(hipe_x86_cfg:init(Translated), + ?X86STR" to cfg", Options), + SpillRestCFG = case proplists:get_bool(caller_save_spill_restore, Options) of true -> - ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(Translated, Options), + ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(TransCFG, Options), ?X86STR" spill restore", Options); false -> - Translated + TransCFG end, - Allocated = ?option_time(?HIPE_X86_RA:ra(SpillRest, Options), - ?X86STR" register allocation", Options), - Framed = ?option_time(?HIPE_X86_FRAME:frame(Allocated, Options), - ?X86STR" frame", Options), - Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options), - ?X86STR" finalise", Options), + AllocatedCFG = ?option_time(?HIPE_X86_RA:ra(SpillRestCFG, Options), + ?X86STR" register allocation", Options), + FramedCFG = ?option_time(?HIPE_X86_FRAME:frame(AllocatedCFG, Options), + ?X86STR" frame", Options), + Framed = ?option_time(hipe_x86_cfg:linearise(FramedCFG), + ?X86STR" linearise", Options), + Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options), + ?X86STR" finalise", Options), ?HIPE_X86_PP:optional_pp(Finalised, MFA, Options), {native, ?X86TAG, {unprofiled, Finalised}}. diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl index 939baeccec..4515822a34 100644 --- a/lib/hipe/x86/hipe_x86_postpass.erl +++ b/lib/hipe/x86/hipe_x86_postpass.erl @@ -95,7 +95,8 @@ peep([I=#move{src=#x86_temp{reg=Src}, dst=#x86_temp{reg=Dst}}, %% ElimBinALMDouble %% ---------------- -peep([Move=#move{src=Src, dst=Dst}, Alu=#alu{src=Src, dst=Dst}|Insns], Res, Lst) -> +peep([Move=#move{src=Src, dst=Dst}, Alu=#alu{src=Src, dst=Dst}|Insns], Res, Lst) + when not is_record(Dst, x86_mem) -> peep([Alu#alu{src=Dst}|Insns], [Move|Res], [elimBinALMDouble|Lst]); diff --git a/lib/hipe/x86/hipe_x86_ra.erl b/lib/hipe/x86/hipe_x86_ra.erl index f66961a7a7..3af333ab4b 100644 --- a/lib/hipe/x86/hipe_x86_ra.erl +++ b/lib/hipe/x86/hipe_x86_ra.erl @@ -41,60 +41,80 @@ %%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. -include("../main/hipe.hrl"). -ra(Defun0, Options) -> - %% ?HIPE_X86_PP:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} = ra_fp(Defun0, Options), - %% ?HIPE_X86_PP:pp(Defun1), +-ifdef(HIPE_INSTRUMENT_COMPILER). +code_size(CFG) -> + hipe_x86_cfg:fold_bbs(fun(_, BB, Acc) -> Acc + length(hipe_bb:code(BB)) end, + 0, CFG). +-endif. %% ifdef(HIPE_INSTRUMENT_COMPILER) + +ra(CFG0, Options) -> + %% hipe_x86_cfg:pp(CFG0), + {CFG1, Coloring_fp, SpillIndex, Liveness} = + case ra_fp(CFG0, Options) of + {G, C, I} -> {G, C, I, undefined}; + {_,_,_,_}=T -> T + end, + %% hipe_x86_cfg:pp(CFG1), ?start_ra_instrumentation(Options, - length(hipe_x86:defun_code(Defun1)), - element(2,hipe_x86:defun_var_range(Defun1))), - {Defun2, Coloring} + code_size(CFG1), + element(2,hipe_gensym:var_range(x86))), + {CFG2, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, SpillIndex, Options, hipe_graph_coloring_regalloc); linear_scan -> - ?HIPE_X86_RA_LS:ra(Defun1, SpillIndex, Options); + ?HIPE_X86_RA_LS:ra(CFG1, Liveness, SpillIndex, Options); naive -> - ?HIPE_X86_RA_NAIVE:ra(Defun1, Coloring_fp, Options); + ?HIPE_X86_RA_NAIVE:ra(CFG1, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, ?stop_ra_instrumentation(Options, - length(hipe_x86:defun_code(Defun2)), - element(2,hipe_x86:defun_var_range(Defun2))), - %% ?HIPE_X86_PP:pp(Defun2), - ?HIPE_X86_RA_FINALISE:finalise(Defun2, Coloring, Coloring_fp, Options). + code_size(CFG2), + element(2,hipe_gensym:var_range(x86))), + %% hipe_x86_cfg:pp(CFG2), + ?HIPE_X86_RA_FINALISE:finalise(CFG2, Coloring, Coloring_fp, Options). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, ?HIPE_X86_SPECIFIC). +ra(CFG, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, SpillIndex, Options, RegAllocMod, ?HIPE_X86_SPECIFIC). -ifdef(HIPE_AMD64). -ra_fp(Defun, Options) -> - case proplists:get_bool(inline_fp, Options) and - (proplists:get_value(regalloc, Options) =/= naive) of - true -> - case proplists:get_bool(x87, Options) of - true -> - hipe_amd64_ra_x87_ls:ra(Defun, Options); - false -> - hipe_regalloc_loop:ra_fp(Defun, Options, - hipe_coalescing_regalloc, - hipe_amd64_specific_sse2) - end; - false -> - {Defun,[],0} +ra_fp(CFG, Options) -> + Regalloc0 = proplists:get_value(regalloc, Options), + {Regalloc, TargetMod} = + case proplists:get_bool(inline_fp, Options) and (Regalloc0 =/= naive) of + false -> {naive, undefined}; + true -> + case proplists:get_bool(x87, Options) of + true -> {linear_scan, hipe_amd64_specific_x87}; + false -> {Regalloc0, hipe_amd64_specific_sse2} + end + end, + case Regalloc of + coalescing -> ra_fp(CFG, Options, hipe_coalescing_regalloc, TargetMod); + optimistic -> ra_fp(CFG, Options, hipe_optimistic_regalloc, TargetMod); + graph_color -> ra_fp(CFG, Options, hipe_graph_coloring_regalloc, + TargetMod); + linear_scan -> hipe_amd64_ra_ls:ra_fp(CFG, Options, TargetMod); + naive -> {CFG,[],0}; + _ -> + exit({unknown_regalloc_compiler_option, + proplists:get_value(regalloc,Options)}) end. + +ra_fp(CFG, Options, RegAllocMod, TargetMod) -> + hipe_regalloc_loop:ra_fp(CFG, Options, RegAllocMod, TargetMod). -else. -ra_fp(Defun, Options) -> +ra_fp(CFG, Options) -> case proplists:get_bool(inline_fp, Options) of true -> - hipe_x86_ra_x87_ls:ra(Defun, Options); + hipe_x86_ra_ls:ra_fp(CFG, Options, hipe_x86_specific_x87); false -> - {Defun,[],0} + {CFG,[],0} end. -endif. diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl index 5dd75cb7ae..62009abb76 100644 --- a/lib/hipe/x86/hipe_x86_ra_finalise.erl +++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl @@ -25,23 +25,36 @@ -define(HIPE_X86_RA_FINALISE, hipe_amd64_ra_finalise). -define(HIPE_X86_REGISTERS, hipe_amd64_registers). -define(HIPE_X86_X87, hipe_amd64_x87). +-define(HIPE_X86_SSE2, hipe_amd64_sse2). +-define(IF_HAS_SSE2(Expr), Expr). -else. -define(HIPE_X86_RA_FINALISE, hipe_x86_ra_finalise). -define(HIPE_X86_REGISTERS, hipe_x86_registers). -define(HIPE_X86_X87, hipe_x86_x87). +-define(IF_HAS_SSE2(Expr),). -endif. -module(?HIPE_X86_RA_FINALISE). -export([finalise/4]). -include("../x86/hipe_x86.hrl"). -finalise(Defun, TempMap, FpMap, Options) -> - Defun1 = finalise_ra(Defun, TempMap, FpMap, Options), +finalise(CFG0, TempMap, FpMap, Options) -> + CFG1 = finalise_ra(CFG0, TempMap, FpMap, Options), case proplists:get_bool(x87, Options) of true -> - ?HIPE_X86_X87:map(Defun1); + ?HIPE_X86_X87:map(CFG1); _ -> - Defun1 + case + proplists:get_bool(inline_fp, Options) + and (proplists:get_value(regalloc, Options) =:= linear_scan) + of + %% Ugly, but required to avoid Dialyzer complaints about "Unknown + %% function" hipe_x86_sse2:map/1 + ?IF_HAS_SSE2(true -> + ?HIPE_X86_SSE2:map(CFG1);) + false -> + CFG1 + end end. %%% @@ -50,15 +63,16 @@ finalise(Defun, TempMap, FpMap, Options) -> %%% but I just want this to work now) %%% -finalise_ra(Defun, [], [], _Options) -> - Defun; -finalise_ra(Defun, TempMap, FpMap, Options) -> - Code = hipe_x86:defun_code(Defun), - {_, SpillLimit} = hipe_x86:defun_var_range(Defun), +finalise_ra(CFG, [], [], _Options) -> + CFG; +finalise_ra(CFG, TempMap, FpMap, Options) -> + {_, SpillLimit} = hipe_gensym:var_range(x86), Map = mk_ra_map(TempMap, SpillLimit), FpMap0 = mk_ra_map_fp(FpMap, SpillLimit, Options), - NewCode = ra_code(Code, Map, FpMap0), - Defun#defun{code=NewCode}. + hipe_x86_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FpMap0) end, CFG). + +ra_bb(BB, Map, FpMap) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap)). ra_code(Code, Map, FpMap) -> [ra_insn(I, Map, FpMap) || I <- Code]. diff --git a/lib/hipe/x86/hipe_x86_ra_ls.erl b/lib/hipe/x86/hipe_x86_ra_ls.erl index 3e34433111..9f019f9561 100644 --- a/lib/hipe/x86/hipe_x86_ra_ls.erl +++ b/lib/hipe/x86/hipe_x86_ra_ls.erl @@ -35,40 +35,65 @@ -endif. -module(?HIPE_X86_RA_LS). --export([ra/3,regalloc/7]). +-export([ra/4,ra_fp/3]). -define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. -include("../main/hipe.hrl"). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_x86_cfg:init(NewDefun), - +ra(CFG, Liveness, SpillIndex, Options) -> SpillLimit = ?HIPE_X86_SPECIFIC:number_of_temporaries( CFG), ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), - alloc(NewDefun, SpillIndex, SpillLimit, Options). + alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). + +ra_fp(CFG, Options, TargetMod) -> + ?inc_counter(ra_calls_counter,1), + %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), + SpillIndex = 0, + SpillLimit = TargetMod:number_of_temporaries(CFG), + ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), + + ?inc_counter(ra_iteration_counter,1), + %% ?HIPE_X86_PP:pp(Defun), + {Coloring,NewSpillIndex,Liveness} = + regalloc(CFG, + undefined, + TargetMod:allocatable('linearscan'), + [hipe_x86_cfg:start_label(CFG)], + SpillIndex, SpillLimit, Options, + TargetMod), + + {NewCFG, _DidSpill} = + TargetMod:check_and_rewrite(CFG, Coloring, 'linearscan'), + TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod), + {TempMap2, NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + TargetMod, TempMap), + Coloring2 = + hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), + ?add_spills(Options, NewSpillIndex), + {NewCFG, Coloring2, NewSpillIndex2, Liveness}. -alloc(Defun, SpillIndex, SpillLimit, Options) -> +alloc(CFG, Liveness0, SpillIndex, SpillLimit, Options) -> ?inc_counter(ra_iteration_counter,1), %% ?HIPE_X86_PP:pp(Defun), - CFG = hipe_x86_cfg:init(Defun), - {Coloring, NewSpillIndex} = + {Coloring, NewSpillIndex, Liveness} = regalloc( - CFG, + CFG, + Liveness0, ?HIPE_X86_REGISTERS:allocatable()-- [?HIPE_X86_REGISTERS:temp1(), ?HIPE_X86_REGISTERS:temp0()], [hipe_x86_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, ?HIPE_X86_SPECIFIC), - {NewDefun, _DidSpill} = + {NewCFG, _DidSpill} = ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite( - Defun, Coloring, 'linearscan'), + CFG, Coloring, 'linearscan'), %% ?HIPE_X86_PP:pp(NewDefun), TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), - {TempMap2,NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, + {TempMap2,NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, ?HIPE_X86_SPECIFIC, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), @@ -79,8 +104,9 @@ alloc(Defun, SpillIndex, SpillLimit, Options) -> ok end, ?add_spills(Options, NewSpillIndex), - {NewDefun, Coloring2}. + {NewCFG, Coloring2}. -regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, DontSpill, Options, Target) -> - hipe_ls_regalloc:regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, +regalloc(CFG,Liveness,PhysRegs,Entrypoints, SpillIndex, DontSpill, Options, + Target) -> + hipe_ls_regalloc:regalloc(CFG,Liveness,PhysRegs,Entrypoints, SpillIndex, DontSpill, Options, Target). diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl index 0ef4ef0a04..27e5af4aee 100644 --- a/lib/hipe/x86/hipe_x86_ra_naive.erl +++ b/lib/hipe/x86/hipe_x86_ra_naive.erl @@ -39,9 +39,8 @@ -define(HIPE_INSTRUMENT_COMPILER, true). % enable instrumentation -include("../main/hipe.hrl"). -ra(X86Defun, Coloring_fp, Options) -> - #defun{code=Code0} = X86Defun, - Code1 = do_insns(Code0), +ra(CFG0, Coloring_fp, Options) -> + CFG = hipe_x86_cfg:map_bbs(fun do_bb/2, CFG0), NofSpilledFloats = count_non_float_spills(Coloring_fp), NofFloats = length(Coloring_fp), ?add_spills(Options, hipe_gensym:get_var(x86) - @@ -49,10 +48,12 @@ ra(X86Defun, Coloring_fp, Options) -> NofSpilledFloats - NofFloats), TempMap = [], - {X86Defun#defun{code=Code1, - var_range={0, hipe_gensym:get_var(x86)}}, + {CFG, TempMap}. +do_bb(_Lbl, BB) -> + hipe_bb:code_update(BB, do_insns(hipe_bb:code(BB))). + count_non_float_spills(Coloring_fp) -> count_non_float_spills(Coloring_fp, 0). diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl index 0a70bd1d22..c73d029b9b 100644 --- a/lib/hipe/x86/hipe_x86_ra_postconditions.erl +++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl @@ -40,14 +40,18 @@ -include("../main/hipe.hrl"). -define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). -check_and_rewrite(Defun, Coloring, Strategy) -> +check_and_rewrite(CFG, Coloring, Strategy) -> %% io:format("Converting\n"), TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), %% io:format("Rewriting\n"), - #defun{code=Code0} = Defun, - {Code1, DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - {Defun#defun{code=Code1,var_range={0,hipe_gensym:get_var(x86)}}, - DidSpill}. + do_bbs(hipe_x86_cfg:labels(CFG), TempMap, Strategy, CFG, false). + +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_x86_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_x86_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), @@ -169,14 +173,22 @@ do_jmp_switch(I, TempMap, Strategy) -> %%% Fix a lea op. do_lea(I, TempMap, Strategy) -> - #lea{temp=Temp} = I, - case is_spilled(Temp, TempMap) of - false -> - {[I], false}; - true -> - NewTmp = spill_temp('untagged', Strategy), - {[I#lea{temp=NewTmp}, hipe_x86:mk_move(NewTmp, Temp)], - true} + #lea{mem=Mem0,temp=Temp0} = I, + {FixMem, Mem, DidSpill1} = fix_mem_operand(Mem0, TempMap, temp1(Strategy)), + case Mem of + #x86_mem{base=Base, off=#x86_imm{value=0}} -> + %% We've decayed into a move due to both operands being memory (there's an + %% 'add' in FixMem). + {FixMem ++ [hipe_x86:mk_move(Base, Temp0)], DidSpill1}; + #x86_mem{} -> + {StoreTemp, Temp, DidSpill2} = + case is_mem_opnd(Temp0, TempMap) of + false -> {[], Temp0, false}; + true -> + Temp1 = clone2(Temp0, temp0(Strategy)), + {[hipe_x86:mk_move(Temp1, Temp0)], Temp1, true} + end, + {FixMem ++ [I#lea{mem=Mem,temp=Temp} | StoreTemp], DidSpill1 or DidSpill2} end. %%% Fix a move op. diff --git a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl b/lib/hipe/x86/hipe_x86_ra_x87_ls.erl deleted file mode 100644 index 1ee76e5948..0000000000 --- a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl +++ /dev/null @@ -1,64 +0,0 @@ -%% $Id$ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2006-2016. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% - -%% Linear Scan register allocator for x87 - --ifdef(HIPE_AMD64). --define(HIPE_X86_RA_X87_LS, hipe_amd64_ra_x87_ls). --define(HIPE_X86_SPECIFIC_X87, hipe_amd64_specific_x87). --define(HIPE_X86_PP, hipe_amd64_pp). --define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). --else. --define(HIPE_X86_RA_X87_LS, hipe_x86_ra_x87_ls). --define(HIPE_X86_SPECIFIC_X87, hipe_x86_specific_x87). --define(HIPE_X86_PP, hipe_x86_pp). --define(HIPE_X86_RA_LS, hipe_x86_ra_ls). --endif. - --module(?HIPE_X86_RA_X87_LS). --export([ra/2]). - -%%-define(DEBUG,1). - --define(HIPE_INSTRUMENT_COMPILER, false). %% Turn off instrumentation. --include("../main/hipe.hrl"). - -ra(Defun, Options) -> - ?inc_counter(ra_calls_counter,1), - CFG = hipe_x86_cfg:init(Defun), - %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), - SpillIndex = 0, - SpillLimit = ?HIPE_X86_SPECIFIC_X87:number_of_temporaries(CFG), - ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), - - ?inc_counter(ra_iteration_counter,1), - %% ?HIPE_X86_PP:pp(Defun), - Cfg = hipe_x86_cfg:init(Defun), % XXX: didn't we just compute this above? - - {Coloring,NewSpillIndex} = - ?HIPE_X86_RA_LS:regalloc(Cfg, - ?HIPE_X86_SPECIFIC_X87:allocatable(), - [hipe_x86_cfg:start_label(Cfg)], - SpillIndex, SpillLimit, Options, - ?HIPE_X86_SPECIFIC_X87), - - ?add_spills(Options, NewSpillIndex), - {Defun, Coloring, NewSpillIndex}. diff --git a/lib/hipe/x86/hipe_x86_spill_restore.erl b/lib/hipe/x86/hipe_x86_spill_restore.erl index f17b91f33b..32b1eb7b40 100644 --- a/lib/hipe/x86/hipe_x86_spill_restore.erl +++ b/lib/hipe/x86/hipe_x86_spill_restore.erl @@ -25,13 +25,11 @@ -ifdef(HIPE_AMD64). -define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore). -define(HIPE_X86_LIVENESS, hipe_amd64_liveness). --define(HIPE_X86_SPECIFIC, hipe_amd64_specific). -define(HIPE_X86_REGISTERS, hipe_amd64_registers). -define(X86STR, "amd64"). -else. -define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore). -define(HIPE_X86_LIVENESS, hipe_x86_liveness). --define(HIPE_X86_SPECIFIC, hipe_x86_specific). -define(HIPE_X86_REGISTERS, hipe_x86_registers). -define(X86STR, "x86"). -endif. @@ -51,15 +49,13 @@ -include("../flow/cfg.hrl"). % Added for the definition of #cfg{} %% Main function -spill_restore(Defun, Options) -> - CFG = ?option_time(firstPass(Defun), ?X86STR" First Pass", Options), - CFGFinal = ?option_time(secondPass(CFG), ?X86STR" Second Pass", Options), - hipe_x86_cfg:linearise(CFGFinal). +spill_restore(CFG0, Options) -> + CFG1 = ?option_time(firstPass(CFG0), ?X86STR" First Pass", Options), + ?option_time(secondPass(CFG1), ?X86STR" Second Pass", Options). %% Performs the first pass of the algorithm. %% By working bottom up, introduce the pseudo_spills. -firstPass(Defun) -> - CFG0 = ?HIPE_X86_SPECIFIC:defun_to_cfg(Defun), +firstPass(CFG0) -> %% get the labels bottom up Labels = hipe_x86_cfg:postorder(CFG0), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), diff --git a/lib/hipe/x86/hipe_x86_x87.erl b/lib/hipe/x86/hipe_x86_x87.erl index e874490252..10bb6aa75c 100644 --- a/lib/hipe/x86/hipe_x86_x87.erl +++ b/lib/hipe/x86/hipe_x86_x87.erl @@ -41,13 +41,12 @@ %%---------------------------------------------------------------------- -map(Defun) -> - CFG0 = hipe_x86_cfg:init(Defun), +map(CFG0) -> %% hipe_x86_cfg:pp(CFG0), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), StartLabel = hipe_x86_cfg:start_label(CFG0), {CFG1,_} = do_blocks([], [StartLabel], CFG0, Liveness, [], gb_trees:empty()), - hipe_x86_cfg:linearise(CFG1). + CFG1. do_blocks(Pred, [Lbl|Lbls], CFG, Liveness, Map, BlockMap) -> case gb_trees:lookup(Lbl, BlockMap) of |