diff options
Diffstat (limited to 'lib/hipe')
108 files changed, 4070 insertions, 1738 deletions
diff --git a/lib/hipe/amd64/Makefile b/lib/hipe/amd64/Makefile index 8dc2af2679..617f6749ac 100644 --- a/lib/hipe/amd64/Makefile +++ b/lib/hipe/amd64/Makefile @@ -57,10 +57,11 @@ MODULES=hipe_amd64_assemble \ hipe_amd64_ra_naive \ hipe_amd64_ra_postconditions \ hipe_amd64_ra_sse2_postconditions \ - hipe_amd64_ra_x87_ls \ hipe_amd64_registers \ hipe_amd64_spill_restore \ + hipe_amd64_subst \ hipe_amd64_x87 \ + hipe_amd64_sse2 \ hipe_rtl_to_amd64 ERL_FILES=$(MODULES:%=%.erl) @@ -125,10 +126,10 @@ $(EBIN)/hipe_amd64_ra_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_ls.erl $(EBIN)/hipe_amd64_ra_naive.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_naive.erl $(EBIN)/hipe_amd64_ra_postconditions.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_postconditions.erl $(EBIN)/hipe_amd64_ra_sse2_postconditions.beam: ../main/hipe.hrl -$(EBIN)/hipe_amd64_ra_x87_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_x87_ls.erl $(EBIN)/hipe_amd64_registers.beam: ../rtl/hipe_literals.hrl $(EBIN)/hipe_amd64_spill_restore.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../flow/cfg.hrl ../x86/hipe_x86_spill_restore.erl $(EBIN)/hipe_amd64_x87.beam: ../x86/hipe_x86_x87.erl +$(EBIN)/hipe_amd64_sse2.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl $(EBIN)/hipe_rtl_to_amd64.beam: ../x86/hipe_rtl_to_x86.erl ../rtl/hipe_rtl.hrl $(TARGET_FILES): ../x86/hipe_x86.hrl ../misc/hipe_consttab.hrl diff --git a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl index b1f7bd7572..d062c0b37c 100644 --- a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl +++ b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl @@ -21,7 +21,7 @@ -module(hipe_amd64_ra_sse2_postconditions). --export([check_and_rewrite/2]). +-export([check_and_rewrite/2, check_and_rewrite/3]). -include("../x86/hipe_x86.hrl"). -define(HIPE_INSTRUMENT_COMPILER, true). @@ -29,40 +29,48 @@ -define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). -check_and_rewrite(AMD64Defun, Coloring) -> +check_and_rewrite(AMD64CFG, Coloring) -> + check_and_rewrite(AMD64CFG, Coloring, 'normal'). + +check_and_rewrite(AMD64CFG, Coloring, Strategy) -> %%io:format("Converting\n"), - TempMap = hipe_temp_map:cols2tuple(Coloring,hipe_amd64_specific_sse2), + TempMap = hipe_temp_map:cols2tuple(Coloring,hipe_amd64_specific_sse2,no_context), %%io:format("Rewriting\n"), - #defun{code=Code0} = AMD64Defun, - {Code1, DidSpill} = do_insns(Code0, TempMap, [], false), - {AMD64Defun#defun{code=Code1, var_range={0, hipe_gensym:get_var(x86)}}, - DidSpill}. - -do_insns([I|Insns], TempMap, Accum, DidSpill0) -> - {NewIs, DidSpill1} = do_insn(I, TempMap), - do_insns(Insns, TempMap, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); -do_insns([], _TempMap, Accum, DidSpill) -> + do_bbs(hipe_x86_cfg:labels(AMD64CFG), TempMap, Strategy, AMD64CFG, false). + +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_x86_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_x86_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). + +do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> + {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), + do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), + DidSpill0 or DidSpill1); +do_insns([], _TempMap, _Strategy, Accum, DidSpill) -> {lists:reverse(Accum), DidSpill}. -do_insn(I, TempMap) -> % Insn -> {Insn list, DidSpill} +do_insn(I, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill} case I of #fmove{} -> - do_fmove(I, TempMap); + do_fmove(I, TempMap, Strategy); #fp_unop{} -> - do_fp_unop(I, TempMap); + do_fp_unop(I, TempMap, Strategy); #fp_binop{} -> - do_fp_binop(I, TempMap); + do_fp_binop(I, TempMap, Strategy); _ -> %% All non sse2 ops {[I], false} end. %%% Fix an fp_binop. -do_fp_binop(I, TempMap) -> +do_fp_binop(I, TempMap, Strategy) -> #fp_binop{src=Src,dst=Dst} = I, case is_mem_opnd(Dst, TempMap) of true -> - Tmp = clone(Dst), + Tmp = clone(Dst, Strategy), {[#fmove{src=Dst, dst=Tmp}, I#fp_binop{src=Src,dst=Tmp}, #fmove{src=Tmp,dst=Dst}], @@ -71,11 +79,11 @@ do_fp_binop(I, TempMap) -> {[I], false} end. -do_fp_unop(I, TempMap) -> +do_fp_unop(I, TempMap, Strategy) -> #fp_unop{arg=Arg} = I, case is_mem_opnd(Arg, TempMap) of true -> - Tmp = clone(Arg), + Tmp = clone(Arg, Strategy), {[#fmove{src=Arg, dst=Tmp}, I#fp_unop{arg=Tmp}, #fmove{src=Tmp,dst=Arg}], @@ -85,7 +93,7 @@ do_fp_unop(I, TempMap) -> end. %%% Fix an fmove op. -do_fmove(I, TempMap) -> +do_fmove(I, TempMap, Strategy) -> #fmove{src=Src,dst=Dst} = I, case (is_mem_opnd(Src, TempMap) andalso is_mem_opnd(Dst, TempMap)) @@ -93,7 +101,7 @@ do_fmove(I, TempMap) -> orelse ((not is_float_temp(Src)) andalso is_mem_opnd(Dst, TempMap)) of true -> - Tmp = spill_temp(double), + Tmp = spill_temp(double, Strategy), {[#fmove{src=Src, dst=Tmp},I#fmove{src=Tmp,dst=Dst}], true}; false -> @@ -106,86 +114,40 @@ is_float_temp(#x86_mem{}) -> false. %%% Check if an operand denotes a memory cell (mem or pseudo). is_mem_opnd(Opnd, TempMap) -> - R = - case Opnd of - #x86_mem{} -> true; - #x86_temp{type=double} -> - Reg = hipe_x86:temp_reg(Opnd), - case hipe_x86:temp_is_allocatable(Opnd) of - true -> - case tuple_size(TempMap) > Reg of - true -> - case - hipe_temp_map:is_spilled(Reg, TempMap) of - true -> - ?count_temp(Reg), - true; - false -> false - end; - _ -> false - end; - false -> true - end; - _ -> false - end, - %% io:format("Op ~w mem: ~w\n",[Opnd,R]), - R. - -%%% Check if an operand is a spilled Temp. - -%%src_is_spilled(Src, TempMap) -> -%% case hipe_x86:is_temp(Src) of -%% true -> -%% Reg = hipe_x86:temp_reg(Src), -%% case hipe_x86:temp_is_allocatable(Src) of -%% true -> -%% case tuple_size(TempMap) > Reg of -%% true -> -%% case hipe_temp_map:is_spilled(Reg, TempMap) of -%% true -> -%% ?count_temp(Reg), -%% true; -%% false -> -%% false -%% end; -%% false -> -%% false -%% end; -%% false -> true -%% end; -%% false -> false -%% end. - -%% is_spilled(Temp, TempMap) -> -%% case hipe_x86:temp_is_allocatable(Temp) of -%% true -> -%% Reg = hipe_x86:temp_reg(Temp), -%% case tuple_size(TempMap) > Reg of -%% true -> -%% case hipe_temp_map:is_spilled(Reg, TempMap) of -%% true -> -%% ?count_temp(Reg), -%% true; -%% false -> -%% false -%% end; -%% false -> -%% false -%% end; -%% false -> true -%% end. + case Opnd of + #x86_mem{} -> true; + #x86_temp{type=double} -> + Reg = hipe_x86:temp_reg(Opnd), + case hipe_x86:temp_is_allocatable(Opnd) of + true -> + case hipe_temp_map:is_spilled(Reg, TempMap) of + true -> + ?count_temp(Reg), + true; + false -> false + end; + false -> true + end; + _ -> false + end. %%% Make Reg a clone of Dst (attach Dst's type to Reg). -clone(Dst) -> +clone(Dst, Strategy) -> Type = case Dst of #x86_mem{} -> hipe_x86:mem_type(Dst); #x86_temp{} -> hipe_x86:temp_type(Dst) end, - spill_temp(Type). - -spill_temp(Type) -> + spill_temp(Type, Strategy). + +spill_temp(Type, 'normal') -> + hipe_x86:mk_new_temp(Type); +spill_temp(double, 'linearscan') -> + hipe_x86:mk_temp(hipe_amd64_specific_sse2:temp0(no_context), double); +spill_temp(Type, 'linearscan') when Type =:= tagged; Type =/= untagged -> + %% We can make a new temp here since we have yet to allocate registers for + %% these types hipe_x86:mk_new_temp(Type). %%% Make a certain reg into a clone of Dst diff --git a/lib/hipe/amd64/hipe_amd64_registers.erl b/lib/hipe/amd64/hipe_amd64_registers.erl index 780c2cc547..7c6965b938 100644 --- a/lib/hipe/amd64/hipe_amd64_registers.erl +++ b/lib/hipe/amd64/hipe_amd64_registers.erl @@ -52,6 +52,7 @@ tailcall_clobbered/0, temp0/0, temp1/0, + sse2_temp0/0, %% fixed/0, wordsize/0 ]). @@ -107,6 +108,8 @@ heap_limit_offset() -> ?P_HP_LIMIT. -define(TEMP0, ?R14). -define(TEMP1, ?R13). +-define(SSE2_TEMP0, 00). + -define(PROC_POINTER, ?RBP). reg_name(R) -> @@ -204,6 +207,8 @@ allocatable() -> allocatable_sse2() -> [00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15]. %% xmm0 - xmm15 +sse2_temp0() -> ?SSE2_TEMP0. + allocatable_x87() -> [0,1,2,3,4,5,6]. @@ -248,6 +253,9 @@ ret(N) -> _ -> exit({?MODULE, ret, N}) end. +%% Note: the fact that (allocatable() UNION allocatable_x87() UNION +%% allocatable_sse2()) is a subset of call_clobbered() is hard-coded in +%% hipe_x86_defuse:insn_defs_all/1 call_clobbered() -> [{?RAX,tagged},{?RAX,untagged}, % does the RA strip the type or not? {?RDX,tagged},{?RDX,untagged}, diff --git a/lib/hipe/amd64/hipe_amd64_sse2.erl b/lib/hipe/amd64/hipe_amd64_sse2.erl new file mode 100644 index 0000000000..ea6b6cb9ba --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_sse2.erl @@ -0,0 +1,82 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2016. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% Fix {mem, mem} floating point operations that result from linear scan +%% allocated floats. + +-module(hipe_amd64_sse2). + +-export([map/1]). + +-include("../x86/hipe_x86.hrl"). +-include("../main/hipe.hrl"). + +%%---------------------------------------------------------------------- + +map(CFG) -> + hipe_x86_cfg:map_bbs(fun do_bb/2, CFG). + +do_bb(_Lbl, BB) -> + Code = do_insns(hipe_bb:code(BB), []), + hipe_bb:code_update(BB, Code). + +do_insns([I|Insns], Accum) -> + NewIs = do_insn(I), + do_insns(Insns, lists:reverse(NewIs, Accum)); +do_insns([], Accum) -> + lists:reverse(Accum). + +do_insn(I) -> + case I of + #fp_binop{} -> do_fp_binop(I); + #fmove{} -> do_fmove(I); + _ -> [I] + end. + +do_fp_binop(I = #fp_binop{src=Src0,dst=Dst}) -> + {FixSrc, Src} = fix_binary(Src0, Dst), + FixSrc ++ [I#fp_binop{src=Src}]. + +do_fmove(I = #fmove{src=Src0,dst=Dst}) -> + {FixSrc, Src} = fix_binary(Src0, Dst), + FixSrc ++ [I#fmove{src=Src}]. + +fix_binary(Src0, Dst) -> + case is_mem_opnd(Src0) of + false -> {[], Src0}; + true -> + case is_mem_opnd(Dst) of + false -> {[], Src0}; + true -> + Src1 = spill_temp(), + {[hipe_x86:mk_fmove(Src0, Src1)], Src1} + end + end. + +is_mem_opnd(#x86_fpreg{reg=Reg}) -> + not hipe_amd64_registers:is_precoloured_sse2(Reg); +is_mem_opnd(#x86_temp{type=double, reg=Reg}) -> + not hipe_amd64_registers:is_precoloured_sse2(Reg); +is_mem_opnd(#x86_temp{type=_, reg=Reg}) -> + not hipe_amd64_registers:is_precoloured(Reg); +is_mem_opnd(#x86_mem{}) -> true. + +spill_temp() -> + hipe_x86:mk_temp(hipe_amd64_registers:sse2_temp0(), double). diff --git a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl b/lib/hipe/amd64/hipe_amd64_subst.erl index 6da3f44cd3..7d0f06684b 100644 --- a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl +++ b/lib/hipe/amd64/hipe_amd64_subst.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. +%% Copyright Ericsson AB 2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -18,4 +18,4 @@ %% %CopyrightEnd% %% --include("../x86/hipe_x86_ra_x87_ls.erl"). +-include("../x86/hipe_x86_subst.erl"). diff --git a/lib/hipe/arm/Makefile b/lib/hipe/arm/Makefile index 00b6732afa..ed2eccf428 100644 --- a/lib/hipe/arm/Makefile +++ b/lib/hipe/arm/Makefile @@ -61,6 +61,7 @@ MODULES=hipe_arm \ hipe_arm_ra_naive \ hipe_arm_ra_postconditions \ hipe_arm_registers \ + hipe_arm_subst \ hipe_rtl_to_arm HRL_FILES=hipe_arm.hrl diff --git a/lib/hipe/arm/hipe_arm_cfg.erl b/lib/hipe/arm/hipe_arm_cfg.erl index f2fa0a5164..2fb6675da9 100644 --- a/lib/hipe/arm/hipe_arm_cfg.erl +++ b/lib/hipe/arm/hipe_arm_cfg.erl @@ -24,6 +24,7 @@ -export([init/1, labels/1, start_label/1, succ/2, + map_bbs/2, fold_bbs/3, bb/2, bb_add/3]). -export([postorder/1]). -export([linearise/1]). @@ -35,6 +36,7 @@ -define(BREADTH_ORDER,true). % for linear scan -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -include("hipe_arm.hrl"). -include("../flow/cfg.hrl"). diff --git a/lib/hipe/arm/hipe_arm_defuse.erl b/lib/hipe/arm/hipe_arm_defuse.erl index f57b0e601c..f92cf4f82a 100644 --- a/lib/hipe/arm/hipe_arm_defuse.erl +++ b/lib/hipe/arm/hipe_arm_defuse.erl @@ -22,6 +22,7 @@ -module(hipe_arm_defuse). -export([insn_def_all/1, insn_use_all/1]). -export([insn_def_gpr/1, insn_use_gpr/1]). +-export([insn_defs_all_gpr/1]). -include("hipe_arm.hrl"). %%% @@ -55,6 +56,12 @@ insn_def_gpr(I) -> _ -> [] end. +insn_defs_all_gpr(I) -> + case I of + #pseudo_call{} -> true; + _ -> false + end. + call_clobbered_gpr() -> [hipe_arm:mk_temp(R, T) || {R,T} <- hipe_arm_registers:call_clobbered() ++ all_fp_pseudos()]. diff --git a/lib/hipe/arm/hipe_arm_finalise.erl b/lib/hipe/arm/hipe_arm_finalise.erl index a4b2f9c73c..55651d7180 100644 --- a/lib/hipe/arm/hipe_arm_finalise.erl +++ b/lib/hipe/arm/hipe_arm_finalise.erl @@ -20,13 +20,17 @@ %% -module(hipe_arm_finalise). --export([finalise/1]). +-export([finalise/2]). -include("hipe_arm.hrl"). -finalise(Defun) -> +finalise(Defun, Options) -> #defun{code=Code0} = Defun, - Code1 = peep(expand(Code0)), - Defun#defun{code=Code1}. + Code1Rev = expand(Code0), + Code2 = case proplists:get_bool(peephole, Options) of + true -> peep(Code1Rev); + false -> lists:reverse(Code1Rev) + end, + Defun#defun{code=Code2}. expand(Insns) -> expand_list(Insns, []). @@ -34,7 +38,7 @@ expand(Insns) -> expand_list([I|Insns], Accum) -> expand_list(Insns, expand_insn(I, Accum)); expand_list([], Accum) -> - lists:reverse(Accum). + Accum. expand_insn(I, Accum) -> case I of @@ -63,12 +67,67 @@ expand_insn(I, Accum) -> [I|Accum] end. -peep(Insns) -> - peep_list(Insns, []). +%% We do peephole "bottom-up" (in reverse, but applying rules to the correctly +%% ordered list). This way, we can do replacements that would take multiple +%% passes with an in-order peephole optimiser. +%% +%% N.B., if a rule wants to produce multiple instructions (even if some of them +%% are unchanged, it should push the additional instructions on the More list, +%% so that only the top instruction on Insns is new or changed, i.e. tl(Insns) +%% should have been peepholed previously. +peep(RevInsns) -> + peep_list_skip([], RevInsns). + +peep_list([#b_label{'cond'='al',label=Label} + | (Insns = [#label{label=Label}|_])], More) -> + peep_list_skip(Insns, More); + +peep_list([#move{movop='mov',s=false,dst=#arm_temp{reg=Dst} + ,am1=#arm_temp{reg=Dst}}|Insns], More) -> + peep_list_skip(Insns, More); + +peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsr,Imm}}, + #move{movop='mov',s=false,dst=Dst,am1={Dst,lsl,Imm}} + |Insns], More) when Imm > 0, Imm =< 8 -> + peep_list([#alu{aluop='bic',s=false,dst=Dst,src=Src,am1={(1 bsl Imm)-1,0}} + |Insns], More); +peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsl,Imm}}, + #move{movop='mov',s=false,dst=Dst,am1={Dst,lsr,Imm}} + |Insns], More) when Imm >= 24, Imm < 32 -> + peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src + ,am1={(1 bsl (32-Imm))-1,0}} | Insns], More); + +%% XXX: Load-after-store optimisation should also be applied to RTL, where it +%% can be more general, expose opportunities for constant propagation, etc. +peep_list([#store{stop='strb',src=Src,am2=Mem}=Str, + #load {ldop='ldrb',dst=Dst,am2=Mem} | Insns], More) -> + peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={16#ff,0}}|Insns], + [Str|More]); +peep_list([#store{stop='str',src=Src,am2=Mem}=Str, + #load {ldop='ldr',dst=Dst,am2=Mem} | Insns], More) -> + peep_list([#move{movop='mov',s=false,dst=Dst,am1=Src}|Insns], [Str|More]); + +peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={Mask,0}}, + #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={InvMask,0}} + |Insns], More) -> + peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src + ,am1={Mask band (bnot InvMask),0}} | Insns], More); + +%% XXX: The place that generates brain-dead code like the following should be +%% fixed rather than trying to patch it over here. +peep_list([#load{ldop='ldrb',dst=Dst,am2=_Mem}, + #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={16#ff,0}} + | Insns], More) -> + peep_list([#move{movop='mov',s=false,dst=Dst,am1={0,0}}|Insns], More); + +peep_list(Insns, [I|More]) -> + peep_list([I|Insns], More); +peep_list(Accum, []) -> + Accum. -peep_list([#b_label{'cond'='al',label=Label} | (Insns = [#label{label=Label}|_])], Accum) -> - peep_list(Insns, Accum); -peep_list([I|Insns], Accum) -> - peep_list(Insns, [I|Accum]); -peep_list([], Accum) -> - lists:reverse(Accum). +%% Used as an optimisation instead of tailcalling peep_list/2 when Insns has +%% already been peeped or is otherwise uninteresting (such as empty). +peep_list_skip(Insns, [I|More]) -> + peep_list([I|Insns], More); +peep_list_skip(Accum, []) -> + Accum. diff --git a/lib/hipe/arm/hipe_arm_frame.erl b/lib/hipe/arm/hipe_arm_frame.erl index e1e441a967..9a349b47d3 100644 --- a/lib/hipe/arm/hipe_arm_frame.erl +++ b/lib/hipe/arm/hipe_arm_frame.erl @@ -27,16 +27,14 @@ -define(LIVENESS_ALL, hipe_arm_liveness_gpr). % since we have no FP yet -frame(Defun) -> - Formals = fix_formals(hipe_arm:defun_formals(Defun)), - Temps0 = all_temps(hipe_arm:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG) -> + Formals = fix_formals(hipe_arm_cfg:params(CFG)), + Temps0 = all_temps(CFG, Formals), + MinFrame = defun_minframe(CFG), Temps = ensure_minframe(MinFrame, Temps0), - ClobbersLR = clobbers_lr(hipe_arm:defun_code(Defun)), - CFG0 = hipe_arm_cfg:init(Defun), - Liveness = ?LIVENESS_ALL:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersLR), - hipe_arm_cfg:linearise(CFG1). + ClobbersLR = clobbers_lr(CFG), + Liveness = ?LIVENESS_ALL:analyse(CFG), + do_body(CFG, Liveness, Formals, Temps, ClobbersLR). fix_formals(Formals) -> fix_formals(hipe_arm_registers:nr_args(), Formals). @@ -51,32 +49,21 @@ do_body(CFG0, Liveness, Formals, Temps, ClobbersLR) -> do_prologue(CFG1, Context). do_blocks(CFG, Context) -> - Labels = hipe_arm_cfg:labels(CFG), - do_blocks(Labels, CFG, Context). + hipe_arm_cfg:map_bbs(fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG). -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) -> Liveness = context_liveness(Context), LiveOut = ?LIVENESS_ALL:liveout(Liveness, Label), - Block = hipe_arm_cfg:bb(CFG, Label), Code = hipe_bb:code(Block), - NewCode = do_block(Code, LiveOut, Context), - NewBlock = hipe_bb:code_update(Block, NewCode), - NewCFG = hipe_arm_cfg:bb_add(CFG, Label, NewBlock), - do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> - CFG. - -do_block(Insns, LiveOut, Context) -> - do_block(Insns, LiveOut, Context, context_framesize(Context), []). + NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), + hipe_bb:code_update(Block, NewCode). do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), do_block(Insns, LiveOut, Context, FPoff1, lists:reverse(NewIs, RevCode)); do_block([], _, Context, FPoff, RevCode) -> FPoff0 = context_framesize(Context), - if FPoff =:= FPoff0 -> []; - true -> exit({?MODULE,do_block,FPoff}) - end, + FPoff0 = FPoff, lists:reverse(RevCode, []). do_insn(I, LiveOut, Context, FPoff) -> @@ -543,39 +530,46 @@ temp_is_pseudo(Temp) -> %%% Detect if a Defun's body clobbers LR. %%% -clobbers_lr(Insns) -> +clobbers_lr(CFG) -> LRreg = hipe_arm_registers:lr(), LRtagged = hipe_arm:mk_temp(LRreg, 'tagged'), LRuntagged = hipe_arm:mk_temp(LRreg, 'untagged'), - clobbers_lr(Insns, LRtagged, LRuntagged). - -clobbers_lr([I|Insns], LRtagged, LRuntagged) -> - Defs = hipe_arm_defuse:insn_def_gpr(I), - case lists:member(LRtagged, Defs) of - true -> true; - false -> - case lists:member(LRuntagged, Defs) of - true -> true; - false -> clobbers_lr(Insns, LRtagged, LRuntagged) - end - end; -clobbers_lr([], _LRtagged, _LRuntagged) -> false. + any_insn(fun(I) -> + Defs = hipe_arm_defuse:insn_def_gpr(I), + lists:member(LRtagged, Defs) + orelse lists:member(LRuntagged, Defs) + end, CFG). + +any_insn(Pred, CFG) -> + %% Abuse fold to do an efficient "any"-operation using nonlocal control flow + FoundSatisfying = make_ref(), + try fold_insns(fun (I, _) -> + case Pred(I) of + true -> throw(FoundSatisfying); + false -> false + end + end, false, CFG) + of _ -> false + catch FoundSatisfying -> true + end. %%% %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_arm_defuse:insn_def_all(I)), - S2 = tset_add_list(S1, hipe_arm_defuse:insn_use_all(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_arm_defuse:insn_use_all(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_arm_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). tset_empty() -> gb_sets:new(). @@ -604,16 +598,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_arm:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_arm:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_arm_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/arm/hipe_arm_main.erl b/lib/hipe/arm/hipe_arm_main.erl index dce1193b24..8a7fa86394 100644 --- a/lib/hipe/arm/hipe_arm_main.erl +++ b/lib/hipe/arm/hipe_arm_main.erl @@ -24,15 +24,17 @@ rtl_to_arm(MFA, RTL, Options) -> Defun1 = hipe_rtl_to_arm:translate(RTL), + CFG1 = hipe_arm_cfg:init(Defun1), %% io:format("~w: after translate\n", [?MODULE]), %% hipe_arm_pp:pp(Defun1), - Defun2 = hipe_arm_ra:ra(Defun1, Options), + CFG2 = hipe_arm_ra:ra(CFG1, Options), %% io:format("~w: after regalloc\n", [?MODULE]), - %% hipe_arm_pp:pp(Defun2), - Defun3 = hipe_arm_frame:frame(Defun2), + %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG2)), + CFG3 = hipe_arm_frame:frame(CFG2), + Defun3 = hipe_arm_cfg:linearise(CFG3), %% io:format("~w: after frame\n", [?MODULE]), %% hipe_arm_pp:pp(Defun3), - Defun4 = hipe_arm_finalise:finalise(Defun3), + Defun4 = hipe_arm_finalise:finalise(Defun3, Options), %% io:format("~w: after finalise\n", [?MODULE]), pp(Defun4, MFA, Options), {native, arm, {unprofiled, Defun4}}. diff --git a/lib/hipe/arm/hipe_arm_ra.erl b/lib/hipe/arm/hipe_arm_ra.erl index 2f65e864fd..bfb649326c 100644 --- a/lib/hipe/arm/hipe_arm_ra.erl +++ b/lib/hipe/arm/hipe_arm_ra.erl @@ -22,36 +22,40 @@ -module(hipe_arm_ra). -export([ra/2]). -ra(Defun0, Options) -> - %% hipe_arm_pp:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} +ra(CFG0, Options) -> + %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG0)), + {CFG1, _FPLiveness1, Coloring_fp, SpillIndex} = case proplists:get_bool(inline_fp, Options) of %% true -> -%% hipe_regalloc_loop:ra_fp(Defun0, Options, +%% FPLiveness0 = hipe_arm_specific_fp:analyze(CFG0, no_context), +%% hipe_regalloc_loop:ra_fp(CFG0, FPLiveness0, Options, %% hipe_coalescing_regalloc, -%% hipe_arm_specific_fp); +%% hipe_arm_specific_fp, no_context); false -> - {Defun0,[],0} + {CFG0,undefined,[],0} end, - %% hipe_arm_pp:pp(Defun1), - {Defun2, Coloring} + %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG1)), + GPLiveness1 = hipe_arm_specific:analyze(CFG1, no_context), + {CFG2, _GPLiveness2, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, + hipe_graph_coloring_regalloc); linear_scan -> - hipe_arm_ra_ls:ra(Defun1, SpillIndex, Options); + hipe_arm_ra_ls:ra(CFG1, GPLiveness1, SpillIndex, Options); naive -> - hipe_arm_ra_naive:ra(Defun1, Coloring_fp, Options); + hipe_arm_ra_naive:ra(CFG1, GPLiveness1, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, - %% hipe_arm_pp:pp(Defun2), - hipe_arm_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). + %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG2)), + hipe_arm_ra_finalise:finalise(CFG2, Coloring, Coloring_fp). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_arm_specific). +ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, + hipe_arm_specific, no_context). diff --git a/lib/hipe/arm/hipe_arm_ra_finalise.erl b/lib/hipe/arm/hipe_arm_ra_finalise.erl index 4faeadcd7f..2a3fded147 100644 --- a/lib/hipe/arm/hipe_arm_ra_finalise.erl +++ b/lib/hipe/arm/hipe_arm_ra_finalise.erl @@ -23,12 +23,13 @@ -export([finalise/3]). -include("hipe_arm.hrl"). -finalise(Defun, TempMap, _FPMap0=[]) -> - Code = hipe_arm:defun_code(Defun), - {_, SpillLimit} = hipe_arm:defun_var_range(Defun), +finalise(CFG, TempMap, _FPMap0=[]) -> + {_, SpillLimit} = hipe_gensym:var_range(arm), Map = mk_ra_map(TempMap, SpillLimit), - NewCode = ra_code(Code, Map, []), - Defun#defun{code=NewCode}. + hipe_arm_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map) end, CFG). + +ra_bb(BB, Map) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, [])). ra_code([I|Insns], Map, Accum) -> ra_code(Insns, Map, [ra_insn(I, Map) | Accum]); diff --git a/lib/hipe/arm/hipe_arm_ra_ls.erl b/lib/hipe/arm/hipe_arm_ra_ls.erl index d9a360d00c..0aa888da99 100644 --- a/lib/hipe/arm/hipe_arm_ra_ls.erl +++ b/lib/hipe/arm/hipe_arm_ra_ls.erl @@ -21,37 +21,35 @@ %%% Linear Scan register allocator for ARM -module(hipe_arm_ra_ls). --export([ra/3]). +-export([ra/4]). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_arm_cfg:init(NewDefun), - SpillLimit = hipe_arm_specific:number_of_temporaries(CFG), - alloc(NewDefun, SpillIndex, SpillLimit, Options). +ra(CFG, Liveness, SpillIndex, Options) -> + SpillLimit = hipe_arm_specific:number_of_temporaries(CFG, no_context), + alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). -alloc(Defun, SpillIndex, SpillLimit, Options) -> - CFG = hipe_arm_cfg:init(Defun), +alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) -> {Coloring, _NewSpillIndex} = regalloc( - CFG, + CFG, Liveness, hipe_arm_registers:allocatable_gpr()-- [hipe_arm_registers:temp3(), hipe_arm_registers:temp2(), hipe_arm_registers:temp1()], [hipe_arm_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, - hipe_arm_specific), - {NewDefun, _DidSpill} = + hipe_arm_specific, no_context), + {NewCFG, _DidSpill} = hipe_arm_ra_postconditions:check_and_rewrite( - Defun, Coloring, 'linearscan'), - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific), + CFG, Coloring, 'linearscan'), + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific, no_context), {SpillMap, _NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, - hipe_arm_specific, TempMap), + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + hipe_arm_specific, no_context, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), SpillMap), - {NewDefun, Coloring2}. + {NewCFG, Liveness, Coloring2}. -regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> - hipe_ls_regalloc:regalloc( - CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target). +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, + TgtMod, TgtCtx) -> + hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, + DontSpill, Options, TgtMod, TgtCtx). diff --git a/lib/hipe/arm/hipe_arm_ra_naive.erl b/lib/hipe/arm/hipe_arm_ra_naive.erl index 6201269f44..395beff292 100644 --- a/lib/hipe/arm/hipe_arm_ra_naive.erl +++ b/lib/hipe/arm/hipe_arm_ra_naive.erl @@ -20,11 +20,11 @@ %% -module(hipe_arm_ra_naive). --export([ra/3]). +-export([ra/4]). -include("hipe_arm.hrl"). -ra(Defun, _Coloring_fp, _Options) -> % -> {Defun, Coloring} - {NewDefun,_DidSpill} = - hipe_arm_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), - {NewDefun, []}. +ra(CFG, Liveness, _Coloring_fp, _Options) -> % -> {CFG, Liveness, Coloring} + {NewCFG,_DidSpill} = + hipe_arm_ra_postconditions:check_and_rewrite2(CFG, [], 'naive'), + {NewCFG, Liveness, []}. diff --git a/lib/hipe/arm/hipe_arm_ra_postconditions.erl b/lib/hipe/arm/hipe_arm_ra_postconditions.erl index 40978e65f6..412524e2e6 100644 --- a/lib/hipe/arm/hipe_arm_ra_postconditions.erl +++ b/lib/hipe/arm/hipe_arm_ra_postconditions.erl @@ -25,17 +25,13 @@ -include("hipe_arm.hrl"). -check_and_rewrite(Defun, Coloring, Allocator) -> - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific), - check_and_rewrite2(Defun, TempMap, Allocator). +check_and_rewrite(CFG, Coloring, Allocator) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific, no_context), + check_and_rewrite2(CFG, TempMap, Allocator). -check_and_rewrite2(Defun, TempMap, Allocator) -> +check_and_rewrite2(CFG, TempMap, Allocator) -> Strategy = strategy(Allocator), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - VarRange = {0, hipe_gensym:get_var(arm)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. + do_bbs(hipe_arm_cfg:labels(CFG), TempMap, Strategy, CFG, false). strategy(Allocator) -> case Allocator of @@ -44,6 +40,13 @@ strategy(Allocator) -> 'naive' -> 'fixed' end. +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_arm_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_arm_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). + do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); diff --git a/lib/hipe/arm/hipe_arm_registers.erl b/lib/hipe/arm/hipe_arm_registers.erl index dcf039676b..3ecf2f2fdb 100644 --- a/lib/hipe/arm/hipe_arm_registers.erl +++ b/lib/hipe/arm/hipe_arm_registers.erl @@ -180,6 +180,8 @@ is_arg(R) -> _ -> false end. +%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is +%% hard-coded in hipe_arm_defuse:insn_defs_all_gpr/1 call_clobbered() -> % does the RA strip the type or not? [{?R0,tagged},{?R0,untagged}, {?R1,tagged},{?R1,untagged}, diff --git a/lib/hipe/arm/hipe_arm_subst.erl b/lib/hipe/arm/hipe_arm_subst.erl new file mode 100644 index 0000000000..4d077f3cd6 --- /dev/null +++ b/lib/hipe/arm/hipe_arm_subst.erl @@ -0,0 +1,112 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2016. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_arm_subst). +-export([insn_temps/2]). +-include("hipe_arm.hrl"). + +%% These should be moved to hipe_arm and exported +-type temp() :: #arm_temp{}. +-type shiftop() :: lsl | lsr | asr | ror. +-type imm4() :: 0..15. +-type imm5() :: 0..31. +-type imm8() :: 0..255. +-type am1() :: {imm8(),imm4()} + | temp() + | {temp(), rrx} + | {temp(), shiftop(), imm5()} + | {temp(), shiftop(), temp()}. +-type am2() :: #am2{}. +-type am3() :: #am3{}. +-type arg() :: temp() | integer(). +-type funv() :: #arm_mfa{} | #arm_prim{} | temp(). +-type insn() :: tuple(). % for now + +-type subst_fun() :: fun((temp()) -> temp()). + +%% @doc Maps over the temporaries in an instruction +-spec insn_temps(subst_fun(), insn()) -> insn(). +insn_temps(T, I) -> + AM1 = fun(O) -> am1_temps(T, O) end, + AM2 = fun(O) -> am2_temps(T, O) end, + AM3 = fun(O) -> am3_temps(T, O) end, + Arg = fun(O) -> arg_temps(T, O) end, + case I of + #alu {dst=D,src=L,am1=R} -> I#alu{dst=T(D),src=T(L),am1=AM1(R)}; + #cmp {src=L,am1=R} -> I#cmp {src=T(L),am1=AM1(R)}; + #load {dst=D,am2=S} -> I#load {dst=T(D),am2=AM2(S)}; + #ldrsb {dst=D,am3=S} -> I#ldrsb {dst=T(D),am3=AM3(S)}; + #move {dst=D,am1=S} -> I#move {dst=T(D),am1=AM1(S)}; + #pseudo_move{dst=D,src=S} -> I#pseudo_move {dst=T(D),src=T(S)}; + #store {src=S,am2=D} -> I#store {src=T(S),am2=AM2(D)}; + #b_label{} -> I; + #comment{} -> I; + #label{} -> I; + #pseudo_bc{} -> I; + #pseudo_blr{} -> I; + #pseudo_call{funv=F} -> I#pseudo_call{funv=funv_temps(T, F)}; + #pseudo_call_prepare{} -> I; + #pseudo_li{dst=D} -> I#pseudo_li{dst=T(D)}; + #pseudo_switch{jtab=J=#arm_temp{},index=Ix=#arm_temp{}} -> + I#pseudo_switch{jtab=T(J),index=T(Ix)}; + #pseudo_tailcall{funv=F,stkargs=Stk} -> + I#pseudo_tailcall{funv=funv_temps(T,F),stkargs=lists:map(Arg,Stk)}; + #pseudo_tailcall_prepare{} -> I; + #smull{dstlo=DL,dsthi=DH,src1=L,src2=R} -> + I#smull{dstlo=T(DL),dsthi=T(DH),src1=T(L),src2=T(R)} + end. + +-spec am1_temps(subst_fun(), am1()) -> am1(). +am1_temps(_SubstTemp, T={C,R}) when is_integer(C), is_integer(R) -> T; +am1_temps(SubstTemp, T=#arm_temp{}) -> SubstTemp(T); +am1_temps(SubstTemp, {T=#arm_temp{},rrx}) -> {SubstTemp(T),rrx}; +am1_temps(SubstTemp, {A=#arm_temp{},Op,B=#arm_temp{}}) when is_atom(Op) -> + {SubstTemp(A),Op,SubstTemp(B)}; +am1_temps(SubstTemp, {T=#arm_temp{},Op,I}) when is_atom(Op), is_integer(I) -> + {SubstTemp(T),Op,I}. + +-spec am2_temps(subst_fun(), am2()) -> am2(). +am2_temps(SubstTemp, T=#am2{src=A=#arm_temp{},offset=O0}) -> + O = case O0 of + _ when is_integer(O0) -> O0; + #arm_temp{} -> SubstTemp(O0); + {B=#arm_temp{},rrx} -> {SubstTemp(B),rrx}; + {B=#arm_temp{},Op,I} when is_atom(Op), is_integer(I) -> + {SubstTemp(B),Op,I} + end, + T#am2{src=SubstTemp(A),offset=O}. + +-spec am3_temps(subst_fun(), am3()) -> am3(). +am3_temps(SubstTemp, T=#am3{src=A=#arm_temp{},offset=O0}) -> + O = case O0 of + _ when is_integer(O0) -> O0; + #arm_temp{} -> SubstTemp(O0) + end, + T#am3{src=SubstTemp(A),offset=O}. + +-spec funv_temps(subst_fun(), funv()) -> funv(). +funv_temps(_SubstTemp, M=#arm_mfa{}) -> M; +funv_temps(_SubstTemp, P=#arm_prim{}) -> P; +funv_temps(SubstTemp, T=#arm_temp{}) -> SubstTemp(T). + +-spec arg_temps(subst_fun(), arg()) -> arg(). +arg_temps(_SubstTemp, Imm) when is_integer(Imm) -> Imm; +arg_temps(SubstTemp, T=#arm_temp{}) -> SubstTemp(T). diff --git a/lib/hipe/arm/hipe_rtl_to_arm.erl b/lib/hipe/arm/hipe_rtl_to_arm.erl index 93342aba33..2f9181d517 100644 --- a/lib/hipe/arm/hipe_rtl_to_arm.erl +++ b/lib/hipe/arm/hipe_rtl_to_arm.erl @@ -138,7 +138,6 @@ mk_shift(S, Dst, Src1, ShiftOp, Src2) -> end. mk_shift_ii(S, Dst, Src1, ShiftOp, Src2) -> - io:format("~w: RTL alu with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Src1, mk_shift_ri(S, Dst, Tmp, ShiftOp, Src2)). @@ -179,7 +178,6 @@ mk_arith(S, Dst, Src1, ArithOp, Src2) -> end. mk_arith_ii(S, Dst, Src1, ArithOp, Src2) -> - io:format("~w: RTL alu with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Src1, mk_arith_ri(S, Dst, Tmp, ArithOp, Src2)). @@ -277,7 +275,6 @@ mk_branch(Src1, Cond, Src2, TrueLab, FalseLab, Pred) -> end. mk_branch_ii(Imm1, Cond, Imm2, TrueLab, FalseLab, Pred) -> - io:format("~w: RTL branch with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Imm1, mk_branch_ri(Tmp, Cond, Imm2, @@ -472,7 +469,6 @@ mk_load(Dst, Base1, Base2, LoadSize, LoadSign) -> end. mk_load_ii(Dst, Base1, Base2, LdOp) -> - io:format("~w: RTL load with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Base1, mk_load_ri(Dst, Tmp, Base2, LdOp)). @@ -485,7 +481,6 @@ mk_load_rr(Dst, Base1, Base2, LdOp) -> [hipe_arm:mk_load(LdOp, Dst, Am2)]. mk_ldrsb_ii(Dst, Base1, Base2) -> - io:format("~w: RTL load signed byte with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Base1, mk_ldrsb_ri(Dst, Tmp, Base2)). @@ -543,7 +538,7 @@ conv_return(I, Map, Data) -> {I2, Map0, Data}. conv_store(I, Map, Data) -> - {Base, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {Base, Map0} = conv_src(hipe_rtl:store_base(I), Map), {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {Offset, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), StoreSize = hipe_rtl:store_size(I), @@ -567,13 +562,28 @@ mk_store(Src, Base, Offset, StoreSize) -> end. mk_store2(Src, Base, Offset, StOp) -> - case hipe_arm:is_temp(Offset) of + case hipe_arm:is_temp(Base) of true -> - mk_store_rr(Src, Base, Offset, StOp); - _ -> - mk_store_ri(Src, Base, Offset, StOp) + case hipe_arm:is_temp(Offset) of + true -> + mk_store_rr(Src, Base, Offset, StOp); + _ -> + mk_store_ri(Src, Base, Offset, StOp) + end; + false -> + case hipe_arm:is_temp(Offset) of + true -> + mk_store_ri(Src, Offset, Base, StOp); + _ -> + mk_store_ii(Src, Base, Offset, StOp) + end end. - + +mk_store_ii(Src, Base, Offset, StOp) -> + Tmp = new_untagged_temp(), + mk_li(Tmp, Base, + mk_store_ri(Src, Tmp, Offset, StOp)). + mk_store_ri(Src, Base, Offset, StOp) -> hipe_arm:mk_store(StOp, Src, Base, Offset, 'new', []). diff --git a/lib/hipe/cerl/erl_bif_types.erl b/lib/hipe/cerl/erl_bif_types.erl index 230fce2e68..c9cc1cfe25 100644 --- a/lib/hipe/cerl/erl_bif_types.erl +++ b/lib/hipe/cerl/erl_bif_types.erl @@ -560,6 +560,9 @@ type(erlang, byte_size, 1, Xs, Opaques) -> strict(erlang, byte_size, 1, Xs, fun (_) -> t_non_neg_integer() end, Opaques); %% Guard bif, needs to be here. +type(erlang, ceil, 1, Xs, Opaques) -> + strict(erlang, ceil, 1, Xs, fun (_) -> t_integer() end, Opaques); +%% Guard bif, needs to be here. %% Also much more expressive than anything you could write in a spec... type(erlang, element, 2, Xs, Opaques) -> strict(erlang, element, 2, Xs, @@ -588,6 +591,9 @@ type(erlang, element, 2, Xs, Opaques) -> type(erlang, float, 1, Xs, Opaques) -> strict(erlang, float, 1, Xs, fun (_) -> t_float() end, Opaques); %% Guard bif, needs to be here. +type(erlang, floor, 1, Xs, Opaques) -> + strict(erlang, floor, 1, Xs, fun (_) -> t_integer() end, Opaques); +%% Guard bif, needs to be here. type(erlang, hd, 1, Xs, Opaques) -> strict(erlang, hd, 1, Xs, fun ([X]) -> t_cons_hd(X) end, Opaques); type(erlang, info, 1, Xs, _) -> type(erlang, system_info, 1, Xs); % alias @@ -2341,6 +2347,9 @@ arg_types(erlang, bit_size, 1) -> %% Guard bif, needs to be here. arg_types(erlang, byte_size, 1) -> [t_bitstr()]; +%% Guard bif, needs to be here. +arg_types(erlang, ceil, 1) -> + [t_number()]; arg_types(erlang, halt, 0) -> []; arg_types(erlang, halt, 1) -> @@ -2361,6 +2370,9 @@ arg_types(erlang, element, 2) -> arg_types(erlang, float, 1) -> [t_number()]; %% Guard bif, needs to be here. +arg_types(erlang, floor, 1) -> + [t_number()]; +%% Guard bif, needs to be here. arg_types(erlang, hd, 1) -> [t_cons()]; arg_types(erlang, info, 1) -> diff --git a/lib/hipe/flow/cfg.inc b/lib/hipe/flow/cfg.inc index 0bad2a8dd7..cb5f397f64 100644 --- a/lib/hipe/flow/cfg.inc +++ b/lib/hipe/flow/cfg.inc @@ -32,6 +32,8 @@ %% bb(CFG, Label) - returns the basic block named 'Label' from the CFG. %% bb_add(CFG, Label, NewBB) - makes NewBB the basic block associated %% with Label. +%% map_bbs(Fun, CFG) - map over all code without changing control flow. +%% fold_bbs(Fun, Acc, CFG) - fold over the basic blocks in a CFG. %% succ(Map, Label) - returns a list of successors of basic block 'Label'. %% pred(Map, Label) - returns the predecessors of basic block 'Label'. %% fallthrough(CFG, Label) - returns fall-through successor of basic @@ -89,6 +91,7 @@ -define(BREADTH_ORDER,true). % for linear scan -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -endif. %%===================================================================== @@ -307,11 +310,7 @@ redirect_phis([I|Rest], OldPred, NewPred, Acc) -> %% @doc Adds a new basic block to a CFG (or updates an existing block). bb_add(CFG, Label, NewBB) -> %% Asserting that the NewBB is a legal basic block - Last = hipe_bb:last(NewBB), - case is_branch(Last) of - true -> ok; - false -> throw({?MODULE, {"Basic block ends without branch", Last}}) - end, + Last = assert_bb(NewBB), %% The order of the elements from branch_successors/1 is %% significant. It determines the basic block order when the CFG is %% converted to linear form. That order may have been tuned for @@ -339,11 +338,53 @@ bb_add(CFG, Label, NewBB) -> HT2, OldSucc -- Succ), CFG#cfg{table = HT3}. +-ifdef(MAP_FOLD_NEEDED). +-spec map_bbs(fun((cfg_lbl(), hipe_bb:bb()) -> hipe_bb:bb()), cfg()) -> cfg(). +%% @doc Map over the code in a CFG without changing any control flow. +map_bbs(Fun, CFG = #cfg{table=HT0}) -> + HT = gb_trees:map( + fun(Lbl, {OldBB, OldSucc, OldPred}) -> + NewBB = Fun(Lbl, OldBB), + %% Assert preconditions + NewLast = assert_bb(NewBB), + OldSucc = remove_duplicates(branch_successors(NewLast)), + {NewBB, OldSucc, OldPred} + end, HT0), + CFG#cfg{table=HT}. + +-spec fold_bbs(fun((cfg_lbl(), hipe_bb:bb(), Acc) -> Acc), Acc, cfg()) -> Acc. +%% @doc Fold over the basic blocks in a CFG in unspecified order. +fold_bbs(Fun, InitAcc, #cfg{table=HT}) -> + gb_trees_fold(fun(Lbl, {BB, _, _}, Acc) -> Fun(Lbl, BB, Acc) end, + InitAcc, HT). + +gb_trees_fold(Fun, InitAcc, Tree) -> + gb_trees_fold_1(Fun, InitAcc, gb_trees:iterator(Tree)). + +gb_trees_fold_1(Fun, InitAcc, Iter0) -> + case gb_trees:next(Iter0) of + none -> InitAcc; + {Key, Value, Iter} -> + gb_trees_fold_1(Fun, Fun(Key, Value, InitAcc), Iter) + end. +-endif. % MAP_FOLD_NEEDED + +assert_bb(BB) -> + assert_bb_is(hipe_bb:code(BB)). + +assert_bb_is([Last]) -> + true = is_branch(Last), + Last; +assert_bb_is([I|Is]) -> + false = is_branch(I), + false = is_label(I), + assert_bb_is(Is). + remove_pred(HT, FromL, PredL) -> case gb_trees:lookup(FromL, HT) of {value, {Block, Succ, Preds}} -> Code = hipe_bb:code(Block), - NewCode = remove_pred_from_phis(Code, PredL, []), + NewCode = remove_pred_from_phis(PredL, Code), NewBlock = hipe_bb:code_update(Block, NewCode), gb_trees:update(FromL, {NewBlock,Succ,lists:delete(PredL,Preds)}, HT); none -> @@ -374,20 +415,20 @@ add_pred(HT, ToL, PredL) -> -ifdef(CFG_CAN_HAVE_PHI_NODES). %% phi-instructions in a removed block's successors must be aware of %% the change. -remove_pred_from_phis(List = [I|Left], Label, Acc) -> +remove_pred_from_phis(Label, List = [I|Left]) -> case is_phi(I) of - true -> - NewAcc = [phi_remove_pred(I, Label)|Acc], - remove_pred_from_phis(Left, Label, NewAcc); + true -> + NewI = phi_remove_pred(I, Label), + [NewI | remove_pred_from_phis(Label, Left)]; false -> - lists:reverse(Acc) ++ List + List end; -remove_pred_from_phis([], _Label, Acc) -> - lists:reverse(Acc). +remove_pred_from_phis(_Label, []) -> + []. -else. %% this is used for code representations like those of back-ends which %% do not have phi-nodes. -remove_pred_from_phis(Code, _Label, _Acc) -> +remove_pred_from_phis(_Label, Code) -> Code. -endif. @@ -927,24 +968,52 @@ merge(BB, BB2, BB2_Label) -> remove_unreachable_code(CFG) -> Start = start_label(CFG), - Reachable = find_reachable([Start], CFG, gb_sets:from_list([Start])), - %% Reachable is an ordset: it comes from gb_sets:to_list/1. - %% So use ordset:subtract instead of '--' below. - Labels = ordsets:from_list(labels(CFG)), - case ordsets:subtract(Labels, Reachable) of - [] -> - CFG; + %% No unreachable block will make another block reachable, so no fixpoint + %% looping is required + Reachable = find_reachable([], [Start], CFG, #{Start=>[]}), + case [L || L <- labels(CFG), not maps:is_key(L, Reachable)] of + [] -> CFG; Remove -> - NewCFG = lists:foldl(fun(X, Acc) -> bb_remove(Acc, X) end, CFG, Remove), - remove_unreachable_code(NewCFG) + HT0 = CFG#cfg.table, + HT1 = lists:foldl(fun gb_trees:delete/2, HT0, Remove), + ReachableP = fun(Lbl) -> maps:is_key(Lbl, Reachable) end, + HT = gb_trees:map(fun(_,B)->prune_preds(B, ReachableP)end, HT1), + CFG#cfg{table=HT} end. -find_reachable([Label|Left], CFG, Acc) -> - NewAcc = gb_sets:add(Label, Acc), - Succ = succ(CFG, Label), - find_reachable([X || X <- Succ, not gb_sets:is_member(X, Acc)] ++ Left, - CFG, NewAcc); -find_reachable([], _CFG, Acc) -> - gb_sets:to_list(Acc). +find_reachable([], [], _CFG, Acc) -> Acc; +find_reachable([Succ|Succs], Left, CFG, Acc) -> + case Acc of + #{Succ := _} -> find_reachable(Succs, Left, CFG, Acc); + #{} -> find_reachable(Succs, [Succ|Left], CFG, Acc#{Succ => []}) + end; +find_reachable([], [Label|Left], CFG, Acc) -> + find_reachable(succ(CFG, Label), Left, CFG, Acc). + +%% Batch prune unreachable predecessors. Asymptotically faster than deleting +%% unreachable blocks one at a time with bb_remove, at least when +%% CFG_CAN_HAVE_PHI_NODES is undefined. Otherwise a phi_remove_preds might be +%% needed to achieve that. +prune_preds(B={Block, Succ, Preds}, ReachableP) -> + case lists:partition(ReachableP, Preds) of + {_, []} -> B; + {NewPreds, Unreach} -> + NewCode = remove_preds_from_phis(Unreach, hipe_bb:code(Block)), + {hipe_bb:code_update(Block, NewCode), Succ, NewPreds} + end. +-ifdef(CFG_CAN_HAVE_PHI_NODES). +remove_preds_from_phis(_, []) -> []; +remove_preds_from_phis(Preds, List=[I|Left]) -> + case is_phi(I) of + false -> List; + true -> + NewI = lists:foldl(fun(L,IA)->phi_remove_pred(IA,L)end, + I, Preds), + [NewI | remove_preds_from_phis(Preds, Left)] + end. +-else. +remove_preds_from_phis(_, Code) -> Code. -endif. + +-endif. %% -ifdef(REMOVE_UNREACHABLE_CODE) diff --git a/lib/hipe/flow/hipe_bb.erl b/lib/hipe/flow/hipe_bb.erl index 2da3a6dc99..08f5e0a0cb 100644 --- a/lib/hipe/flow/hipe_bb.erl +++ b/lib/hipe/flow/hipe_bb.erl @@ -41,6 +41,8 @@ -include("hipe_bb.hrl"). +-export_type([bb/0]). + %% %% Constructs a basic block. %% Returns a basic block: {bb, Code} diff --git a/lib/hipe/flow/liveness.inc b/lib/hipe/flow/liveness.inc index a1caa3e0ad..bffaa4e3df 100644 --- a/lib/hipe/flow/liveness.inc +++ b/lib/hipe/flow/liveness.inc @@ -49,6 +49,10 @@ -endif. -include("../flow/cfg.hrl"). +-include("../main/hipe.hrl"). + +-opaque liveness() :: map(). +-export_type([liveness/0]). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -72,7 +76,7 @@ %% The generic liveness analysis %% --spec analyze(cfg()) -> gb_trees:tree(). +-spec analyze(cfg()) -> liveness(). -ifdef(HIPE_LIVENESS_CALC_LARGEST_LIVESET). analyze(CFG) -> @@ -188,6 +192,7 @@ update_livein(Label, NewLiveIn, Liveness) -> %% %% LiveOut for a block is the union of the successors LiveIn %% +-spec liveout(liveness(), _) -> [_]. liveout(Liveness, L) -> Succ = successors(L, Liveness), @@ -210,7 +215,7 @@ successors(L, Liveness) -> {_GK, _LiveIn, Successors} = liveness_lookup(L, Liveness), Successors. --spec livein(gb_trees:tree(), _) -> [_]. +-spec livein(liveness(), _) -> [_]. livein(Liveness, L) -> {_GK, LiveIn, _Successors} = liveness_lookup(L, Liveness), @@ -292,18 +297,15 @@ strip([{_,Y}|Xs]) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% +-compile({inline, [liveness_lookup/2, liveness_update/3]}). + liveness_init(List) -> - liveness_init(List, gb_trees:empty()). + maps:from_list(List). -liveness_init([{Lbl, Data}|Left], Acc) -> - liveness_init(Left, gb_trees:insert(Lbl, Data, Acc)); -liveness_init([], Acc) -> - Acc. - liveness_lookup(Label, Liveness) -> - gb_trees:get(Label, Liveness). + maps:get(Label, Liveness). liveness_update(Label, Val, Liveness) -> - gb_trees:update(Label, Val, Liveness). + maps:update(Label, Val, Liveness). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/lib/hipe/icode/hipe_icode_bincomp.erl b/lib/hipe/icode/hipe_icode_bincomp.erl index 5a27519141..5ee6fe2c87 100644 --- a/lib/hipe/icode/hipe_icode_bincomp.erl +++ b/lib/hipe/icode/hipe_icode_bincomp.erl @@ -40,8 +40,8 @@ -spec cfg(cfg()) -> cfg(). cfg(Cfg1) -> - StartLbls = ordsets:from_list([hipe_icode_cfg:start_label(Cfg1)]), - find_bs_get_integer(StartLbls, Cfg1, StartLbls). + StartLbl = hipe_icode_cfg:start_label(Cfg1), + find_bs_get_integer([StartLbl], Cfg1, set_from_list([StartLbl])). find_bs_get_integer([Lbl|Rest], Cfg, Visited) -> BB = hipe_icode_cfg:bb(Cfg, Lbl), @@ -55,10 +55,10 @@ find_bs_get_integer([Lbl|Rest], Cfg, Visited) -> not_ok -> Cfg end, - Succs = ordsets:from_list(hipe_icode_cfg:succ(NewCfg, Lbl)), - NewSuccs = ordsets:subtract(Succs, Visited), - NewLbls = ordsets:union(NewSuccs, Rest), - NewVisited = ordsets:union(NewSuccs, Visited), + Succs = hipe_icode_cfg:succ(NewCfg, Lbl), + NewSuccs = not_visited(Succs, Visited), + NewLbls = NewSuccs ++ Rest, + NewVisited = set_union(set_from_list(NewSuccs), Visited), find_bs_get_integer(NewLbls, NewCfg, NewVisited); find_bs_get_integer([], Cfg, _) -> Cfg. @@ -177,3 +177,19 @@ make_butlast([{Res, Size}|Rest], Var) -> [Var, hipe_icode:mk_const((1 bsl Size)-1)]), hipe_icode:mk_primop([NewVar], 'bsr', [Var, hipe_icode:mk_const(Size)]) |make_butlast(Rest, NewVar)]. + +%%-------------------------------------------------------------------- +%% Sets + +set_from_list([]) -> #{}; +set_from_list(L) -> + maps:from_list([{E, []} || E <- L]). + +not_visited([], _) -> []; +not_visited([E|T], M) -> + case M of + #{E := _} -> not_visited(T, M); + _ -> [E|not_visited(T, M)] + end. + +set_union(A, B) -> maps:merge(A, B). diff --git a/lib/hipe/icode/hipe_icode_coordinator.erl b/lib/hipe/icode/hipe_icode_coordinator.erl index d2f8748535..b073954ce7 100644 --- a/lib/hipe/icode/hipe_icode_coordinator.erl +++ b/lib/hipe/icode/hipe_icode_coordinator.erl @@ -106,12 +106,29 @@ handle_no_change_done(MFA, {Queue, Busy}) -> {Queue, Busy -- [MFA]}. last_action(PM, ServerPid, Mod, All) -> - lists:foreach(fun (MFA) -> - gb_trees:get(MFA, PM) ! {done, final_funs(ServerPid, Mod)}, - receive - {done_rewrite, MFA} -> ok - end - end, All). + last_action(PM, ServerPid, Mod, All, []). + +last_action(_, _, _, [], []) -> ok; +last_action(PM, ServerPid, Mod, [], [MFA|Busy]) -> + receive + {done_rewrite, MFA} -> + last_action(PM, ServerPid, Mod, [], Busy) + end; +last_action(PM, ServerPid, Mod, All0, Busy) -> + receive + {done_rewrite, MFA} -> + last_action(PM, ServerPid, Mod, All0, Busy -- [MFA]) + after 0 -> + case ?MAX_CONCURRENT - length(Busy) of + X when is_integer(X), X > 0 -> + [MFA|All1] = All0, + gb_trees:get(MFA, PM) ! {done, final_funs(ServerPid, Mod)}, + last_action(PM, ServerPid, Mod, All1, [MFA|Busy]); + X when is_integer(X) -> + Busy1 = receive {done_rewrite, MFA} -> Busy -- [MFA] end, + last_action(PM, ServerPid, Mod, All0, Busy1) + end + end. restart_funs({Queue, Busy} = QB, PM, All, ServerPid) -> case ?MAX_CONCURRENT - length(Busy) of diff --git a/lib/hipe/icode/hipe_icode_range.erl b/lib/hipe/icode/hipe_icode_range.erl index 12ed796690..af160769a1 100644 --- a/lib/hipe/icode/hipe_icode_range.erl +++ b/lib/hipe/icode/hipe_icode_range.erl @@ -73,8 +73,8 @@ -type final_fun() :: fun((mfa(), [range()]) -> 'ok'). -type data() :: {mfa(), args_fun(), call_fun(), final_fun()}. -type label() :: non_neg_integer(). --type info() :: gb_trees:tree(). --type work_list() :: {[label()], [label()], sets:set()}. +-type info() :: map(). +-type work_list() :: {[label()], [label()], set(label())}. -type variable() :: #icode_variable{}. -type annotated_variable() :: #icode_variable{}. -type argument() :: #icode_const{} | variable(). @@ -82,10 +82,9 @@ -type instr_split_info() :: {icode_instr(), [{label(),info()}]}. -type last_instr_return() :: {instr_split_info(), range()}. --record(state, {info_map = gb_trees:empty() :: info(), - counter = dict:new() :: dict:dict(), - cfg :: cfg(), - liveness = gb_trees:empty() :: gb_trees:tree(), +-record(state, {info_map = #{} :: info(), + cfg :: cfg(), + liveness :: hipe_icode_ssa:liveness(), ret_type :: range(), lookup_fun :: call_fun(), result_action :: final_fun()}). @@ -187,17 +186,16 @@ safe_analyse(CFG, Data={MFA,_,_,_}) -> rewrite_blocks(State) -> CFG = state__cfg(State), Start = hipe_icode_cfg:start_label(CFG), - rewrite_blocks([Start], State, [Start]). + rewrite_blocks([Start], State, set_from_list([Start])). --spec rewrite_blocks([label()], state(), [label()]) -> state(). +-spec rewrite_blocks([label()], state(), set(label())) -> state(). rewrite_blocks([Next|Rest], State, Visited) -> Info = state__info_in(State, Next), {NewState, NewLabels} = analyse_block(Next, Info, State, true), - NewLabelsSet = ordsets:from_list(NewLabels), - RealNew = ordsets:subtract(NewLabelsSet, Visited), - NewVisited = ordsets:union([RealNew, Visited, [Next]]), - NewWork = ordsets:union([RealNew, Rest]), + RealNew = not_visited(NewLabels, Visited), + NewVisited = set_union(set_from_list(RealNew), Visited), + NewWork = RealNew ++ Rest, rewrite_blocks(NewWork, NewState, NewVisited); rewrite_blocks([], State, _) -> State. @@ -1661,8 +1659,8 @@ state__init(Cfg, {MFA, ArgsFun, CallFun, FinalFun}) -> false -> NewParams = lists:zipwith(fun update_info/2, Params, Ranges), NewCfg = hipe_icode_cfg:params_update(Cfg, NewParams), - Info = enter_defines(NewParams, gb_trees:empty()), - InfoMap = gb_trees:insert({Start, in}, Info, gb_trees:empty()), + Info = enter_defines(NewParams, #{}), + InfoMap = #{{Start, in} => Info}, #state{info_map=InfoMap, cfg=NewCfg, liveness=Liveness, ret_type=none_type(), lookup_fun=CallFun, result_action=FinalFun} @@ -1700,7 +1698,7 @@ state__info_in(S, Label) -> state__info(S, {Label, in}). state__info(#state{info_map=IM}, Key) -> - gb_trees:get(Key, IM). + maps:get(Key, IM). state__update_info(State, LabelInfo, Rewrite) -> update_info(LabelInfo, State, [], Rewrite). @@ -1721,60 +1719,58 @@ update_info([], State, LabelAcc, _Rewrite) -> state__info_in_update(S=#state{info_map=IM,liveness=Liveness}, Label, Info) -> LabelIn = {Label, in}, - case gb_trees:lookup(LabelIn, IM) of - none -> + case IM of + #{LabelIn := OldInfo} -> + OldVars = maps:keys(OldInfo), + case join_info_in(OldVars, OldInfo, Info) of + fixpoint -> + fixpoint; + NewInfo -> + S#state{info_map=IM#{LabelIn := NewInfo}} + end; + _ -> LiveIn = hipe_icode_ssa:ssa_liveness__livein(Liveness, Label), NamesLiveIn = [hipe_icode:var_name(Var) || Var <- LiveIn, hipe_icode:is_var(Var)], - OldInfo = gb_trees:empty(), + OldInfo = #{}, case join_info_in(NamesLiveIn, OldInfo, Info) of fixpoint -> - S#state{info_map=gb_trees:insert(LabelIn, OldInfo, IM)}; - NewInfo -> - S#state{info_map=gb_trees:enter(LabelIn, NewInfo, IM)} - end; - {value, OldInfo} -> - OldVars = gb_trees:keys(OldInfo), - case join_info_in(OldVars, OldInfo, Info) of - fixpoint -> - fixpoint; + S#state{info_map=IM#{LabelIn => OldInfo}}; NewInfo -> - S#state{info_map=gb_trees:update(LabelIn, NewInfo, IM)} + S#state{info_map=IM#{LabelIn => NewInfo}} end end. join_info_in(Vars, OldInfo, NewInfo) -> - case join_info_in(Vars, OldInfo, NewInfo, gb_trees:empty(), false) of + case join_info_in(Vars, OldInfo, NewInfo, #{}, false) of {Res, true} -> Res; {_, false} -> fixpoint end. join_info_in([Var|Left], Info1, Info2, Acc, Changed) -> - Type1 = gb_trees:lookup(Var, Info1), - Type2 = gb_trees:lookup(Var, Info2), - case {Type1, Type2} of - {none, none} -> - NewTree = gb_trees:insert(Var, none_type(), Acc), - join_info_in(Left, Info1, Info2, NewTree, true); - {none, {value, Val}} -> - NewTree = gb_trees:insert(Var, Val, Acc), - join_info_in(Left, Info1, Info2, NewTree, true); - {{value, Val}, none} -> - NewTree = gb_trees:insert(Var, Val, Acc), + case {Info1, Info2} of + {#{Var := Val}, #{Var := Val}} -> + NewTree = Acc#{Var => Val}, join_info_in(Left, Info1, Info2, NewTree, Changed); - {{value, Val}, {value, Val}} -> - NewTree = gb_trees:insert(Var, Val, Acc), - join_info_in(Left, Info1, Info2, NewTree, Changed); - {{value, Val1}, {value, Val2}} -> - {NewChanged, NewVal} = + {#{Var := Val1}, #{Var := Val2}} -> + {NewChanged, NewVal} = case sup(Val1, Val2) of Val1 -> {Changed, Val1}; Val -> {true, Val} end, - NewTree = gb_trees:insert(Var, NewVal, Acc), - join_info_in(Left, Info1, Info2, NewTree, NewChanged) + NewTree = Acc#{Var => NewVal}, + join_info_in(Left, Info1, Info2, NewTree, NewChanged); + {_, #{Var := Val}} -> + NewTree = Acc#{Var => Val}, + join_info_in(Left, Info1, Info2, NewTree, true); + {#{Var := Val}, _} -> + NewTree = Acc#{Var => Val}, + join_info_in(Left, Info1, Info2, NewTree, Changed); + {_, _} -> + NewTree = Acc#{Var => none_type()}, + join_info_in(Left, Info1, Info2, NewTree, true) end; join_info_in([], _Info1, _Info2, Acc, NewChanged) -> {Acc, NewChanged}. @@ -1786,7 +1782,7 @@ enter_defines([], Info) -> Info. enter_define({PossibleVar, Range = #range{}}, Info) -> case hipe_icode:is_var(PossibleVar) of true -> - gb_trees:enter(hipe_icode:var_name(PossibleVar), Range, Info); + Info#{hipe_icode:var_name(PossibleVar) => Range}; false -> Info end; @@ -1795,7 +1791,7 @@ enter_define(PossibleVar, Info) -> true -> case hipe_icode:variable_annotation(PossibleVar) of {range_anno, #ann{range=Range}, _} -> - gb_trees:enter(hipe_icode:var_name(PossibleVar), Range, Info); + Info#{hipe_icode:var_name(PossibleVar) => Range}; _ -> Info end; @@ -1810,11 +1806,10 @@ enter_vals(Ins, Info) -> lookup(PossibleVar, Info) -> case hipe_icode:is_var(PossibleVar) of true -> - case gb_trees:lookup(hipe_icode:var_name(PossibleVar), Info) of - none -> - none_type(); - {value, Val} -> - Val + PossibleVarName = hipe_icode:var_name(PossibleVar), + case Info of + #{PossibleVarName := Val} -> Val; + _ -> none_type() end; false -> none_type() @@ -1828,10 +1823,10 @@ lookup(PossibleVar, Info) -> init_work(State) -> %% Labels = hipe_icode_cfg:reverse_postorder(state__cfg(State)), Labels = [hipe_icode_cfg:start_label(state__cfg(State))], - {Labels, [], sets:from_list(Labels)}. + {Labels, [], set_from_list(Labels)}. get_work({[Label|Left], List, Set}) -> - NewWork = {Left, List, sets:del_element(Label, Set)}, + NewWork = {Left, List, maps:remove(Label, Set)}, {Label, NewWork}; get_work({[], [], _Set}) -> fixpoint; @@ -1839,12 +1834,12 @@ get_work({[], List, Set}) -> get_work({lists:reverse(List), [], Set}). add_work(Work = {List1, List2, Set}, [Label|Left]) -> - case sets:is_element(Label, Set) of - true -> + case Set of + #{Label := _} -> add_work(Work, Left); - false -> + _ -> %% io:format("Adding work: ~w\n", [Label]), - add_work({List1, [Label|List2], sets:add_element(Label, Set)}, Left) + add_work({List1, [Label|List2], Set#{Label => []}}, Left) end; add_work(Work, []) -> Work. @@ -1959,3 +1954,21 @@ next_down_limit(X) when is_integer(X), X > -16#8000000 -> -16#8000000; next_down_limit(X) when is_integer(X), X > -16#80000000 -> -16#80000000; next_down_limit(X) when is_integer(X), X > -16#800000000000000 -> -16#800000000000000; next_down_limit(_X) -> neg_inf. + +%%-------------------------------------------------------------------- +%% Sets + +-type set(E) :: #{E => []}. + +set_from_list([]) -> #{}; +set_from_list(L) -> + maps:from_list([{E, []} || E <- L]). + +not_visited([], _) -> []; +not_visited([E|T], M) -> + case M of + #{E := []} -> not_visited(T, M); + _ -> [E|not_visited(T, M)] + end. + +set_union(A, B) -> maps:merge(A, B). diff --git a/lib/hipe/icode/hipe_icode_ssa.erl b/lib/hipe/icode/hipe_icode_ssa.erl index b222fbc7d2..aca13a2ff0 100644 --- a/lib/hipe/icode/hipe_icode_ssa.erl +++ b/lib/hipe/icode/hipe_icode_ssa.erl @@ -34,13 +34,16 @@ -define(LIVENESS, hipe_icode_liveness). -define(LIVENESS_NEEDED, true). +-export_type([liveness/0]). + -include("hipe_icode.hrl"). -include("../ssa/hipe_ssa.inc"). %% Declarations for exported functions which are Icode-specific. --spec ssa_liveness__analyze(#cfg{}) -> gb_trees:tree(). --spec ssa_liveness__livein(_, icode_lbl()) -> [#icode_variable{}]. -%% -spec ssa_liveness__livein(_, icode_lbl(), _) -> [#icode_var{}]. +-opaque liveness() :: liveness(icode_lbl(), #icode_variable{}). +-spec ssa_liveness__analyze(#cfg{}) -> liveness(). +-spec ssa_liveness__livein(liveness(), icode_lbl()) -> [#icode_variable{}]. +%% -spec ssa_liveness__livein(liveness(), icode_lbl(), _) -> [#icode_var{}]. %%---------------------------------------------------------------------- %% Auxiliary operations which seriously differ between Icode and RTL. diff --git a/lib/hipe/icode/hipe_icode_type.erl b/lib/hipe/icode/hipe_icode_type.erl index 794c27ebcc..3f0e2998f1 100644 --- a/lib/hipe/icode/hipe_icode_type.erl +++ b/lib/hipe/icode/hipe_icode_type.erl @@ -100,7 +100,7 @@ -record(state, {info_map = gb_trees:empty() :: gb_trees:tree(), cfg :: cfg(), - liveness = gb_trees:empty() :: gb_trees:tree(), + liveness :: hipe_icode_ssa:liveness(), arg_types :: [erl_types:erl_type()], ret_type = [t_none()] :: [erl_types:erl_type()], lookupfun :: call_fun(), diff --git a/lib/hipe/main/hipe.app.src b/lib/hipe/main/hipe.app.src index f8487151d7..af2c02006d 100644 --- a/lib/hipe/main/hipe.app.src +++ b/lib/hipe/main/hipe.app.src @@ -49,12 +49,13 @@ hipe_amd64_ra_naive, hipe_amd64_ra_postconditions, hipe_amd64_ra_sse2_postconditions, - hipe_amd64_ra_x87_ls, hipe_amd64_registers, hipe_amd64_specific, hipe_amd64_specific_sse2, hipe_amd64_specific_x87, hipe_amd64_spill_restore, + hipe_amd64_sse2, + hipe_amd64_subst, hipe_amd64_x87, hipe_arm, hipe_arm_assemble, @@ -73,6 +74,7 @@ hipe_arm_ra_postconditions, hipe_arm_registers, hipe_arm_specific, + hipe_arm_subst, hipe_bb, hipe_beam_to_icode, hipe_coalescing_regalloc, @@ -142,9 +144,11 @@ hipe_ppc_registers, hipe_ppc_specific, hipe_ppc_specific_fp, + hipe_ppc_subst, hipe_profile, hipe_reg_worklists, hipe_regalloc_loop, + hipe_regalloc_prepass, hipe_rtl, hipe_rtl_arch, hipe_rtl_arith_32, @@ -171,6 +175,7 @@ hipe_rtl_to_sparc, hipe_rtl_to_x86, hipe_rtl_varmap, + hipe_segment_trees, hipe_sdi, hipe_sparc, hipe_sparc_assemble, @@ -193,6 +198,7 @@ hipe_sparc_registers, hipe_sparc_specific, hipe_sparc_specific_fp, + hipe_sparc_subst, hipe_spillcost, hipe_spillmin, hipe_spillmin_color, @@ -216,14 +222,14 @@ hipe_x86_ra_ls, hipe_x86_ra_naive, hipe_x86_ra_postconditions, - hipe_x86_ra_x87_ls, hipe_x86_registers, hipe_x86_specific, hipe_x86_specific_x87, hipe_x86_spill_restore, + hipe_x86_subst, hipe_x86_x87]}, {registered,[]}, {applications, [kernel,stdlib]}, {env, []}, {runtime_dependencies, ["syntax_tools-1.6.14","stdlib-2.5","kernel-3.0", - "erts-7.1","compiler-5.0"]}]}. + "erts-9.0","compiler-5.0"]}]}. diff --git a/lib/hipe/main/hipe.erl b/lib/hipe/main/hipe.erl index 6c525dd143..bee5da2195 100644 --- a/lib/hipe/main/hipe.erl +++ b/lib/hipe/main/hipe.erl @@ -1118,9 +1118,10 @@ help_hiper() -> help_options() -> HostArch = erlang:system_info(hipe_architecture), - O1 = expand_options([o1], HostArch), - O2 = expand_options([o2], HostArch), - O3 = expand_options([o3], HostArch), + O0 = expand_options([o0] ++ ?COMPILE_DEFAULTS, HostArch), + O1 = expand_options([o1] ++ ?COMPILE_DEFAULTS, HostArch), + O2 = expand_options([o2] ++ ?COMPILE_DEFAULTS, HostArch), + O3 = expand_options([o3] ++ ?COMPILE_DEFAULTS, HostArch), io:format("HiPE Compiler Options\n" ++ " Boolean-valued options generally have corresponding " ++ "aliases `no_...',\n" ++ @@ -1139,15 +1140,16 @@ help_options() -> " pp_x86 = pp_native,\n" ++ " pp_amd64 = pp_native,\n" ++ " pp_ppc = pp_native,\n" ++ - " o0,\n" ++ - " o1 = ~p,\n" ++ + " o0 = ~p,\n" ++ + " o1 = ~p ++ o0,\n" ++ " o2 = ~p ++ o1,\n" ++ " o3 = ~p ++ o2.\n", [ordsets:from_list([verbose, debug, time, load, pp_beam, pp_icode, pp_rtl, pp_native, pp_asm, timeout]), expand_options([pp_all], HostArch), - O1 -- [o1], + O0 -- [o0], + (O1 -- O0) -- [o1], (O2 -- O1) -- [o2], (O3 -- O2) -- [o3]]), ok. @@ -1352,6 +1354,8 @@ opt_keys() -> pp_rtl_lcm, pp_rtl_ssapre, pp_rtl_linear, + ra_partitioned, + ra_prespill, regalloc, remove_comments, rtl_ssa, @@ -1382,8 +1386,15 @@ opt_keys() -> %% Definitions: +o0_opts(_TargetArch) -> + [concurrent_comp, {regalloc,linear_scan}]. + o1_opts(TargetArch) -> - Common = [inline_fp, pmatch, peephole], + Common = [inline_fp, pmatch, peephole, ra_prespill, ra_partitioned, + icode_ssa_const_prop, icode_ssa_copy_prop, icode_inline_bifs, + rtl_ssa, rtl_ssa_const_prop, rtl_ssapre, + spillmin_color, use_indexing, remove_comments, + binary_opt, {regalloc,coalescing} | o0_opts(TargetArch)], case TargetArch of ultrasparc -> Common; @@ -1402,11 +1413,8 @@ o1_opts(TargetArch) -> end. o2_opts(TargetArch) -> - Common = [icode_ssa_const_prop, icode_ssa_copy_prop, % icode_ssa_struct_reuse, - icode_type, icode_inline_bifs, icode_call_elim, rtl_lcm, - rtl_ssa, rtl_ssa_const_prop, - spillmin_color, use_indexing, remove_comments, - concurrent_comp, binary_opt | o1_opts(TargetArch)], + Common = [icode_type, icode_call_elim, % icode_ssa_struct_reuse, + rtl_lcm | (o1_opts(TargetArch) -- [rtl_ssapre])], case TargetArch of T when T =:= amd64 orelse T =:= ppc64 -> % 64-bit targets [icode_range | Common]; @@ -1416,7 +1424,7 @@ o2_opts(TargetArch) -> o3_opts(TargetArch) -> %% no point checking for target architecture since this is checked in 'o1' - [icode_range, {regalloc,coalescing} | o2_opts(TargetArch)]. + [icode_range | o2_opts(TargetArch)]. %% Note that in general, the normal form for options should be positive. %% This is a good programming convention, so that tests in the code say @@ -1452,6 +1460,8 @@ opt_negations() -> {no_pp_native, pp_native}, {no_pp_rtl_lcm, pp_rtl_lcm}, {no_pp_rtl_ssapre, pp_rtl_ssapre}, + {no_ra_partitioned, ra_partitioned}, + {no_ra_prespill, ra_prespill}, {no_remove_comments, remove_comments}, {no_rtl_ssa, rtl_ssa}, {no_rtl_ssa_const_prop, rtl_ssa_const_prop}, @@ -1481,7 +1491,8 @@ opt_basic_expansions() -> [{pp_all, [pp_beam, pp_icode, pp_rtl, pp_native]}]. opt_expansions(TargetArch) -> - [{o1, o1_opts(TargetArch)}, + [{o0, o0_opts(TargetArch)}, + {o1, o1_opts(TargetArch)}, {o2, o2_opts(TargetArch)}, {o3, o3_opts(TargetArch)}, {to_llvm, llvm_opts(o3, TargetArch)}, @@ -1528,13 +1539,21 @@ expand_kt2(Opts) -> -spec expand_options(comp_options(), hipe_architecture()) -> comp_options(). -expand_options(Opts, TargetArch) -> +expand_options(Opts0, TargetArch) -> + Opts1 = proplists:normalize(Opts0, [{aliases, opt_aliases()}]), + Opts = normalise_opt_options(Opts1), proplists:normalize(Opts, [{negations, opt_negations()}, - {aliases, opt_aliases()}, {expand, opt_basic_expansions()}, {expand, opt_expansions(TargetArch)}, {negations, opt_negations()}]). +normalise_opt_options([o0|Opts]) -> [o0] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([o1|Opts]) -> [o1] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([o2|Opts]) -> [o2] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([o3|Opts]) -> [o3] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([O|Opts]) -> [O|normalise_opt_options(Opts)]; +normalise_opt_options([]) -> []. + -spec check_options(comp_options()) -> 'ok'. check_options(Opts) -> diff --git a/lib/hipe/misc/Makefile b/lib/hipe/misc/Makefile index 72cfff21a8..e5033e444b 100644 --- a/lib/hipe/misc/Makefile +++ b/lib/hipe/misc/Makefile @@ -44,7 +44,7 @@ RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) # Target Specs # ---------------------------------------------------- ifdef HIPE_ENABLED -HIPE_MODULES = hipe_data_pp hipe_pack_constants hipe_sdi +HIPE_MODULES = hipe_data_pp hipe_pack_constants hipe_sdi hipe_segment_trees else HIPE_MODULES = endif diff --git a/lib/hipe/misc/hipe_sdi.erl b/lib/hipe/misc/hipe_sdi.erl index fbb4b105f6..5ca64bc669 100644 --- a/lib/hipe/misc/hipe_sdi.erl +++ b/lib/hipe/misc/hipe_sdi.erl @@ -36,10 +36,13 @@ %%------------------------------------------------------------------------ -type hipe_array() :: integer(). % declare this in hipe.hrl or builtin? +-type hipe_vector(E) :: {} | {E} | {E, E} | {E, E, E} | tuple(). -type label() :: non_neg_integer(). -type address() :: non_neg_integer(). +-type parents() :: {hipe_vector(_ :: integer()), hipe_segment_trees:tree()}. + %%------------------------------------------------------------------------ -record(label_data, {address :: address(), @@ -168,9 +171,11 @@ mk_long(N) -> %%% - Since the graph is traversed from child to parent nodes in %%% Step 3, the edges are represented by a vector PARENTS[0..n-1] %%% such that PARENTS[j] = { i | i is a parent of j }. -%%% - An explicit PARENTS graph would have size O(n^2). Instead we -%%% compute PARENTS[j] from the SDI vector when needed. This -%%% reduces memory overheads, and may reduce time overheads too. +%%% - An explicit PARENTS graph would have size O(n^2). Instead, we +%%% observe that (i is a parent of j) iff (j \in range(i)), where +%%% range(i) is a constant function. We can thus precompute all the +%%% ranges i and insert them into a data structure built for such +%%% queries. In this case, we use a segment tree. -spec mk_span(non_neg_integer(), tuple()) -> hipe_array(). mk_span(N, SDIS) -> @@ -188,7 +193,29 @@ initSPAN(SdiNr, N, SDIS, SPAN) -> initSPAN(SdiNr+1, N, SDIS, SPAN) end. -mk_parents(N, SDIS) -> {N,SDIS}. +-spec mk_parents(non_neg_integer(), tuple()) -> parents(). +mk_parents(N, SDIS) -> + PrevSDIS = vector_from_list(select_prev_sdis(N-1, SDIS, [])), + Ranges = parents_generate_ranges(N-1, PrevSDIS, []), + {PrevSDIS, hipe_segment_trees:build(Ranges)}. + +select_prev_sdis(-1, _SDIS, Acc) -> Acc; +select_prev_sdis(SdiNr, SDIS, Acc) -> + #sdi_data{prevSdi=PrevSdi} = vector_sub(SDIS, SdiNr), + select_prev_sdis(SdiNr-1, SDIS, [PrevSdi|Acc]). + +parents_generate_ranges(-1, _PrevSDIS, Acc) -> Acc; +parents_generate_ranges(SdiNr, PrevSDIS, Acc) -> + %% inclusive + {LO,HI} = parents_generate_range(SdiNr, PrevSDIS), + parents_generate_ranges(SdiNr-1, PrevSDIS, [{LO,HI}|Acc]). + +-compile({inline, parents_generate_range/2}). +parents_generate_range(SdiNr, PrevSDIS) -> + PrevSdi = vector_sub(PrevSDIS, SdiNr), + if SdiNr =< PrevSdi -> {SdiNr+1, PrevSdi}; % forwards + true -> {PrevSdi+1, SdiNr-1} % backwards + end. %%% "After the structure is built we process it as follows. %%% For any node i whose listed span exceeds the architectural @@ -209,7 +236,7 @@ mk_parents(N, SDIS) -> {N,SDIS}. %%% and PARENTS are no longer useful. -spec update_long(non_neg_integer(), tuple(), hipe_array(), - {non_neg_integer(),tuple()},hipe_array()) -> 'ok'. + parents(),hipe_array()) -> 'ok'. update_long(N, SDIS, SPAN, PARENTS, LONG) -> WKL = initWKL(N-1, SDIS, SPAN, []), processWKL(WKL, SDIS, SPAN, PARENTS, LONG). @@ -225,46 +252,32 @@ initWKL(SdiNr, SDIS, SPAN, WKL) -> end. -spec processWKL([non_neg_integer()], tuple(), hipe_array(), - {non_neg_integer(), tuple()}, hipe_array()) -> 'ok'. + parents(), hipe_array()) -> 'ok'. processWKL([], _SDIS, _SPAN, _PARENTS, _LONG) -> ok; -processWKL([Child|WKL], SDIS, SPAN, PARENTS, LONG) -> - WKL2 = updateChild(Child, WKL, SDIS, SPAN, PARENTS, LONG), +processWKL([Child|WKL], SDIS, SPAN, PARENTS0, LONG) -> + {WKL2, PARENTS} = + case array_sub(SPAN, Child) of + 0 -> {WKL, PARENTS0}; % removed + _ -> + SdiData = vector_sub(SDIS, Child), + Incr = sdiLongIncr(SdiData), + array_update(LONG, Child, Incr), + array_update(SPAN, Child, 0), % remove child + PARENTS1 = deleteParent(PARENTS0, Child), + PS = parentsOfChild(PARENTS1, Child), + {updateParents(PS, Child, Incr, SDIS, SPAN, WKL), PARENTS1} + end, processWKL(WKL2, SDIS, SPAN, PARENTS, LONG). --spec updateChild(non_neg_integer(), [non_neg_integer()], tuple(), hipe_array(), - {non_neg_integer(),tuple()}, hipe_array()) -> [non_neg_integer()]. -updateChild(Child, WKL, SDIS, SPAN, PARENTS, LONG) -> - case array_sub(SPAN, Child) of - 0 -> WKL; % removed - _ -> - SdiData = vector_sub(SDIS, Child), - Incr = sdiLongIncr(SdiData), - array_update(LONG, Child, Incr), - array_update(SPAN, Child, 0), % remove child - PS = parentsOfChild(PARENTS, Child), - updateParents(PS, Child, Incr, SDIS, SPAN, WKL) - end. +-spec parentsOfChild(parents(), non_neg_integer()) -> [non_neg_integer()]. +parentsOfChild({_PrevSDIS, SegTree}, Child) -> + hipe_segment_trees:intersect(Child, SegTree). --spec parentsOfChild({non_neg_integer(),tuple()}, - non_neg_integer()) -> [non_neg_integer()]. -parentsOfChild({N,SDIS}, Child) -> - parentsOfChild(N-1, SDIS, Child, []). - --spec parentsOfChild(integer(), tuple(), non_neg_integer(), - [non_neg_integer()]) -> [non_neg_integer()]. -parentsOfChild(-1, _SDIS, _Child, PS) -> PS; -parentsOfChild(SdiNr, SDIS, Child, PS) -> - SdiData = vector_sub(SDIS, SdiNr), - #sdi_data{prevSdi=PrevSdi} = SdiData, - {LO,HI} = % inclusive - if SdiNr =< PrevSdi -> {SdiNr+1, PrevSdi}; % forwards - true -> {PrevSdi+1, SdiNr-1} % backwards - end, - NewPS = - if LO =< Child, Child =< HI -> [SdiNr | PS]; - true -> PS - end, - parentsOfChild(SdiNr-1, SDIS, Child, NewPS). +-spec deleteParent(parents(), non_neg_integer()) -> parents(). +deleteParent({PrevSDIS, SegTree0}, Parent) -> + {LO,HI} = parents_generate_range(Parent, PrevSDIS), + SegTree = hipe_segment_trees:delete(Parent, LO, HI, SegTree0), + {PrevSDIS, SegTree}. -spec updateParents([non_neg_integer()], non_neg_integer(), byte(), tuple(), hipe_array(), @@ -297,10 +310,12 @@ updateWKL(SdiNr, SDIS, SdiSpan, WKL) -> false -> [SdiNr|WKL] end. +-compile({inline, sdiSpanIsShort/2}). %% Only called once -spec sdiSpanIsShort(#sdi_data{}, integer()) -> boolean(). sdiSpanIsShort(#sdi_data{si = #sdi_info{lb = LB, ub = UB}}, SdiSpan) -> SdiSpan >= LB andalso SdiSpan =< UB. +-compile({inline, sdiLongIncr/1}). %% Only called once -spec sdiLongIncr(#sdi_data{}) -> byte(). sdiLongIncr(#sdi_data{si = #sdi_info{incr = Incr}}) -> Incr. @@ -361,9 +376,11 @@ applyIncr([{Label,LabelData}|List], INCREMENT, LabelMap) -> %%% Currently implemented as tuples. %%% Used for the 'SDIS' and 'PARENTS' vectors. --spec vector_from_list([#sdi_data{}]) -> tuple(). +-spec vector_from_list([E]) -> hipe_vector(E). vector_from_list(Values) -> list_to_tuple(Values). +-compile({inline, vector_sub/2}). +-spec vector_sub(hipe_vector(E), non_neg_integer()) -> V when V :: E. vector_sub(Vec, I) -> element(I+1, Vec). %%% ADT for mutable integer arrays, indexed from 0 to N-1. @@ -373,8 +390,10 @@ vector_sub(Vec, I) -> element(I+1, Vec). -spec mk_array_of_zeros(non_neg_integer()) -> hipe_array(). mk_array_of_zeros(N) -> hipe_bifs:array(N, 0). +-compile({inline, array_update/3}). -spec array_update(hipe_array(), non_neg_integer(), integer()) -> hipe_array(). array_update(A, I, V) -> hipe_bifs:array_update(A, I, V). +-compile({inline, array_sub/2}). -spec array_sub(hipe_array(), non_neg_integer()) -> integer(). array_sub(A, I) -> hipe_bifs:array_sub(A, I). diff --git a/lib/hipe/misc/hipe_segment_trees.erl b/lib/hipe/misc/hipe_segment_trees.erl new file mode 100644 index 0000000000..22146396c3 --- /dev/null +++ b/lib/hipe/misc/hipe_segment_trees.erl @@ -0,0 +1,181 @@ +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2016. All Rights Reserved. +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% Segment trees, with a delete operation. +%%% +%%% Keys are the (0-based) indices into the list passed to build/1. +%%% +%%% Range bounds are inclusive. +%%% + +-module(hipe_segment_trees). + +-export([build/1, intersect/2, delete/4]). + +-record(segment_tree, { + lo :: integer(), + hi :: integer(), + root :: tnode() + }). + +%% X =< Mid belongs in Left +-define(NODE(Left, Right, Mid, Segments), {Left, Right, Mid, Segments}). + +-define(POINT_LEAF(Val), Val). +-define(RANGE_LEAF(Lo, Hi), {Lo, Hi}). + +-type segments() :: [non_neg_integer()]. +-type leaf() :: segments(). +-type tnode() :: ?NODE(tnode(), tnode(), integer(), segments()) | leaf(). + +-opaque tree() :: #segment_tree{} | nil. +-export_type([tree/0]). + +%% @doc Builds a segment tree of the given intervals. +-spec build([{integer(), integer()}]) -> tree(). +build(ListOfIntervals) -> + case + lists:usort( + lists:append( + [[Lo, Hi] || {Lo, Hi} <- ListOfIntervals, Lo =< Hi])) + of + [] -> nil; + Endpoints -> + Tree0 = empty_tree_from_endpoints(Endpoints), + [Lo|_] = Endpoints, + Hi = lists:last(Endpoints), + Tree1 = insert_intervals(0, ListOfIntervals, Lo, Hi, Tree0), + Tree = squash_empty_subtrees(Tree1), + #segment_tree{lo=Lo, hi=Hi, root=Tree} + end. + +empty_tree_from_endpoints(Endpoints) -> + Leaves = leaves(Endpoints), + {T, [], _, _} = balanced_bst(Leaves, length(Leaves)), + T. + +leaves([Endpoint]) -> [?POINT_LEAF(Endpoint)]; +leaves([A | [B|_] = Tail]) -> + %% We omit the range leaf if it's empty + case A<B-1 of + true -> [?POINT_LEAF(A),?RANGE_LEAF(A+1,B-1) | leaves(Tail)]; + false -> [?POINT_LEAF(A) | leaves(Tail)] + end. + +balanced_bst(L, S) when S > 1 -> + Sm = S, %% - 1 + S2 = Sm div 2, + S1 = Sm - S2, + {Left, L1, LeftLo, LeftHi} = balanced_bst(L, S1), + {Right, L2, _, RightHi} = balanced_bst(L1, S2), + T = ?NODE(Left, Right, LeftHi, []), + {T, L2, LeftLo, RightHi}; +balanced_bst([?RANGE_LEAF(Lo, Hi) | L], 1) -> + {[], L, Lo, Hi}; +balanced_bst([?POINT_LEAF(Val) | L], 1) -> + {[], L, Val, Val}. + +insert_intervals(_Ix, [], _Lo, _Hi, Tree) -> Tree; +insert_intervals(Ix, [Int|Ints], Lo, Hi, Tree) -> + insert_intervals(Ix + 1, Ints, Lo, Hi, + insert_interval(Ix, Int, Lo, Hi, Tree)). + +insert_interval(_, {Lo, Hi}, _, _, Node) when Lo > Hi -> Node; +insert_interval(I, Int={Lo,Hi}, NLo, NHi, + ?NODE(Left0, Right0, Mid, Segments)) -> + if Lo =< NLo, NHi =< Hi -> + ?NODE(Left0, Right0, Mid, [I|Segments]); + true -> + Left = case intervals_intersect(Lo, Hi, NLo, Mid) of + true -> insert_interval(I, Int, NLo, Mid, Left0); + false -> Left0 + end, + Right = case intervals_intersect(Lo, Hi, Mid+1, NHi) of + true -> insert_interval(I, Int, Mid+1, NHi, Right0); + false -> Right0 + end, + ?NODE(Left, Right, Mid, Segments) + end; +insert_interval(I, {_Lo,_Hi}, _NLo, _NHi, Leaf) -> [I|Leaf]. + +intervals_intersect(ALo, AHi, BLo, BHi) -> + (ALo =< AHi) andalso (BLo =< BHi) %% both nonempty + andalso nonempty_intervals_intersect(ALo, AHi, BLo, BHi). + +%% Purely optional optimisation +squash_empty_subtrees(?NODE(Left0, Right0, Mid, Segs)) -> + build_squash_node(squash_empty_subtrees(Left0), + squash_empty_subtrees(Right0), + Mid, Segs); +squash_empty_subtrees(Leaf) -> Leaf. + +build_squash_node([], [], _, Segs) -> Segs; +build_squash_node(Left, Right, Mid, Segs) -> + ?NODE(Left, Right, Mid, Segs). + +%% @doc Returns the indices of the intervals in the tree that contains Point. +-spec intersect(integer(), tree()) -> [non_neg_integer()]. +intersect(Point, nil) when is_integer(Point) -> []; +intersect(Point, #segment_tree{lo=Lo, hi=Hi, root=Root}) + when is_integer(Point) -> + case Lo =< Point andalso Point =< Hi of + false -> []; + true -> intersect_1(Point, Root, []) + end. + +intersect_1(Point, ?NODE(Left, Right, Mid, Segs), Acc0) -> + Child = if Point =< Mid -> Left; true -> Right end, + intersect_1(Point, Child, Segs ++ Acc0); +intersect_1(_, LeafSegs, Acc) -> LeafSegs ++ Acc. + +%% @doc Deletes the interval {Lo, Hi}, which had index Index in the list passed +%% to build/1. +-spec delete(non_neg_integer(), integer(), integer(), tree()) -> tree(). +delete(_, _, _, nil) -> nil; +delete(_, Lo, Hi, Tree) when Lo > Hi -> Tree; +delete(_, Lo, Hi, Tree = #segment_tree{lo=TLo, hi=THi}) + when Hi < TLo; Lo > THi -> Tree; +delete(Index, Lo, Hi, Tree = #segment_tree{lo=TLo, hi=THi, root=Root0}) + when is_integer(Lo), is_integer(Hi) -> + Root = delete_1(Index, Lo, Hi, TLo, THi, Root0), + Tree#segment_tree{root=Root}. + +delete_1(I, Lo, Hi, NLo, NHi, ?NODE(Left0, Right0, Mid, Segments)) -> + if Lo =< NLo, NHi =< Hi -> + ?NODE(Left0, Right0, Mid, delete_2(Segments, I)); + true -> + Left = case nonempty_intervals_intersect(Lo, Hi, NLo, Mid) of + true -> delete_1(I, Lo, Hi, NLo, Mid, Left0); + false -> Left0 + end, + Right = case nonempty_intervals_intersect(Lo, Hi, Mid+1, NHi) of + true -> delete_1(I, Lo, Hi, Mid+1, NHi, Right0); + false -> Right0 + end, + %% We could do build_squash_node here, is it worth it? + ?NODE(Left, Right, Mid, Segments) + end; +delete_1(I, _Lo, _Hi, _NLo, _NHi, Leaf) -> delete_2(Leaf, I). + +delete_2([I|Segs], I) -> Segs; +delete_2([S|Segs], I) -> [S|delete_2(Segs,I)]. + +-compile({inline,nonempty_intervals_intersect/4}). +nonempty_intervals_intersect(ALo, AHi, BLo, BHi) -> + (BLo =< AHi) andalso (ALo =< BHi). diff --git a/lib/hipe/opt/hipe_spillmin.erl b/lib/hipe/opt/hipe_spillmin.erl index 4eeb1d71db..a2efd35d26 100644 --- a/lib/hipe/opt/hipe_spillmin.erl +++ b/lib/hipe/opt/hipe_spillmin.erl @@ -29,7 +29,8 @@ %% ========================================================================== %% Exported functions (short description): %% -%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> +%% stackalloc(CFG, StackSlots, SpillIndex, Options, TgtMod, TgtCtx, +%% TempMap) -> %% {Coloring, NumberOfSpills} %% Takes a CFG and the TempMap from register allocation and returns %% a coloring of stack slots. @@ -49,7 +50,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -module(hipe_spillmin). --export([stackalloc/6, mapmerge/2]). +-export([stackalloc/7, stackalloc/8, mapmerge/2]). %%-define(DEBUG, 1). -define(HIPE_INSTRUMENT_COMPILER, true). @@ -59,6 +60,8 @@ -include("../main/hipe.hrl"). -include("../flow/cfg.hrl"). +-type target_context() :: any(). + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) @@ -68,18 +71,29 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -spec stackalloc(#cfg{}, [_], non_neg_integer(), - comp_options(), module(), hipe_temp_map()) -> - {hipe_spill_map(), non_neg_integer()}. + comp_options(), module(), target_context(), hipe_temp_map()) -> + {hipe_spill_map(), non_neg_integer()}. -stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> +stackalloc(CFG, StackSlots, SpillIndex, Options, TgtMod, TgtCtx, TempMap) -> + Liveness = TgtMod:analyze(CFG,TgtCtx), + stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, TgtMod, TgtCtx, TempMap). + +-spec stackalloc(#cfg{}, _, [_], non_neg_integer(), + comp_options(), module(), target_context(), hipe_temp_map()) -> + {hipe_spill_map(), non_neg_integer()}. + +stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, TgtMod, TgtCtx, + TempMap) -> case proplists:get_bool(spillmin_color, Options) of false -> - ?option_time(hipe_spillmin_scan:stackalloc(CFG, StackSlots, SpillIndex, - Options, Target, TempMap), + ?option_time(hipe_spillmin_scan:stackalloc( + CFG, Liveness, StackSlots, SpillIndex, Options, TgtMod, + TgtCtx, TempMap), "Spill minimize, linear scan", Options); true -> - ?option_time(hipe_spillmin_color:stackalloc(CFG, StackSlots, SpillIndex, - Options, Target, TempMap), + ?option_time(hipe_spillmin_color:stackalloc( + CFG, Liveness, StackSlots, SpillIndex, Options, TgtMod, + TgtCtx, TempMap), "Spill minimize, graph coloring", Options) end. diff --git a/lib/hipe/opt/hipe_spillmin_color.erl b/lib/hipe/opt/hipe_spillmin_color.erl index 7c23de44b4..a0d6b03503 100644 --- a/lib/hipe/opt/hipe_spillmin_color.erl +++ b/lib/hipe/opt/hipe_spillmin_color.erl @@ -41,7 +41,7 @@ -module(hipe_spillmin_color). --export([stackalloc/6]). +-export([stackalloc/8]). %%-ifndef(DO_ASSERT). %%-define(DO_ASSERT, true). @@ -66,13 +66,17 @@ %% where Location is {spill,M}. %% {spill,M} denotes the Mth spilled node --spec stackalloc(#cfg{}, [_], non_neg_integer(), - comp_options(), module(), hipe_temp_map()) -> +-type target_context() :: any(). + +-spec stackalloc(#cfg{}, _, [_], non_neg_integer(), + comp_options(), module(), target_context(), hipe_temp_map()) -> {hipe_spill_map(), non_neg_integer()}. -stackalloc(CFG, _StackSlots, SpillIndex, _Options, Target, TempMap) -> +stackalloc(CFG, Live, _StackSlots, SpillIndex, _Options, TargetMod, + TargetContext, TempMap) -> + Target = {TargetMod, TargetContext}, ?report2("building IG~n", []), - {IG, NumNodes} = build_ig(CFG, Target, TempMap), + {IG, NumNodes} = build_ig(CFG, Live, Target, TempMap), {Cols, MaxColors} = color_heuristic(IG, 0, NumNodes, NumNodes, NumNodes, Target, 1), SortedCols = lists:sort(Cols), @@ -167,8 +171,8 @@ remap_temp_map0(Cols, [_Y|Ys], SpillIndex) -> %% Returns {Interference_graph, Number_Of_Nodes} %% -build_ig(CFG, Target, TempMap) -> - try build_ig0(CFG, Target, TempMap) +build_ig(CFG, Live, Target, TempMap) -> + try build_ig0(CFG, Live, Target, TempMap) catch error:Rsn -> exit({regalloc, build_ig, Rsn}) end. @@ -185,12 +189,11 @@ setup_ets0([X|Xs], Table, N) -> ets:insert(Table, {X, N}), setup_ets0(Xs, Table, N+1). -build_ig0(CFG, Target, TempMap) -> - Live = Target:analyze(CFG), +build_ig0(CFG, Live, Target, TempMap) -> TempMapping = map_spilled_temporaries(TempMap), TempMappingTable = setup_ets(TempMapping), NumSpilled = length(TempMapping), - IG = build_ig_bbs(Target:labels(CFG), CFG, Live, empty_ig(NumSpilled), + IG = build_ig_bbs(labels(CFG, Target), CFG, Live, empty_ig(NumSpilled), Target, TempMap, TempMappingTable), ets:delete(TempMappingTable), {normalize_ig(IG), NumSpilled}. @@ -540,18 +543,21 @@ is_visited(X, Vis) -> %% *** INTERFACES TO OTHER MODULES *** %% -liveout(CFG, L, Target) -> - ordsets:from_list(reg_names(Target:liveout(CFG, L), Target)). +labels(CFG, {TgtMod,TgtCtx}) -> + TgtMod:labels(CFG, TgtCtx). + +liveout(CFG, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:liveout(CFG, L, TgtCtx), Target)). -bb(CFG, L, Target) -> - hipe_bb:code(Target:bb(CFG, L)). +bb(CFG, L, {TgtMod,TgtCtx}) -> + hipe_bb:code(TgtMod:bb(CFG, L, TgtCtx)). -def_use(X, Target, TempMap) -> - Defines = [Y || Y <- reg_names(Target:defines(X), Target), +def_use(X, Target={TgtMod,TgtCtx}, TempMap) -> + Defines = [Y || Y <- reg_names(TgtMod:defines(X,TgtCtx), Target), hipe_temp_map:is_spilled(Y, TempMap)], - Uses = [Z || Z <- reg_names(Target:uses(X), Target), + Uses = [Z || Z <- reg_names(TgtMod:uses(X,TgtCtx), Target), hipe_temp_map:is_spilled(Z, TempMap)], {Defines, Uses}. -reg_names(Regs, Target) -> - [Target:reg_nr(X) || X <- Regs]. +reg_names(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. diff --git a/lib/hipe/opt/hipe_spillmin_scan.erl b/lib/hipe/opt/hipe_spillmin_scan.erl index 06b68e1934..097a787152 100644 --- a/lib/hipe/opt/hipe_spillmin_scan.erl +++ b/lib/hipe/opt/hipe_spillmin_scan.erl @@ -60,7 +60,7 @@ -module(hipe_spillmin_scan). --export([stackalloc/6]). +-export([stackalloc/8]). %%-define(DEBUG, 1). -define(HIPE_INSTRUMENT_COMPILER, true). @@ -70,6 +70,8 @@ -include("../main/hipe.hrl"). -include("../flow/cfg.hrl"). +-type target_context() :: any(). + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) @@ -85,15 +87,14 @@ %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% --spec stackalloc(#cfg{}, [_], non_neg_integer(), - comp_options(), module(), hipe_temp_map()) -> +-spec stackalloc(#cfg{}, _, [_], non_neg_integer(), + comp_options(), module(), target_context(), hipe_temp_map()) -> {hipe_spill_map(), non_neg_integer()}. -stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> +stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, TargetMod, + TargetContext, TempMap) -> + Target = {TargetMod, TargetContext}, ?debug_msg("LinearScan: ~w\n", [erlang:statistics(runtime)]), - %% Step 1: Calculate liveness (Call external implementation.) - Liveness = liveness(CFG, Target), - ?debug_msg("liveness (done)~w\n", [erlang:statistics(runtime)]), USIntervals = calculate_intervals(CFG, Liveness, Options, Target, TempMap), %% ?debug_msg("intervals (done) ~w\n", [erlang:statistics(runtime)]), @@ -124,8 +125,8 @@ stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> %% all other. %%- - - - - - - - - - - - - - - - - - - - - - - - calculate_intervals(CFG, Liveness, _Options, Target, TempMap) -> - Interval = empty_interval(Target:number_of_temporaries(CFG)), - Worklist = Target:reverse_postorder(CFG), + Interval = empty_interval(number_of_temporaries(CFG, Target)), + Worklist = reverse_postorder(CFG, Target), intervals(Worklist, Interval, 1, CFG, Liveness, Target, TempMap). %%- - - - - - - - - - - - - - - - - - - - - - - - @@ -538,23 +539,26 @@ extend_interval(Pos, {Beginning, End}) %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -liveness(CFG, Target) -> - Target:analyze(CFG). +bb(CFG, L, {TgtMod,TgtCtx}) -> + TgtMod:bb(CFG, L, TgtCtx). + +livein(Liveness, L, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:livein(Liveness, L, TgtCtx), Target). -bb(CFG, L, Target) -> - Target:bb(CFG, L). +liveout(Liveness, L, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:liveout(Liveness, L, TgtCtx), Target). -livein(Liveness, L, Target) -> - regnames(Target:livein(Liveness, L), Target). +number_of_temporaries(CFG, {TgtMod,TgtCtx}) -> + TgtMod:number_of_temporaries(CFG, TgtCtx). -liveout(Liveness, L, Target) -> - regnames(Target:liveout(Liveness, L), Target). +uses(I, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:uses(I,TgtCtx), Target). -uses(I, Target) -> - regnames(Target:uses(I), Target). +defines(I, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:defines(I,TgtCtx), Target). -defines(I, Target) -> - regnames(Target:defines(I), Target). +regnames(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. -regnames(Regs, Target) -> - [Target:reg_nr(X) || X <- Regs]. +reverse_postorder(CFG, {TgtMod,TgtCtx}) -> + TgtMod:reverse_postorder(CFG, TgtCtx). diff --git a/lib/hipe/ppc/Makefile b/lib/hipe/ppc/Makefile index 1901dfa671..1ca1d51846 100644 --- a/lib/hipe/ppc/Makefile +++ b/lib/hipe/ppc/Makefile @@ -63,6 +63,7 @@ MODULES=hipe_ppc \ hipe_ppc_ra_postconditions \ hipe_ppc_ra_postconditions_fp \ hipe_ppc_registers \ + hipe_ppc_subst \ hipe_rtl_to_ppc HRL_FILES=hipe_ppc.hrl diff --git a/lib/hipe/ppc/hipe_ppc_cfg.erl b/lib/hipe/ppc/hipe_ppc_cfg.erl index 34d4bf54c5..ee9b4432e0 100644 --- a/lib/hipe/ppc/hipe_ppc_cfg.erl +++ b/lib/hipe/ppc/hipe_ppc_cfg.erl @@ -24,6 +24,7 @@ -export([init/1, labels/1, start_label/1, succ/2, + map_bbs/2, fold_bbs/3, bb/2, bb_add/3]). -export([postorder/1]). -export([linearise/1, params/1, reverse_postorder/1]). @@ -34,6 +35,7 @@ -define(BREADTH_ORDER,true). -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -include("hipe_ppc.hrl"). -include("../flow/cfg.hrl"). diff --git a/lib/hipe/ppc/hipe_ppc_defuse.erl b/lib/hipe/ppc/hipe_ppc_defuse.erl index 77b84dc574..305e88488d 100644 --- a/lib/hipe/ppc/hipe_ppc_defuse.erl +++ b/lib/hipe/ppc/hipe_ppc_defuse.erl @@ -22,6 +22,7 @@ -module(hipe_ppc_defuse). -export([insn_def_all/1, insn_use_all/1]). -export([insn_def_gpr/1, insn_use_gpr/1]). +-export([insn_defs_all_gpr/1, insn_defs_all_fpr/1]). -export([insn_def_fpr/1, insn_use_fpr/1]). -include("hipe_ppc.hrl"). @@ -52,6 +53,9 @@ insn_def_gpr(I) -> _ -> [] end. +insn_defs_all_gpr(#pseudo_call{}) -> true; +insn_defs_all_gpr(_) -> false. + call_clobbered_gpr() -> [hipe_ppc:mk_temp(R, T) || {R,T} <- hipe_ppc_registers:call_clobbered() ++ all_fp_pseudos()]. @@ -116,6 +120,9 @@ insn_def_fpr(I) -> _ -> [] end. +insn_defs_all_fpr(#pseudo_call{}) -> true; +insn_defs_all_fpr(_) -> false. + call_clobbered_fpr() -> [hipe_ppc:mk_temp(R, 'double') || R <- hipe_ppc_registers:allocatable_fpr()]. diff --git a/lib/hipe/ppc/hipe_ppc_frame.erl b/lib/hipe/ppc/hipe_ppc_frame.erl index ff0450270f..8d37159ad8 100644 --- a/lib/hipe/ppc/hipe_ppc_frame.erl +++ b/lib/hipe/ppc/hipe_ppc_frame.erl @@ -24,16 +24,14 @@ -include("hipe_ppc.hrl"). -include("../rtl/hipe_literals.hrl"). -frame(Defun) -> - Formals = fix_formals(hipe_ppc:defun_formals(Defun)), - Temps0 = all_temps(hipe_ppc:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG) -> + Formals = fix_formals(hipe_ppc_cfg:params(CFG)), + Temps0 = all_temps(CFG, Formals), + MinFrame = defun_minframe(CFG), Temps = ensure_minframe(MinFrame, Temps0), - ClobbersLR = clobbers_lr(hipe_ppc:defun_code(Defun)), - CFG0 = hipe_ppc_cfg:init(Defun), - Liveness = hipe_ppc_liveness_all:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersLR), - hipe_ppc_cfg:linearise(CFG1). + ClobbersLR = clobbers_lr(CFG), + Liveness = hipe_ppc_liveness_all:analyse(CFG), + do_body(CFG, Liveness, Formals, Temps, ClobbersLR). fix_formals(Formals) -> fix_formals(hipe_ppc_registers:nr_args(), Formals). @@ -44,27 +42,16 @@ fix_formals(_, []) -> []. do_body(CFG0, Liveness, Formals, Temps, ClobbersLR) -> Context = mk_context(Liveness, Formals, Temps, ClobbersLR), - CFG1 = do_blocks(CFG0, Context), + CFG1 = hipe_ppc_cfg:map_bbs( + fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG0), do_prologue(CFG1, Context). -do_blocks(CFG, Context) -> - Labels = hipe_ppc_cfg:labels(CFG), - do_blocks(Labels, CFG, Context). - -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) -> Liveness = context_liveness(Context), LiveOut = hipe_ppc_liveness_all:liveout(Liveness, Label), - Block = hipe_ppc_cfg:bb(CFG, Label), Code = hipe_bb:code(Block), - NewCode = do_block(Code, LiveOut, Context), - NewBlock = hipe_bb:code_update(Block, NewCode), - NewCFG = hipe_ppc_cfg:bb_add(CFG, Label, NewBlock), - do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> - CFG. - -do_block(Insns, LiveOut, Context) -> - do_block(Insns, LiveOut, Context, context_framesize(Context), []). + NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), + hipe_bb:code_update(Block, NewCode). do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), @@ -573,29 +560,41 @@ temp_is_pseudo(Temp) -> %%% Detect if a Defun's body clobbers LR. %%% -clobbers_lr([I|Insns]) -> - case I of - #pseudo_call{} -> true; - %% mtspr to lr cannot occur yet - _ -> clobbers_lr(Insns) - end; -clobbers_lr([]) -> false. +clobbers_lr(CFG) -> + any_insn(fun(#pseudo_call{}) -> true; + (_) -> false + end, CFG). + +any_insn(Pred, CFG) -> + %% Abuse fold to do an efficient "any"-operation using nonlocal control flow + FoundSatisfying = make_ref(), + try fold_insns(fun (I, _) -> + case Pred(I) of + true -> throw(FoundSatisfying); + false -> false + end + end, false, CFG) + of _ -> false + catch FoundSatisfying -> true + end. %%% %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_ppc_defuse:insn_def_all(I)), - S2 = tset_add_list(S1, hipe_ppc_defuse:insn_use_all(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_ppc_defuse:insn_use_all(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_ppc_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). tset_empty() -> gb_sets:new(). @@ -624,16 +623,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_ppc:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_ppc:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_ppc_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/ppc/hipe_ppc_main.erl b/lib/hipe/ppc/hipe_ppc_main.erl index fd5cc2befb..5d1b0d0305 100644 --- a/lib/hipe/ppc/hipe_ppc_main.erl +++ b/lib/hipe/ppc/hipe_ppc_main.erl @@ -24,8 +24,10 @@ rtl_to_ppc(MFA, RTL, Options) -> PPC1 = hipe_rtl_to_ppc:translate(RTL), - PPC2 = hipe_ppc_ra:ra(PPC1, Options), - PPC3 = hipe_ppc_frame:frame(PPC2), + PPC1CFG = hipe_ppc_cfg:init(PPC1), + PPC2CFG = hipe_ppc_ra:ra(PPC1CFG, Options), + PPC3CFG = hipe_ppc_frame:frame(PPC2CFG), + PPC3 = hipe_ppc_cfg:linearise(PPC3CFG), PPC4 = hipe_ppc_finalise:finalise(PPC3), ppc_pp(PPC4, MFA, Options), {native, powerpc, {unprofiled, PPC4}}. diff --git a/lib/hipe/ppc/hipe_ppc_ra.erl b/lib/hipe/ppc/hipe_ppc_ra.erl index 87c776f5d1..f8614db4ef 100644 --- a/lib/hipe/ppc/hipe_ppc_ra.erl +++ b/lib/hipe/ppc/hipe_ppc_ra.erl @@ -22,36 +22,40 @@ -module(hipe_ppc_ra). -export([ra/2]). -ra(Defun0, Options) -> - %% hipe_ppc_pp:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} +ra(CFG0, Options) -> + %% hipe_ppc_pp:pp(hipe_ppc_cfg:linearise(CFG0)), + {CFG1, _FPLiveness1, Coloring_fp, SpillIndex} = case proplists:get_bool(inline_fp, Options) of true -> - hipe_regalloc_loop:ra_fp(Defun0, Options, + FPLiveness0 = hipe_ppc_specific_fp:analyze(CFG0, no_context), + hipe_regalloc_loop:ra_fp(CFG0, FPLiveness0, Options, hipe_coalescing_regalloc, - hipe_ppc_specific_fp); + hipe_ppc_specific_fp, no_context); false -> - {Defun0,[],0} + {CFG0,undefined,[],0} end, - %% hipe_ppc_pp:pp(Defun1), - {Defun2, Coloring} + %% hipe_ppc_pp:pp(hipe_ppc_cfg:linearise(CFG1)), + GPLiveness1 = hipe_ppc_specific:analyze(CFG1, no_context), + {CFG2, _GPLiveness2, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, + hipe_graph_coloring_regalloc); linear_scan -> - hipe_ppc_ra_ls:ra(Defun1, SpillIndex, Options); + hipe_ppc_ra_ls:ra(CFG1, GPLiveness1, SpillIndex, Options); naive -> - hipe_ppc_ra_naive:ra(Defun1, Coloring_fp, Options); + hipe_ppc_ra_naive:ra(CFG1, GPLiveness1, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, - %% hipe_ppc_pp:pp(Defun2), - hipe_ppc_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). + %% hipe_ppc_pp:pp(hipe_ppc_cfg:linearise(CFG2)), + hipe_ppc_ra_finalise:finalise(CFG2, Coloring, Coloring_fp). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_ppc_specific). +ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, + hipe_ppc_specific, no_context). diff --git a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl index ea163221c2..78f123116e 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl @@ -23,13 +23,14 @@ -export([finalise/3]). -include("hipe_ppc.hrl"). -finalise(Defun, TempMap, FPMap0) -> - Code = hipe_ppc:defun_code(Defun), - {_, SpillLimit} = hipe_ppc:defun_var_range(Defun), +finalise(CFG, TempMap, FPMap0) -> + {_, SpillLimit} = hipe_gensym:var_range(ppc), Map = mk_ra_map(TempMap, SpillLimit), FPMap1 = mk_ra_map_fp(FPMap0, SpillLimit), - NewCode = ra_code(Code, Map, FPMap1, []), - Defun#defun{code=NewCode}. + hipe_ppc_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FPMap1) end, CFG). + +ra_bb(BB, Map, FpMap) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap, [])). ra_code([I|Insns], Map, FPMap, Accum) -> ra_code(Insns, Map, FPMap, [ra_insn(I, Map, FPMap) | Accum]); diff --git a/lib/hipe/ppc/hipe_ppc_ra_ls.erl b/lib/hipe/ppc/hipe_ppc_ra_ls.erl index 6e8304467e..5f331542e8 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_ls.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_ls.erl @@ -21,37 +21,35 @@ %%% Linear Scan register allocator for PowerPC -module(hipe_ppc_ra_ls). --export([ra/3]). +-export([ra/4]). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_ppc_cfg:init(NewDefun), - SpillLimit = hipe_ppc_specific:number_of_temporaries(CFG), - alloc(NewDefun, SpillIndex, SpillLimit, Options). +ra(CFG, Liveness, SpillIndex, Options) -> + SpillLimit = hipe_ppc_specific:number_of_temporaries(CFG, no_context), + alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). -alloc(Defun, SpillIndex, SpillLimit, Options) -> - CFG = hipe_ppc_cfg:init(Defun), +alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) -> {Coloring, _NewSpillIndex} = regalloc( - CFG, + CFG, Liveness, hipe_ppc_registers:allocatable_gpr()-- [hipe_ppc_registers:temp3(), hipe_ppc_registers:temp2(), hipe_ppc_registers:temp1()], [hipe_ppc_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, - hipe_ppc_specific), - {NewDefun, _DidSpill} = + hipe_ppc_specific, no_context), + {NewCFG, _DidSpill} = hipe_ppc_ra_postconditions:check_and_rewrite( - Defun, Coloring, 'linearscan'), - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific), + CFG, Coloring, 'linearscan'), + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific, no_context), {TempMap2,_NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, - hipe_ppc_specific, TempMap), + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + hipe_ppc_specific, no_context, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), - {NewDefun, Coloring2}. + {NewCFG, Liveness, Coloring2}. -regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> - hipe_ls_regalloc:regalloc( - CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target). +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, + TgtMod, TgtCtx) -> + hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, + DontSpill, Options, TgtMod, TgtCtx). diff --git a/lib/hipe/ppc/hipe_ppc_ra_naive.erl b/lib/hipe/ppc/hipe_ppc_ra_naive.erl index 24995be252..322fb1a171 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_naive.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_naive.erl @@ -20,11 +20,11 @@ %% -module(hipe_ppc_ra_naive). --export([ra/3]). +-export([ra/4]). -include("hipe_ppc.hrl"). -ra(Defun, _Coloring_fp, _Options) -> % -> {Defun, Coloring} - {NewDefun,_DidSpill} = - hipe_ppc_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), - {NewDefun, []}. +ra(CFG, Liveness, _Coloring_fp, _Options) -> % -> {CFG, Liveness, Coloring} + {NewCFG,_DidSpill} = + hipe_ppc_ra_postconditions:check_and_rewrite2(CFG, [], 'naive'), + {NewCFG, Liveness, []}. diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl index 0b16ec3891..f084a30e63 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl @@ -25,17 +25,13 @@ -include("hipe_ppc.hrl"). -check_and_rewrite(Defun, Coloring, Allocator) -> - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific), - check_and_rewrite2(Defun, TempMap, Allocator). +check_and_rewrite(CFG, Coloring, Allocator) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific, no_context), + check_and_rewrite2(CFG, TempMap, Allocator). -check_and_rewrite2(Defun, TempMap, Allocator) -> +check_and_rewrite2(CFG, TempMap, Allocator) -> Strategy = strategy(Allocator), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - VarRange = {0, hipe_gensym:get_var(ppc)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. + do_bbs(hipe_ppc_cfg:labels(CFG), TempMap, Strategy, CFG, false). strategy(Allocator) -> case Allocator of @@ -44,6 +40,13 @@ strategy(Allocator) -> 'naive' -> 'fixed' end. +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_ppc_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_ppc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). + do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl index 821aa66c11..81064079aa 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl @@ -23,13 +23,16 @@ -export([check_and_rewrite/2]). -include("hipe_ppc.hrl"). -check_and_rewrite(Defun, Coloring) -> - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific_fp), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, [], false), - VarRange = {0, hipe_gensym:get_var(ppc)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. +check_and_rewrite(CFG, Coloring) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific_fp, no_context), + do_bbs(hipe_ppc_cfg:labels(CFG), TempMap, CFG, false). + +do_bbs([], _TempMap, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_ppc_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, [], DidSpill0), + CFG = hipe_ppc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, CFG, DidSpill). do_insns([I|Insns], TempMap, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap), diff --git a/lib/hipe/ppc/hipe_ppc_registers.erl b/lib/hipe/ppc/hipe_ppc_registers.erl index f4781d5ed7..8f6d9779fc 100644 --- a/lib/hipe/ppc/hipe_ppc_registers.erl +++ b/lib/hipe/ppc/hipe_ppc_registers.erl @@ -201,6 +201,8 @@ is_arg(R) -> _ -> false end. +%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is +%% hard-coded in hipe_ppc_defuse:insn_defs_all_gpr/1 call_clobbered() -> % does the RA strip the type or not? [{?R0,tagged},{?R0,untagged}, %% R1 is reserved for C diff --git a/lib/hipe/ppc/hipe_ppc_subst.erl b/lib/hipe/ppc/hipe_ppc_subst.erl new file mode 100644 index 0000000000..5e43fd6471 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_subst.erl @@ -0,0 +1,82 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2016. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_ppc_subst). +-export([insn_temps/2]). +-include("hipe_ppc.hrl"). + +%% These should be moved to hipe_ppc and exported +-type temp() :: #ppc_temp{}. +-type oper() :: temp() | #ppc_simm16{} | #ppc_uimm16{}. +-type arg() :: temp() | integer(). +-type insn() :: tuple(). % for now + +-type subst_fun() :: fun((temp()) -> temp()). + +%% @doc Maps over the temporaries in an instruction +-spec insn_temps(subst_fun(), insn()) -> insn(). +insn_temps(T, I) -> + A = fun(O) -> arg_temps(T, O) end, + O = fun(O) -> oper_temps(T, O) end, + case I of + #alu{dst=D,src1=L,src2=R} -> I#alu{dst=T(D),src1=T(L),src2=O(R)}; + #b_label{} -> I; + %% #bc{} -> I; + #bctr{} -> I; + #blr{} -> I; + #cmp{src1=L,src2=R} -> I#cmp{src1=T(L),src2=O(R)}; + #comment{} -> I; + #label{} -> I; + #load{dst=D,base=B} -> I#load{dst=T(D),base=T(B)}; + #loadx{dst=D,base1=L,base2=R} -> I#loadx{dst=T(D),base1=T(L),base2=T(R)}; + #mfspr{dst=D} -> I#mfspr{dst=T(D)}; + #mtcr{src=S} -> I#mtcr{src=T(S)}; + #mtspr{src=S} -> I#mtspr{src=T(S)}; + #pseudo_bc{} -> I; + #pseudo_call{func=F} when not is_record(F, ppc_temp) -> I; + #pseudo_call_prepare{} -> I; + #pseudo_li{dst=D} -> I#pseudo_li{dst=T(D)}; + #pseudo_move{dst=D,src=S} -> I#pseudo_move{dst=T(D),src=T(S)}; + #pseudo_tailcall{func=F,stkargs=Stk} when not is_record(F, ppc_temp) -> + I#pseudo_tailcall{stkargs=lists:map(A,Stk)}; + #pseudo_tailcall_prepare{} -> I; + #store{src=S,base=B} -> I#store{src=T(S),base=T(B)}; + #storex{src=S,base1=L,base2=R} -> + I#storex{src=T(S),base1=T(L),base2=T(R)}; + #unary{dst=D,src=S} -> I#unary{dst=T(D),src=T(S)}; + #lfd{dst=D,base=B} -> I#lfd{dst=T(D),base=T(B)}; + #lfdx{dst=D,base1=L,base2=R} -> I#lfdx{dst=T(D),base1=T(L),base2=T(R)}; + #stfd{src=S,base=B} -> I#stfd{src=T(S),base=T(B)}; + #stfdx{src=S,base1=L,base2=R} -> I#stfdx{src=T(S),base1=T(L),base2=T(R)}; + #fp_binary{dst=D,src1=L,src2=R} -> + I#fp_binary{dst=T(D),src1=T(L),src2=T(R)}; + #fp_unary{dst=D,src=S} -> I#fp_unary{dst=T(D),src=T(S)}; + #pseudo_fmove{dst=D,src=S} -> I#pseudo_fmove{dst=T(D),src=T(S)} + end. + +-spec oper_temps(subst_fun(), oper()) -> oper(). +oper_temps(SubstTemp, T=#ppc_temp{}) -> SubstTemp(T); +oper_temps(_SubstTemp, I=#ppc_simm16{}) -> I; +oper_temps(_SubstTemp, I=#ppc_uimm16{}) -> I. + +-spec arg_temps(subst_fun(), arg()) -> arg(). +arg_temps(_SubstTemp, Imm) when is_integer(Imm) -> Imm; +arg_temps(SubstTemp, T=#ppc_temp{}) -> SubstTemp(T). diff --git a/lib/hipe/ppc/hipe_rtl_to_ppc.erl b/lib/hipe/ppc/hipe_rtl_to_ppc.erl index a994659616..a01e67a789 100644 --- a/lib/hipe/ppc/hipe_rtl_to_ppc.erl +++ b/lib/hipe/ppc/hipe_rtl_to_ppc.erl @@ -1031,7 +1031,7 @@ conv_return(I, Map, Data) -> {I2, Map0, Data}. conv_store(I, Map, Data) -> - {Base1, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {Base1, Map0} = conv_src(hipe_rtl:store_base(I), Map), {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {Base2, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), StoreSize = hipe_rtl:store_size(I), @@ -1056,13 +1056,28 @@ mk_store(Src, Base1, Base2, StoreSize) -> end. mk_store2(Src, Base1, Base2, StOp) -> - case hipe_ppc:is_temp(Base2) of + case hipe_ppc:is_temp(Base1) of true -> - mk_store_rr(Src, Base1, Base2, StOp); + case hipe_ppc:is_temp(Base2) of + true -> + mk_store_rr(Src, Base1, Base2, StOp); + _ -> + mk_store_ri(Src, Base1, Base2, StOp) + end; _ -> - mk_store_ri(Src, Base1, Base2, StOp) + case hipe_ppc:is_temp(Base2) of + true -> + mk_store_ri(Src, Base2, Base1, StOp); + _ -> + mk_store_ii(Src, Base1, Base2, StOp) + end end. - + +mk_store_ii(Src, Base, Disp, StOp) -> + Tmp = new_untagged_temp(), + mk_li(Tmp, Base, + mk_store_ri(Src, Tmp, Disp, StOp)). + mk_store_ri(Src, Base, Disp, StOp) -> hipe_ppc:mk_store(StOp, Src, Disp, Base, 'new', []). diff --git a/lib/hipe/regalloc/Makefile b/lib/hipe/regalloc/Makefile index aaa4418f37..209f230a9b 100644 --- a/lib/hipe/regalloc/Makefile +++ b/lib/hipe/regalloc/Makefile @@ -51,6 +51,7 @@ MODULES = hipe_ig hipe_ig_moves hipe_moves \ hipe_coalescing_regalloc \ hipe_graph_coloring_regalloc \ hipe_regalloc_loop \ + hipe_regalloc_prepass \ hipe_ls_regalloc \ hipe_ppc_specific hipe_ppc_specific_fp \ hipe_sparc_specific hipe_sparc_specific_fp \ @@ -123,7 +124,6 @@ $(EBIN)/hipe_amd64_specific_x87.beam: hipe_x86_specific_x87.erl $(EBIN)/hipe_coalescing_regalloc.beam: ../main/hipe.hrl $(EBIN)/hipe_graph_coloring_regalloc.beam: ../main/hipe.hrl $(EBIN)/hipe_ig.beam: ../main/hipe.hrl ../flow/cfg.hrl hipe_spillcost.hrl -$(EBIN)/hipe_ig_moves.beam: ../util/hipe_vectors.hrl $(EBIN)/hipe_ls_regalloc.beam: ../main/hipe.hrl $(EBIN)/hipe_optimistic_regalloc.beam: ../main/hipe.hrl $(EBIN)/hipe_regalloc_loop.beam: ../main/hipe.hrl diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl index 50e5869d45..890df1b81a 100644 --- a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl +++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl @@ -21,38 +21,47 @@ -module(hipe_amd64_specific_sse2). --export([number_of_temporaries/1]). +-export([number_of_temporaries/2]). % The following exports are used as M:F(...) calls from other modules; %% e.g. hipe_amd64_ra_ls. --export([analyze/1, - bb/2, - args/1, - labels/1, - livein/2, - liveout/2, - uses/1, - defines/1, - def_use/1, - is_arg/1, %% used by hipe_ls_regalloc - is_move/1, - is_fixed/1, %% used by hipe_graph_coloring_regalloc - is_global/1, - is_precoloured/1, - reg_nr/1, - non_alloc/1, - allocatable/0, - physical_name/1, - all_precoloured/0, - new_spill_index/1, %% used by hipe_ls_regalloc - var_range/1, - breadthorder/1, - postorder/1, - reverse_postorder/1]). +-export([analyze/2, + bb/3, + args/2, + labels/2, + livein/3, + liveout/3, + uses/2, + defines/2, + defines_all_alloc/2, + def_use/2, + is_arg/2, %% used by hipe_ls_regalloc + is_move/2, + is_fixed/2, %% used by hipe_graph_coloring_regalloc + is_global/2, + is_precoloured/2, + reg_nr/2, + non_alloc/2, + allocatable/1, + allocatable/2, + temp0/1, + physical_name/2, + all_precoloured/1, + new_spill_index/2, %% used by hipe_ls_regalloc + var_range/2, + breadthorder/2, + postorder/2, + reverse_postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3, + check_and_rewrite/4]). + +%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). %%---------------------------------------------------------------------------- @@ -60,86 +69,99 @@ %%---------------------------------------------------------------------------- -defun_to_cfg(Defun) -> - hipe_x86_cfg:init(Defun). +check_and_rewrite(CFG, Coloring, no_context) -> + hipe_amd64_ra_sse2_postconditions:check_and_rewrite(CFG, Coloring). -check_and_rewrite(Defun, Coloring) -> - hipe_amd64_ra_sse2_postconditions:check_and_rewrite(Defun, Coloring). +check_and_rewrite(CFG, Coloring, Strategy, no_context) -> + hipe_amd64_ra_sse2_postconditions:check_and_rewrite( + CFG, Coloring, Strategy). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_x86_cfg:reverse_postorder(CFG). -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_x86_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_x86_cfg:postorder(CFG). -is_global(_Reg) -> - false. +is_global(Reg, _) -> + hipe_amd64_registers:sse2_temp0() =:= Reg. -is_fixed(_Reg) -> +is_fixed(_Reg, _) -> false. -is_arg(_Reg) -> +is_arg(_Reg, _) -> false. --spec args(#cfg{}) -> []. -args(_CFG) -> +-spec args(#cfg{}, no_context) -> []. +args(_CFG, _) -> []. -non_alloc(_) -> +non_alloc(_, _) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> hipe_amd64_liveness:analyze(CFG). -livein(Liveness, L) -> +livein(Liveness, L, _) -> [X || X <- hipe_amd64_liveness:livein(Liveness, L), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double']. -liveout(BB_in_out_liveness, Label) -> +liveout(BB_in_out_liveness, Label, _) -> [X || X <- hipe_amd64_liveness:liveout(BB_in_out_liveness, Label), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double']. %% Registers stuff -allocatable() -> - hipe_amd64_registers:allocatable_sse2(). +allocatable(Ctx) -> + allocatable('normal', Ctx). + +allocatable('normal', _) -> + hipe_amd64_registers:allocatable_sse2(); +allocatable('linearscan', _) -> + hipe_amd64_registers:allocatable_sse2() -- + [hipe_amd64_registers:sse2_temp0()]. -all_precoloured() -> - allocatable(). +temp0(_) -> + hipe_amd64_registers:sse2_temp0(). -is_precoloured(Reg) -> - lists:member(Reg,all_precoloured()). +all_precoloured(Ctx) -> + allocatable(Ctx). -physical_name(Reg) -> +is_precoloured(Reg, Ctx) -> + lists:member(Reg,all_precoloured(Ctx)). + +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_x86_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) -> hipe_gensym:var_range(x86). --spec number_of_temporaries(#cfg{}) -> non_neg_integer(). -number_of_temporaries(_CFG) -> +-spec number_of_temporaries(#cfg{}, no_context) -> non_neg_integer(). +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(x86), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG, L) -> +bb(CFG, L, _) -> hipe_x86_cfg:bb(CFG, L). +update_bb(CFG,L,BB,_) -> + hipe_x86_cfg:bb_add(CFG,L,BB). + %% AMD64 stuff -def_use(Instruction) -> +def_use(Instruction, _) -> {[X || X <- hipe_amd64_defuse:insn_def(Instruction), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double'], @@ -148,17 +170,19 @@ def_use(Instruction) -> hipe_x86:temp_type(X) =:= 'double'] }. -uses(I) -> +uses(I, _) -> [X || X <- hipe_amd64_defuse:insn_use(I), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double']. -defines(I) -> +defines(I, _) -> [X || X <- hipe_amd64_defuse:insn_def(I), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double']. -is_move(Instruction) -> +defines_all_alloc(I, _) -> hipe_amd64_defuse:insn_defs_all(I). + +is_move(Instruction, _) -> case hipe_x86:is_fmove(Instruction) of true -> Src = hipe_x86:fmove_src(Instruction), @@ -168,9 +192,26 @@ is_move(Instruction) -> false -> false end. -reg_nr(Reg) -> +reg_nr(Reg, _) -> hipe_x86:temp_reg(Reg). --spec new_spill_index(non_neg_integer()) -> pos_integer(). -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +new_reg_nr(_) -> + hipe_gensym:get_next_var(x86). + +update_reg_nr(Nr, _Temp, _) -> + hipe_x86:mk_temp(Nr, 'double'). + +subst_temps(SubstFun, Instr, _) -> + hipe_amd64_subst:insn_temps( + fun(Op) -> + case hipe_x86:temp_is_allocatable(Op) + andalso hipe_x86:temp_type(Op) =:= 'double' + of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + +-spec new_spill_index(non_neg_integer(), no_context) -> pos_integer(). +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) -> SpillIndex + 1. diff --git a/lib/hipe/regalloc/hipe_arm_specific.erl b/lib/hipe/regalloc/hipe_arm_specific.erl index 4e34cb1d99..06ab17b0e9 100644 --- a/lib/hipe/regalloc/hipe_arm_specific.erl +++ b/lib/hipe/regalloc/hipe_arm_specific.erl @@ -22,114 +22,123 @@ -module(hipe_arm_specific). %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 - ,analyze/1 - ,labels/1 - ,all_precoloured/0 - ,bb/2 - ,liveout/2 - ,reg_nr/1 - ,def_use/1 - ,is_move/1 - ,is_precoloured/1 - ,var_range/1 - ,allocatable/0 - ,non_alloc/1 - ,physical_name/1 - ,reverse_postorder/1 - ,livein/2 - ,uses/1 - ,defines/1 +-export([number_of_temporaries/2 + ,analyze/2 + ,labels/2 + ,all_precoloured/1 + ,bb/3 + ,liveout/3 + ,reg_nr/2 + ,def_use/2 + ,is_move/2 + ,is_precoloured/2 + ,var_range/2 + ,allocatable/1 + ,non_alloc/2 + ,physical_name/2 + ,reverse_postorder/2 + ,livein/3 + ,uses/2 + ,defines/2 + ,defines_all_alloc/2 ]). %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]). %% for hipe_ls_regalloc: --export([args/1, is_arg/1, is_global/1, new_spill_index/1]). --export([breadthorder/1, postorder/1]). +-export([args/2, is_arg/2, is_global/2, new_spill_index/2]). +-export([breadthorder/2, postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3]). -defun_to_cfg(Defun) -> - hipe_arm_cfg:init(Defun). +%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). -check_and_rewrite(Defun, Coloring) -> - hipe_arm_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring, no_context) -> + hipe_arm_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_arm_cfg:reverse_postorder(CFG). -non_alloc(CFG) -> - non_alloc(hipe_arm_registers:nr_args(), hipe_arm_cfg:params(CFG)). +non_alloc(CFG, no_context) -> + non_alloc_1(hipe_arm_registers:nr_args(), hipe_arm_cfg:params(CFG)). %% same as hipe_arm_frame:fix_formals/2 -non_alloc(0, Rest) -> Rest; -non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); -non_alloc(_, []) -> []. +non_alloc_1(0, Rest) -> Rest; +non_alloc_1(N, [_|Rest]) -> non_alloc_1(N-1, Rest); +non_alloc_1(_, []) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> hipe_arm_liveness_gpr:analyse(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) -> [X || X <- hipe_arm_liveness_gpr:livein(Liveness,L), hipe_arm:temp_is_allocatable(X)]. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) -> [X || X <- hipe_arm_liveness_gpr:liveout(BB_in_out_liveness,Label), hipe_arm:temp_is_allocatable(X)]. %% Registers stuff -allocatable() -> +allocatable(no_context) -> hipe_arm_registers:allocatable_gpr(). -all_precoloured() -> +all_precoloured(no_context) -> hipe_arm_registers:all_precoloured(). -is_precoloured(Reg) -> +is_precoloured(Reg, _) -> hipe_arm_registers:is_precoloured_gpr(Reg). -is_fixed(R) -> +is_fixed(R, _) -> hipe_arm_registers:is_fixed(R). -physical_name(Reg) -> +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_arm_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) -> hipe_gensym:var_range(arm). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(arm), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) -> hipe_arm_cfg:bb(CFG,L). +update_bb(CFG,L,BB,_) -> + hipe_arm_cfg:bb_add(CFG,L,BB). + %% ARM stuff -def_use(Instruction) -> - {defines(Instruction), uses(Instruction)}. +def_use(Instruction, Ctx) -> + {defines(Instruction, Ctx), uses(Instruction, Ctx)}. -uses(I) -> +uses(I, _) -> [X || X <- hipe_arm_defuse:insn_use_gpr(I), hipe_arm:temp_is_allocatable(X)]. -defines(I) -> +defines(I, _) -> [X || X <- hipe_arm_defuse:insn_def_gpr(I), hipe_arm:temp_is_allocatable(X)]. -is_move(Instruction) -> +defines_all_alloc(I, _) -> + hipe_arm_defuse:insn_defs_all_gpr(I). + +is_move(Instruction, _) -> case hipe_arm:is_pseudo_move(Instruction) of true -> Dst = hipe_arm:pseudo_move_dst(Instruction), @@ -142,28 +151,43 @@ is_move(Instruction) -> false -> false end. -reg_nr(Reg) -> +reg_nr(Reg, _) -> hipe_arm:temp_reg(Reg). +new_reg_nr(_) -> + hipe_gensym:get_next_var(arm). + +update_reg_nr(Nr, Temp, _) -> + hipe_arm:mk_temp(Nr, hipe_arm:temp_type(Temp)). + +subst_temps(SubstFun, Instr, _) -> + hipe_arm_subst:insn_temps( + fun(Op) -> + case hipe_arm:temp_is_allocatable(Op) of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + %%% Linear Scan stuff -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) -> SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_arm_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_arm_cfg:postorder(CFG). -is_global(R) -> +is_global(R, _) -> R =:= hipe_arm_registers:temp1() orelse R =:= hipe_arm_registers:temp2() orelse R =:= hipe_arm_registers:temp3() orelse hipe_arm_registers:is_fixed(R). -is_arg(R) -> +is_arg(R, _) -> hipe_arm_registers:is_arg(R). -args(CFG) -> +args(CFG, _) -> hipe_arm_registers:args(hipe_arm_cfg:arity(CFG)). diff --git a/lib/hipe/regalloc/hipe_coalescing_regalloc.erl b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl index e2f817d369..00bfbaa1b6 100644 --- a/lib/hipe/regalloc/hipe_coalescing_regalloc.erl +++ b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl @@ -30,7 +30,7 @@ %%----------------------------------------------------------------------- -module(hipe_coalescing_regalloc). --export([regalloc/5]). +-export([regalloc/7]). %%-ifndef(DEBUG). %%-define(DEBUG,true). @@ -51,19 +51,21 @@ %% %% Returns: %% Coloring -- A coloring for specified CFG -%% SpillIndex0 -- A new spill index +%% SpillIndex2 -- A new spill index %%----------------------------------------------------------------------- -regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> +regalloc(CFG, Liveness, SpillIndex, SpillLimit, TargetMod, TargetContext, + _Options) -> + Target = {TargetMod, TargetContext}, %% Build interference graph ?debug_msg("Build IG\n", []), - IG = hipe_ig:build(CFG, Target), + IG = hipe_ig:build(CFG, Liveness, TargetMod, TargetContext), %% io:format("IG: ~p\n", [IG]), ?debug_msg("Init\n", []), - Num_Temps = Target:number_of_temporaries(CFG), + Num_Temps = TargetMod:number_of_temporaries(CFG,TargetContext), ?debug_msg("Coalescing RA: num_temps = ~p~n", [Num_Temps]), - Allocatable = Target:allocatable(), + Allocatable = TargetMod:allocatable(TargetContext), K = length(Allocatable), All_colors = colset_from_list(Allocatable), @@ -72,7 +74,8 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> Move_sets = hipe_moves:new(IG), ?debug_msg("Build Worklist\n", []), - Worklists = hipe_reg_worklists:new(IG, Target, CFG, Move_sets, K, Num_Temps), + Worklists = hipe_reg_worklists:new(IG, TargetMod, TargetContext, CFG, + Move_sets, K, Num_Temps), Alias = initAlias(Num_Temps), ?debug_msg("Do coloring\n~p~n", [Worklists]), @@ -81,10 +84,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> %% io:format("SelStk0 ~w\n",[SelStk0]), ?debug_msg("Init node sets\n", []), Node_sets = hipe_node_sets:new(), - %% io:format("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,Target:non_alloc(CFG)]), + %% io:format("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,non_alloc(CFG,Target)]), ?debug_msg("Default coloring\n", []), {Color0,Node_sets1} = - defaultColoring(Target:all_precoloured(), + defaultColoring(TargetMod:all_precoloured(TargetContext), initColor(Num_Temps), Node_sets, Target), ?debug_msg("Assign colors\n", []), @@ -94,9 +97,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> %% io:format("color0:~w\nColor1:~w\nNodes:~w\nNodes2:~w\nNum_Temps:~w\n",[Color0,Color1,Node_sets,Node_sets2,Num_Temps]), ?debug_msg("Build mapping ~p\n", [Node_sets2]), - Coloring = build_namelist(Node_sets2, SpillIndex, Alias0, Color1), + {Coloring, SpillIndex2} = + build_namelist(Node_sets2, SpillIndex, Alias0, Color1), ?debug_msg("Coloring ~p\n", [Coloring]), - Coloring. + {Coloring, SpillIndex2}. %%---------------------------------------------------------------------- %% Function: do_coloring @@ -379,7 +383,7 @@ assignColors(Stack, NodeSets, Color, Alias, AllColors, Target) -> false -> % Colour case Col = colset_smallest(OkColors), NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), - Color1 = setColor(Node, Target:physical_name(Col), Color), + Color1 = setColor(Node, physical_name(Col,Target), Color), assignColors(Stack1, NodeSets1, Color1, Alias, AllColors, Target) end end. @@ -402,7 +406,7 @@ assignColors(Stack, NodeSets, Color, Alias, AllColors, Target) -> defaultColoring([], Color, NodeSets, _Target) -> {Color,NodeSets}; defaultColoring([Reg|Regs], Color, NodeSets, Target) -> - Color1 = setColor(Reg,Target:physical_name(Reg), Color), + Color1 = setColor(Reg,physical_name(Reg,Target), Color), NodeSets1 = hipe_node_sets:add_colored(Reg, NodeSets), defaultColoring(Regs, Color1, NodeSets1, Target). @@ -567,7 +571,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) -> ?debug_msg("Testing nodes ~p and ~p for coalescing~n",[Dest,Source]), Alias_src = getAlias(Source, Alias), Alias_dst = getAlias(Dest, Alias), - {U,V} = case Target:is_precoloured(Alias_dst) of + {U,V} = case is_precoloured(Alias_dst,Target) of true -> {Alias_dst, Alias_src}; false -> {Alias_src, Alias_dst} end, @@ -577,7 +581,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) -> Worklists1 = add_worklist(Worklists, U, K, Moves1, IG, Target), {Moves1, IG, Worklists1, Alias}; true -> - case (Target:is_precoloured(V) orelse + case (is_precoloured(V,Target) orelse hipe_ig:nodes_are_adjacent(U, V, IG)) of true -> Moves1 = Moves0, % drop constrained move Move @@ -585,7 +589,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) -> Worklists2 = add_worklist(Worklists1, V, K, Moves1, IG, Target), {Moves1, IG, Worklists2, Alias}; false -> - case (case Target:is_precoloured(U) of + case (case is_precoloured(U,Target) of true -> AdjV = hipe_ig:node_adj_list(V, IG), all_adjacent_ok(AdjV, U, Worklists, IG, K, Target); @@ -627,7 +631,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) -> %%---------------------------------------------------------------------- add_worklist(Worklists, U, K, Moves, IG, Target) -> - case (not(Target:is_precoloured(U)) + case (not(is_precoloured(U,Target)) andalso not(hipe_moves:move_related(U, Moves)) andalso (hipe_ig:is_trivially_colourable(U, K, IG))) of true -> @@ -711,7 +715,7 @@ combine(U, V, IG, Worklists, Moves, Alias, K, Target) -> combine_edges([], _U, IG, Worklists, Moves, _K, _Target) -> {IG, Worklists, Moves}; -combine_edges([T|Ts], U, IG, Worklists, Moves, K, Target) -> +combine_edges([T|Ts], U, IG, Worklists, Moves, K, Target={TgtMod,TgtCtx}) -> case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of true -> combine_edges(Ts, U, IG, Worklists, Moves, K, Target); _ -> @@ -728,7 +732,7 @@ combine_edges([T|Ts], U, IG, Worklists, Moves, K, Target) -> %% worklist, and that's where decrement_degree() expects to find it. %% This issue is not covered in the published algorithm. OldDegree = hipe_ig:get_node_degree(T, IG), - IG1 = hipe_ig:add_edge(T, U, IG, Target), + IG1 = hipe_ig:add_edge(T, U, IG, TgtMod, TgtCtx), NewDegree = hipe_ig:get_node_degree(T, IG1), Worklists0 = if NewDegree =:= K, OldDegree =:= K-1 -> @@ -767,7 +771,7 @@ combine_edges([T|Ts], U, IG, Worklists, Moves, K, Target) -> ok(T, R, IG, K, Target) -> ((hipe_ig:is_trivially_colourable(T, K, IG)) - orelse Target:is_precoloured(T) + orelse is_precoloured(T,Target) orelse hipe_ig:nodes_are_adjacent(T, R, IG)). %%---------------------------------------------------------------------- @@ -1028,3 +1032,15 @@ freezeEm3(_U, V, _M, K, WorkLists, Moves, IG, _Alias) -> false -> {WorkLists, Moves1} end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to external functions. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +is_precoloured(R, {TgtMod,TgtCtx}) -> + TgtMod:is_precoloured(R,TgtCtx). + +physical_name(R, {TgtMod,TgtCtx}) -> + TgtMod:physical_name(R,TgtCtx). diff --git a/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl index bc6e442236..e91734d8be 100644 --- a/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl +++ b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl @@ -51,7 +51,7 @@ %% -module(hipe_graph_coloring_regalloc). --export([regalloc/5]). +-export([regalloc/7]). %%-ifndef(DO_ASSERT). %%-define(DO_ASSERT, true). @@ -77,18 +77,21 @@ %% that the coloring agrees with the interference graph (that is, that %% no neighbors have the same register or spill location). -%% @spec regalloc(#cfg{}, non_neg_fixnum(), non_neg_fixnum(), atom(), list()) -> {, non_neg_fixnum()} +%% @spec regalloc(#cfg{}, liveness(), non_neg_fixnum(), non_neg_fixnum(), +%% module(), tgt_ctx(), list()) -> {, non_neg_fixnum()} -regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> - PhysRegs = Target:allocatable(), +regalloc(CFG, Live, SpillIndex, SpillLimit, TargetMod, TargetContext, + _Options) -> + Target = {TargetMod, TargetContext}, + PhysRegs = allocatable(Target), ?report2("building IG~n", []), - {IG, Spill} = build_ig(CFG, Target), + {IG, Spill} = build_ig(CFG, Live, Target), %% check_ig(IG), ?report3("graph: ~p~nphysical regs: ~p~n", [list_ig(IG), PhysRegs]), %% These nodes *can't* be allocated to registers. - NotAllocatable = [Target:reg_nr(X) || X <- Target:non_alloc(CFG)], + NotAllocatable = non_alloc(CFG, Target), %% i.e. Arguments on x86 ?report2("Nonalloc ~w~n", [NotAllocatable]), @@ -97,7 +100,7 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ordsets:from_list(PhysRegs), SpillIndex, SpillLimit, - Target:number_of_temporaries(CFG), + number_of_temporaries(CFG, Target), Target, NotAllocatable), Coloring = [{X, {reg, X}} || X <- NotAllocatable] ++ Cols, ?ASSERT(check_coloring(Coloring, IG, Target)), @@ -112,15 +115,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> %% Returns {Interference_graph, Spill_cost_dictionary} %% -build_ig(CFG, Target) -> - try build_ig0(CFG, Target) - catch error:Rsn -> exit({?MODULE, build_ig, Rsn}) - end. - -build_ig0(CFG, Target) -> - Live = Target:analyze(CFG), - NumN = Target:number_of_temporaries(CFG), % poss. N-1? - {IG, Spill} = build_ig_bbs(Target:labels(CFG), +build_ig(CFG, Live, Target) -> + NumN = number_of_temporaries(CFG, Target), % poss. N-1? + {IG, Spill} = build_ig_bbs(labels(CFG, Target), CFG, Live, empty_ig(NumN), @@ -208,17 +205,8 @@ set_spill_cost(X, N, Spill) -> %% * add low-degree neighbors of z to low %% * restart the while-loop above -color(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, NotAllocatable) -> - try color_0(IG, Spill, PhysRegs, SpillIx, SpillLimit, - NumNodes, Target, NotAllocatable) - catch - error:Rsn -> - ?error_msg("Coloring failed with ~p~n", [Rsn]), - ?EXIT(Rsn) - end. - -color_0(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, - NotAllocatable) -> +color(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, + NotAllocatable) -> ?report("simplification of IG~n", []), K = ordsets:size(PhysRegs), Nodes = list_ig(IG), @@ -234,7 +222,7 @@ color_0(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, ?report(" starting with low degree nodes ~p~n",[Low]), EmptyStk = [], - Precolored = Target:all_precoloured(), + Precolored = all_precoloured(Target), {Stk, NewSpillIx} = simplify(Low, NumNodes, Precolored, IG, Spill, K, SpillIx, EmptyStk, @@ -415,7 +403,7 @@ spill_costs([{N,Info}|Ns], IG, Vis, Spill, SpillLimit, Target) -> true -> spill_costs(Ns,IG,Vis,Spill, SpillLimit, Target); _ -> - case Target:is_fixed(N) of + case is_fixed(N, Target) of true -> spill_costs(Ns, IG, Vis, Spill, SpillLimit, Target); false -> @@ -772,18 +760,36 @@ valid_coloring(X, C, [_|Ys]) -> %% *** INTERFACES TO OTHER MODULES *** %% -liveout(CFG, L, Target) -> - ordsets:from_list(reg_names(Target:liveout(CFG, L), Target)). +all_precoloured({TgtMod,TgtCtx}) -> + TgtMod:all_precoloured(TgtCtx). + +allocatable({TgtMod,TgtCtx}) -> + TgtMod:allocatable(TgtCtx). + +is_fixed(Reg, {TgtMod,TgtCtx}) -> + TgtMod:is_fixed(Reg, TgtCtx). + +labels(CFG, {TgtMod,TgtCtx}) -> + TgtMod:labels(CFG, TgtCtx). + +liveout(CFG, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:liveout(CFG, L, TgtCtx), Target)). + +bb(CFG, L, {TgtMod,TgtCtx}) -> + hipe_bb:code(TgtMod:bb(CFG, L, TgtCtx)). + +def_use(X, Target={TgtMod,TgtCtx}) -> + {ordsets:from_list(reg_names(TgtMod:defines(X,TgtCtx), Target)), + ordsets:from_list(reg_names(TgtMod:uses(X,TgtCtx), Target))}. -bb(CFG, L, Target) -> - hipe_bb:code(Target:bb(CFG, L)). +non_alloc(CFG, Target={TgtMod,TgtCtx}) -> + reg_names(TgtMod:non_alloc(CFG, TgtCtx), Target). -def_use(X, Target) -> - {ordsets:from_list(reg_names(Target:defines(X), Target)), - ordsets:from_list(reg_names(Target:uses(X), Target))}. +number_of_temporaries(CFG, {TgtMod,TgtCtx}) -> + TgtMod:number_of_temporaries(CFG, TgtCtx). -reg_names(Regs, Target) -> - [Target:reg_nr(X) || X <- Regs]. +reg_names(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. %% %% Precoloring: use this version when a proper implementation of @@ -803,5 +809,5 @@ precolor0([R|Rs], Cols, Target) -> {[{R, {reg, physical_name(R, Target)}}|Cs], set_color(R, physical_name(R, Target), Cols1)}. -physical_name(X, Target) -> - Target:physical_name(X). +physical_name(X, {TgtMod,TgtCtx}) -> + TgtMod:physical_name(X, TgtCtx). diff --git a/lib/hipe/regalloc/hipe_ig.erl b/lib/hipe/regalloc/hipe_ig.erl index 8fd5d0df1f..81eee2e03c 100644 --- a/lib/hipe/regalloc/hipe_ig.erl +++ b/lib/hipe/regalloc/hipe_ig.erl @@ -28,7 +28,7 @@ -module(hipe_ig). --export([build/2, +-export([build/4, nodes_are_adjacent/3, node_spill_cost/2, node_adj_list/2, @@ -38,8 +38,8 @@ spill_costs/1, adj_list/1, %% adj_set/1, - add_edge/4, - remove_edge/4, + add_edge/5, + remove_edge/5, %% set_adj_set/2, %% set_adj_list/2, %% set_ig_moves/2, @@ -64,6 +64,9 @@ -include("../flow/cfg.hrl"). -include("hipe_spillcost.hrl"). +-type target_context() :: any(). +-type target() :: {TargetMod :: module(), TargetContext :: target_context()}. + %%---------------------------------------------------------------------- -record(igraph, {adj_set, adj_list, ig_moves, degree, @@ -78,11 +81,11 @@ %% degree, and testing for trivial colourability (degree < K). %%---------------------------------------------------------------------- -degree_new(No_temporaries, Target) -> +degree_new(No_temporaries, {TargetMod, TargetCtx}) -> Degree = hipe_bifs:array(No_temporaries, 0), - K = length(Target:allocatable()), + K = length(TargetMod:allocatable(TargetCtx)), Inf = K + No_temporaries, - precoloured_to_inf_degree(Target:all_precoloured(), Inf, Degree). + precoloured_to_inf_degree(TargetMod:all_precoloured(TargetCtx), Inf, Degree). precoloured_to_inf_degree([], _Inf, Degree) -> Degree; precoloured_to_inf_degree([P|Ps], Inf, Degree) -> @@ -344,7 +347,7 @@ set_spill_costs(Spill_costs, IG) -> IG#igraph{spill_costs = Spill_costs}. %% A new interference record %%---------------------------------------------------------------------- --spec initial_ig(non_neg_integer(), atom()) -> #igraph{}. +-spec initial_ig(non_neg_integer(), target()) -> #igraph{}. initial_ig(NumTemps, Target) -> #igraph{adj_set = adjset_new(NumTemps), @@ -361,20 +364,21 @@ initial_ig(NumTemps, Target) -> %% Description: Constructs an interference graph for the specifyed CFG. %% %% Parameters: -%% CFG -- A Control Flow Graph -%% Target -- The module that contains the target-specific functions +%% CFG -- A Control Flow Graph +%% TargetMod -- The module that contains the target-specific functions +%% TargetCtx -- Context data to pass to TargetMod %% %% Returns: %% An interference graph for the given CFG. %%---------------------------------------------------------------------- --spec build(#cfg{}, atom()) -> #igraph{}. +-spec build(#cfg{}, Liveness::_, module(), target_context()) -> #igraph{}. -build(CFG, Target) -> - BBs_in_out_liveness = Target:analyze(CFG), - Labels = Target:labels(CFG), +build(CFG, BBs_in_out_liveness, TargetMod, TargetCtx) -> + Target = {TargetMod, TargetCtx}, + Labels = TargetMod:labels(CFG, TargetCtx), %% How many temporaries exist? - NumTemps = Target:number_of_temporaries(CFG), + NumTemps = TargetMod:number_of_temporaries(CFG, TargetCtx), IG0 = initial_ig(NumTemps, Target), %%?debug_msg("initial adjset: ~p\n",[element(2, IG0)]), %%?debug_msg("initial adjset array: ~.16b\n",[element(3, element(2, IG0))]), @@ -395,7 +399,7 @@ build(CFG, Target) -> %% CFG -- The Control Flow Graph that we constructs %% the interference graph from. %% Target -- The module containing the target-specific -%% functions +%% functions, along with its context data %% %% Returns: %% An interference graph for the given CFG. @@ -404,13 +408,11 @@ build(CFG, Target) -> analyze_bbs([], _, IG, _, _) -> IG; analyze_bbs([L|Ls], BBs_in_out_liveness, IG, CFG, Target) -> % Get basic block associated with label L - BB = Target:bb(CFG, L), + BB = bb(CFG, L, Target), % Get basic block code BB_code = hipe_bb:code(BB), - % Temporaries that are live out from this basic block - BB_liveout = Target:liveout(BBs_in_out_liveness, L), - % Only temporary numbers - BB_liveout_numbers = reg_numbers(BB_liveout, Target), + % Temporaries that are live out from this basic block, only numbers + BB_liveout_numbers = liveout(BBs_in_out_liveness, L, Target), % {Liveness, New Interference Graph} {_, New_ig, Ref} = analyze_bb_instructions(BB_code, ordsets:from_list(BB_liveout_numbers), @@ -433,7 +435,8 @@ analyze_bbs([L|Ls], BBs_in_out_liveness, IG, CFG, Target) -> %% Live -- All temporaries that are live at the time. %% Live is a set of temporary "numbers only". %% IG -- The interference graph in it's current state -%% Target -- The mopdule containing the target-specific functions +%% Target -- The mopdule containing the target-specific functions, +%% along with its context data. %% %% Returns: %% Live -- Temporaries that are live at entery of basic block @@ -449,7 +452,7 @@ analyze_bb_instructions([Instruction|Instructions], Live, IG, Target) -> {Live0, IG0, Ref} = analyze_bb_instructions(Instructions, Live, IG, Target), %% Check for temporaries that are defined and used in instruction - {Def, Use} = Target:def_use(Instruction), + {Def, Use} = def_use(Instruction, Target), %% Convert to register numbers Def_numbers = ordsets:from_list(reg_numbers(Def, Target)), Use_numbers = ordsets:from_list(reg_numbers(Use, Target)), @@ -501,14 +504,15 @@ analyze_bb_instructions([Instruction|Instructions], Live, IG, Target) -> %% Def_numbers -- Temporaries that are defined at this instruction %% Use_numbers -- Temporaries that are used at this instruction %% IG -- The interference graph in its current state -%% Target -- The module containing the target-specific functions +%% Target -- The module containing the target-specific functions, along +%% with its context data %% Returns: %% Live -- An updated live set %% IG -- An updated interference graph %%---------------------------------------------------------------------- analyze_move(Instruction, Live, Def_numbers, Use_numbers, IG, Target) -> - case Target:is_move(Instruction) of + case is_move(Instruction,Target) of true -> case {Def_numbers, Use_numbers} of {[Dst], [Src]} -> @@ -554,8 +558,9 @@ interfere([Define|Defines], Living, IG, Target) -> %% Live -- Current live set %% Lives -- Rest of living temporaries. %% IG -- An interference graph -%% Target -- The module containing the target-specific functions -%% Returns: +%% Target -- The module containing the target-specific functions, along +%% with its context data. +%% Returns: %% An updated interference graph %%---------------------------------------------------------------------- @@ -623,11 +628,15 @@ get_moves(IG) -> %% Parameters: %% U -- A temporary number %% V -- A temporary number -%% Target -- The module containing the target-specific functions +%% TargetMod -- The module containing the target-specific functions. +%% TargetCtx -- Context data to pass to TargetMod %% Returns: %% An updated interference graph. %%---------------------------------------------------------------------- +add_edge(U, V, IG, TargetMod, TargetCtx) -> + add_edge(U, V, IG, {TargetMod, TargetCtx}). + add_edge(U, U, IG, _) -> IG; add_edge(U, V, IG, Target) -> case nodes_are_adjacent(U, V, IG) of @@ -652,11 +661,15 @@ add_edge(U, V, IG, Target) -> %% Parameters: %% U -- A temporary number %% V -- A temporary number -%% Target -- The module containing the target-specific functions +%% TargetMod -- The module containing the target-specific functions. +%% TargetCtx -- Context data for TargetMod. %% Returns: %% An updated interference graph. %%---------------------------------------------------------------------- +remove_edge(U, V, IG, TargetMod, TargetCtx) -> + remove_edge(U, V, IG, {TargetMod, TargetCtx}). + remove_edge(U, U, IG, _) -> IG; remove_edge(U, V, IG, Target) -> case nodes_are_adjacent(U, V, IG) of @@ -683,8 +696,8 @@ remove_edge(U, V, IG, Target) -> %% precoloured. %% Adj_list -- An adj_list %% Degree -- The degree that all nodes currently have -%% Target -- The module containing the target-specific -%% functions +%% Target -- The module containing the target-specific +%% functions, along with its context data. %% %% Returns: %% Adj_list -- An updated adj_list data structure @@ -692,7 +705,7 @@ remove_edge(U, V, IG, Target) -> %%---------------------------------------------------------------------- remove_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> - case Target:is_precoloured(Temp) of + case is_precoloured(Temp,Target) of false -> New_adj_list = hipe_adj_list:remove_edge(Temp, InterfereTemp, Adj_list), degree_dec(Temp, Degree), @@ -714,8 +727,8 @@ remove_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> %% precoloured. %% Adj_list -- An adj_list %% Degree -- The degree that all nodes currently have -%% Target -- The module containing the target-specific -%% functions +%% Target -- The module containing the target-specific +%% functions, along with its context data. %% %% Returns: %% Adj_list -- An updated adj_list data structure @@ -723,7 +736,7 @@ remove_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> %%---------------------------------------------------------------------- interfere_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> - case Target:is_precoloured(Temp) of + case is_precoloured(Temp, Target) of false -> New_adj_list = hipe_adj_list:add_edge(Temp, InterfereTemp, Adj_list), degree_inc(Temp, Degree), @@ -740,13 +753,14 @@ interfere_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> %% %% Parameters: %% TRs -- A list of temporary registers -%% Target -- The module containing the target-specific functions +%% Target -- The module containing the target-specific functions, along with +%% its context data. %% Returns: %% A list of register numbers. %%---------------------------------------------------------------------- -reg_numbers(Regs, Target) -> - [Target:reg_nr(X) || X <- Regs]. +reg_numbers(Regs, {TgtMod, TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. %%--------------------------------------------------------------------- %% Print functions - only used for debugging @@ -775,3 +789,24 @@ dec_node_degree(Node, IG) -> is_trivially_colourable(Node, K, IG) -> degree_is_trivially_colourable(Node, K, degree(IG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to external functions. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +bb(CFG, L, {TgtMod,TgtCtx}) -> + TgtMod:bb(CFG,L,TgtCtx). + +def_use(Instruction, {TgtMod,TgtCtx}) -> + TgtMod:def_use(Instruction, TgtCtx). + +is_move(Instruction, {TgtMod,TgtCtx}) -> + TgtMod:is_move(Instruction, TgtCtx). + +is_precoloured(R, {TgtMod,TgtCtx}) -> + TgtMod:is_precoloured(R,TgtCtx). + +liveout(Liveness,L, Target={TgtMod,TgtCtx}) -> + reg_numbers(TgtMod:liveout(Liveness,L,TgtCtx), Target). diff --git a/lib/hipe/regalloc/hipe_ig_moves.erl b/lib/hipe/regalloc/hipe_ig_moves.erl index b679453de0..2a70606dab 100644 --- a/lib/hipe/regalloc/hipe_ig_moves.erl +++ b/lib/hipe/regalloc/hipe_ig_moves.erl @@ -25,8 +25,6 @@ new_move/3, get_moves/1]). --include("../util/hipe_vectors.hrl"). - %%----------------------------------------------------------------------------- %% The main data structure; its fields are: %% - movelist : mapping from temp to set of associated move numbers @@ -34,11 +32,13 @@ %% - moveinsns : list of move instructions, in descending move number order %% - moveset : set of move instructions --record(ig_moves, {movelist :: hipe_vector(), +-record(ig_moves, {movelist :: movelist(), nrmoves = 0 :: non_neg_integer(), moveinsns = [] :: [{_,_}], moveset = gb_sets:empty() :: gb_sets:set()}). +-type movelist() :: hipe_vectors:vector(ordsets:ordset(non_neg_integer())). + %%----------------------------------------------------------------------------- -spec new(non_neg_integer()) -> #ig_moves{}. @@ -66,7 +66,8 @@ new_move(Dst, Src, IG_moves) -> moveset = gb_sets:insert(MoveInsn, MoveSet)} end. --spec add_movelist(non_neg_integer(), non_neg_integer(), hipe_vector()) -> hipe_vector(). +-spec add_movelist(non_neg_integer(), non_neg_integer(), movelist()) + -> movelist(). add_movelist(MoveNr, Temp, MoveList) -> AssocMoves = hipe_vectors:get(MoveList, Temp), @@ -74,7 +75,7 @@ add_movelist(MoveNr, Temp, MoveList) -> %% ordset due to the ordsets:union in hipe_coalescing_regalloc:combine(). hipe_vectors:set(MoveList, Temp, ordsets:add_element(MoveNr, AssocMoves)). --spec get_moves(#ig_moves{}) -> {hipe_vector(), non_neg_integer(), tuple()}. +-spec get_moves(#ig_moves{}) -> {movelist(), non_neg_integer(), tuple()}. get_moves(IG_moves) -> % -> {MoveList, NrMoves, MoveInsns} {IG_moves#ig_moves.movelist, diff --git a/lib/hipe/regalloc/hipe_ls_regalloc.erl b/lib/hipe/regalloc/hipe_ls_regalloc.erl index d24b803524..0db18f5c62 100644 --- a/lib/hipe/regalloc/hipe_ls_regalloc.erl +++ b/lib/hipe/regalloc/hipe_ls_regalloc.erl @@ -56,7 +56,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -module(hipe_ls_regalloc). --export([regalloc/7]). +-export([regalloc/9]). %%-define(DEBUG,1). -define(HIPE_INSTRUMENT_COMPILER, true). @@ -95,11 +95,10 @@ %% </ol> %% @end %%- - - - - - - - - - - - - - - - - - - - - - - - -regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, + TargetMod, TargetContext) -> + Target = {TargetMod, TargetContext}, ?debug_msg("LinearScan: ~w\n", [erlang:statistics(runtime)]), - %% Step 1: Calculate liveness (Call external implementation.) - Liveness = liveness(CFG, Target), - ?debug_msg("liveness (done)~w\n", [erlang:statistics(runtime)]), USIntervals = calculate_intervals(CFG, Liveness, Entrypoints, Options, Target), ?debug_msg("intervals (done) ~w\n", [erlang:statistics(runtime)]), @@ -108,10 +107,10 @@ regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> %% ?debug_msg("Intervals ~w\n", [Intervals]), ?debug_msg("No intervals: ~w\n",[length(Intervals)]), ?debug_msg("count intervals (done) ~w\n", [erlang:statistics(runtime)]), - Allocation = allocate(Intervals, PhysRegs, SpillIndex, DontSpill, Target), + {Coloring, NewSpillIndex} + = allocate(Intervals, PhysRegs, SpillIndex, DontSpill, Target), ?debug_msg("allocation (done) ~w\n", [erlang:statistics(runtime)]), - Allocation. - + {Coloring, NewSpillIndex}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% @@ -125,32 +124,33 @@ regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> %% Liveness: A map of live-in and live-out sets for each Basic-Block. %% Entrypoints: A set of BB names that have external entrypoints. %% -calculate_intervals(CFG,Liveness,_Entrypoints, Options, Target) -> +calculate_intervals(CFG,Liveness,_Entrypoints, Options, + Target={TgtMod,TgtCtx}) -> %% Add start point for the argument registers. Args = arg_vars(CFG, Target), Interval = - add_def_point(Args, 0, empty_interval(Target:number_of_temporaries(CFG))), + add_def_point(Args, 0, empty_interval(number_of_temporaries(CFG, Target))), %% Interval = add_livepoint(Args, 0, empty_interval()), Worklist = case proplists:get_value(ls_order, Options) of reversepostorder -> - Target:reverse_postorder(CFG); + TgtMod:reverse_postorder(CFG, TgtCtx); breadth -> - Target:breadthorder(CFG); + TgtMod:breadthorder(CFG, TgtCtx); postorder -> - Target:postorder(CFG); + TgtMod:postorder(CFG, TgtCtx); inorder -> - Target:inorder(CFG); + TgtMod:inorder(CFG, TgtCtx); reverse_inorder -> - Target:reverse_inorder(CFG); + TgtMod:reverse_inorder(CFG, TgtCtx); preorder -> - Target:preorder(CFG); + TgtMod:preorder(CFG, TgtCtx); prediction -> - Target:predictionorder(CFG); + TgtMod:predictionorder(CFG, TgtCtx); random -> - Target:labels(CFG); + TgtMod:labels(CFG, TgtCtx); _ -> - Target:reverse_postorder(CFG) + TgtMod:reverse_postorder(CFG, TgtCtx) end, %% ?inc_counter(bbs_counter, length(Worklist)), %% ?debug_msg("No BBs ~w\n",[length(Worklist)]), @@ -290,7 +290,7 @@ allocate([RegInt|RIS], Free, Active, Alloc, SpillIndex, DontSpill, Target) -> alloc(OtherTemp,NewPhys,NewAlloc), SpillIndex, DontSpill, Target); false -> - NewSpillIndex = Target:new_spill_index(SpillIndex), + NewSpillIndex = new_spill_index(SpillIndex, Target), {NewAlloc2, NewActive4} = spill(OtherTemp, OtherEnd, OtherStart, NewActive3, NewAlloc, SpillIndex, DontSpill, Target), @@ -306,7 +306,7 @@ allocate([RegInt|RIS], Free, Active, Alloc, SpillIndex, DontSpill, Target) -> case NewFree of [] -> %% No physical registers available, we have to spill. - NewSpillIndex = Target:new_spill_index(SpillIndex), + NewSpillIndex = new_spill_index(SpillIndex, Target), {NewAlloc, NewActive2} = spill(Temp, endpoint(RegInt), startpoint(RegInt), Active, Alloc, SpillIndex, DontSpill, Target), @@ -752,38 +752,41 @@ create_freeregs([]) -> %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -liveness(CFG, Target) -> - Target:analyze(CFG). +bb(CFG, L, {TgtMod, TgtCtx}) -> + TgtMod:bb(CFG,L,TgtCtx). + +livein(Liveness,L, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:livein(Liveness,L,TgtCtx), Target). -bb(CFG, L, Target) -> - Target:bb(CFG,L). +liveout(Liveness,L, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:liveout(Liveness,L,TgtCtx), Target). -livein(Liveness,L, Target) -> - regnames(Target:livein(Liveness,L), Target). +uses(I, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:uses(I,TgtCtx), Target). -liveout(Liveness,L, Target) -> - regnames(Target:liveout(Liveness,L), Target). +defines(I, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:defines(I,TgtCtx), Target). -uses(I, Target) -> - regnames(Target:uses(I), Target). +is_precoloured(R, {TgtMod,TgtCtx}) -> + TgtMod:is_precoloured(R,TgtCtx). -defines(I, Target) -> - regnames(Target:defines(I), Target). +is_global(R, {TgtMod,TgtCtx}) -> + TgtMod:is_global(R,TgtCtx). -is_precoloured(R, Target) -> - Target:is_precoloured(R). +new_spill_index(SpillIndex, {TgtMod,TgtCtx}) -> + TgtMod:new_spill_index(SpillIndex, TgtCtx). -is_global(R, Target) -> - Target:is_global(R). +number_of_temporaries(CFG, {TgtMod,TgtCtx}) -> + TgtMod:number_of_temporaries(CFG, TgtCtx). -physical_name(R, Target) -> - Target:physical_name(R). +physical_name(R, {TgtMod,TgtCtx}) -> + TgtMod:physical_name(R,TgtCtx). -regnames(Regs, Target) -> - [Target:reg_nr(X) || X <- Regs]. +regnames(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. -arg_vars(CFG, Target) -> - Target:args(CFG). +arg_vars(CFG, {TgtMod,TgtCtx}) -> + TgtMod:args(CFG,TgtCtx). -is_arg(Reg, Target) -> - Target:is_arg(Reg). +is_arg(Reg, {TgtMod,TgtCtx}) -> + TgtMod:is_arg(Reg,TgtCtx). diff --git a/lib/hipe/regalloc/hipe_optimistic_regalloc.erl b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl index 2ed9ec3b45..031c799a2c 100644 --- a/lib/hipe/regalloc/hipe_optimistic_regalloc.erl +++ b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl @@ -29,7 +29,7 @@ %%----------------------------------------------------------------------- -module(hipe_optimistic_regalloc). --export([regalloc/5]). +-export([regalloc/7]). -ifndef(DEBUG). %%-define(DEBUG,true). @@ -74,20 +74,22 @@ %% SpillLimit -- Temporaris with numbers higher than this have %% infinit spill cost. %% Consider changing this to a set. -%% Target -- The module containing the target-specific functions. +%% TgtMod -- The module containing the target-specific functions. +%% TgtCtx -- Context data for TgtMod %% %% Returns: %% Coloring -- A coloring for specified CFG -%% SpillIndex0 -- A new spill index +%% SpillIndex2 -- A new spill index %%----------------------------------------------------------------------- -ifdef(COMPARE_ITERATED_OPTIMISTIC). -regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> - ?debug_msg("optimistic ~w\n",[Target]), +regalloc(CFG, Liveness, SpillIndex, SpillLimit, TgtMod, TgtCtx, _Options) -> + Target = {TgtMod, TgtCtx}, + ?debug_msg("optimistic ~w\n",[TgtMod]), ?debug_msg("CFG: ~p\n",[CFG]), %% Build interference graph ?debug_msg("Build IG\n",[]), - IG_O = hipe_ig:build(CFG, Target), - IG = hipe_ig:build(CFG, Target), + IG_O = hipe_ig:build(CFG, Liveness, TgtMod, TgtCtx), + IG = hipe_ig:build(CFG, Liveness, TgtMod, TgtCtx), ?debug_msg("adjlist: ~p\n",[hipe_ig:adj_list(IG)]), ?debug_msg("IG:\n",[]), ?print_adjacent(IG), @@ -98,9 +100,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> SavedAdjList = hipe_ig:adj_list(IG), ?debug_msg("Init\n",[]), - No_temporaries = Target:number_of_temporaries(CFG), + No_temporaries = number_of_temporaries(CFG, Target), ?debug_msg("Coalescing RA: num_temps = ~p~n", [No_temporaries]), - Allocatable = Target:allocatable(), + Allocatable = allocatable(Target), K = length(Allocatable), All_colors = colset_from_list(Allocatable), ?debug_msg("K: ~w~nAll_colors: ~p\n",[K, All_colors]), @@ -113,11 +115,13 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?mov_print_memberships(Move_sets), ?debug_msg("Build Worklist\n",[]), - Worklists_O = hipe_reg_worklists:new(IG_O, Target, CFG, Move_sets_O, K, No_temporaries), + Worklists_O = hipe_reg_worklists:new(IG_O, TgtMod, TgtCtx, CFG, Move_sets_O, + K, No_temporaries), ?debug_msg("Worklists:\n ~p\n", [Worklists_O]), ?reg_print_memberships(Worklists_O), - Worklists = hipe_reg_worklists:new(IG, Target, CFG, K, No_temporaries), + Worklists = hipe_reg_worklists:new(IG, TgtMod, TgtCtx, CFG, K, + No_temporaries), ?debug_msg("New Worklists:\n ~p\n", [Worklists]), ?reg_print_memberships(Worklists), @@ -175,10 +179,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("Init node sets\n",[]), Node_sets = hipe_node_sets:new(), - %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,Target:non_alloc(CFG)]), + %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,non_alloc(CFG,Target)]), ?debug_msg("Default coloring\n",[]), {Color0,Node_sets1} = - defaultColoring(Target:all_precoloured(), + defaultColoring(all_precoloured(Target), initColor(No_temporaries), Node_sets, Target), ?debug_msg("Color0\n",[]), ?print_colors(No_temporaries, Color0), @@ -199,9 +203,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("Nodes:~w\nNodes2:~w\nNo_temporaries:~w\n",[Node_sets,Node_sets2,No_temporaries]), ?debug_msg("Build mapping _N ~w\n",[Node_sets2]), - Coloring = build_namelist(Node_sets2,SpillIndex,Alias2,Color1), + {Coloring,SpillIndex2} = + build_namelist(Node_sets2,SpillIndex,Alias2,Color1), ?debug_msg("Coloring ~p\n",[Coloring]), - SortedColoring = { sort_stack(element(1, Coloring)), element(2, Coloring)}, + SortedColoring = {sort_stack(Coloring), SpillIndex2}, ?debug_msg("SortedColoring ~p\n",[SortedColoring]), %%Coloring. ?debug_msg("----------------------Assign colors _O\n",[]), @@ -217,14 +222,15 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> SortedColoring_O = {sort_stack(element(1, Coloring_O)), element(2, Coloring_O)}, ?debug_msg("SortedColoring_O ~p\n",[SortedColoring_O]), sanity_compare(SortedColoring_O, SortedColoring), - Coloring. + {Coloring,SpillIndex2}. -else. -regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> - ?debug_msg("optimistic ~w\n",[Target]), +regalloc(CFG, Liveness, SpillIndex, SpillLimit, TgtMod, TgtCtx, _Options) -> + Target = {TgtMod, TgtCtx}, + ?debug_msg("optimistic ~w\n",[TgtMod]), ?debug_msg("CFG: ~p\n",[CFG]), %% Build interference graph ?debug_msg("Build IG\n",[]), - IG = hipe_ig:build(CFG, Target), + IG = hipe_ig:build(CFG, Liveness, TgtMod, TgtCtx), ?debug_msg("adjlist: ~p\n",[hipe_ig:adj_list(IG)]), ?debug_msg("IG:\n",[]), ?print_adjacent(IG), @@ -235,9 +241,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> SavedAdjList = hipe_ig:adj_list(IG), ?debug_msg("Init\n",[]), - No_temporaries = Target:number_of_temporaries(CFG), + No_temporaries = number_of_temporaries(CFG, Target), ?debug_msg("Coalescing RA: num_temps = ~p~n", [No_temporaries]), - Allocatable = Target:allocatable(), + Allocatable = allocatable(Target), K = length(Allocatable), All_colors = colset_from_list(Allocatable), ?debug_msg("K: ~w~nAll_colors: ~p\n",[K, All_colors]), @@ -250,7 +256,8 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("Build Worklist\n",[]), - Worklists = hipe_reg_worklists:new(IG, Target, CFG, K, No_temporaries), + Worklists = hipe_reg_worklists:new(IG, TgtMod, TgtCtx, CFG, K, + No_temporaries), ?debug_msg("New Worklists:\n ~p\n", [Worklists]), ?reg_print_memberships(Worklists), @@ -292,10 +299,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("Init node sets\n",[]), Node_sets = hipe_node_sets:new(), - %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,Target:non_alloc(CFG)]), + %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,non_alloc(CFG,Target)]), ?debug_msg("Default coloring\n",[]), {Color0,Node_sets1} = - defaultColoring(Target:all_precoloured(), + defaultColoring(all_precoloured(Target), initColor(No_temporaries), Node_sets, Target), ?debug_msg("Color0\n",[]), ?print_colors(No_temporaries, Color0), @@ -316,9 +323,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("Nodes:~w\nNodes2:~w\nNo_temporaries:~w\n",[Node_sets,Node_sets2,No_temporaries]), ?debug_msg("Build mapping _N ~w\n",[Node_sets2]), - Coloring = build_namelist(Node_sets2,SpillIndex,Alias2,Color1), + {Coloring, SpillIndex2} = build_namelist(Node_sets2,SpillIndex,Alias2,Color1), ?debug_msg("Coloring ~p\n",[Coloring]), - Coloring. + {Coloring,SpillIndex2}. -endif. %%---------------------------------------------------------------------- @@ -834,7 +841,8 @@ sort_stack_split(Pivot, [H|T], Smaller, Bigger) -> %% been coalesced, this mapping shows the alias for that %% node. %% AllColors -- This is an ordset containing all the available colors -%% Target -- The module containing the target-specific functions. +%% Target -- The module containing the target-specific functions, +%% along with its context data. %% %% Returns: %% Color -- A mapping from nodes to their respective color. @@ -874,7 +882,7 @@ assignColors(Worklists, Stack, NodeSets, Color, No_Temporaries, false -> % Color case Col = colset_smallest(OkColors), NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), - Color1 = setColor(Node, Target:physical_name(Col), Color), + Color1 = setColor(Node, physical_name(Col,Target), Color), ?debug_msg("Color case. Assigning color ~p to node.~n", [Col]), assignColors(Worklists, Stack1, NodeSets1, Color1, No_Temporaries, SavedAdjList, SavedSpillCosts, IG, Alias, AllColors, Target) end @@ -902,7 +910,8 @@ assignColors(Worklists, Stack, NodeSets, Color, No_Temporaries, %% Alias -- This is a mapping from nodes to nodes. If a node has %% been coalesced, this mapping shows the alias for that %% node. -%% Target -- The module containing the target-specific functions. +%% Target -- The module containing the target-specific functions, +%% along with its context data. %% %% Returns: %% Alias -- The restored aliases after the uncoalescing. @@ -1006,7 +1015,7 @@ colorSplit([], _Col, NodeSets, Color, _Target) -> colorSplit([Node|Nodes], Col, NodeSets, Color, Target) -> ?debug_msg(" Coloring node ~p with color ~p.~n", [Node, Col]), NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), - Color1 = setColor(Node, Target:physical_name(Col), Color), + Color1 = setColor(Node, physical_name(Col,Target), Color), colorSplit(Nodes, Col, NodeSets1, Color1, Target). %% Place non-colorable nodes in a split at the bottom of the SelectStack. @@ -1035,7 +1044,8 @@ enqueueSplit([Node|Nodes], IG, Stack) -> %% node. %% AllColors -- This is an ordset containing all the available colors %% -%% Target -- The module containing the target-specific functions. +%% Target -- The module containing the target-specific functions, +%% along with its context data. %% %% Returns: %% Color -- A mapping from nodes to their respective color. @@ -1065,7 +1075,7 @@ assignColors_O(Stack,NodeSets,Color,Alias,AllColors,Target) -> false -> % Colour case Col = colset_smallest(OkColors), NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), - Color1 = setColor(Node, Target:physical_name(Col), Color), + Color1 = setColor(Node, physical_name(Col,Target), Color), assignColors_O(Stack1, NodeSets1, Color1, Alias, AllColors, Target) end end. @@ -1079,7 +1089,8 @@ assignColors_O(Stack,NodeSets,Color,Alias,AllColors,Target) -> %% Regs -- The list of registers to be default colored %% Color -- The color mapping that shall be changed %% NodeSets -- The node sets that shall be updated -%% Target -- The module containing the target-specific functions. +%% Target -- The module containing the target-specific functions, +%% along with its context data. %% %% Returns: %% NewColor -- The updated color mapping @@ -1089,7 +1100,7 @@ assignColors_O(Stack,NodeSets,Color,Alias,AllColors,Target) -> defaultColoring([], Color, NodeSets, _Target) -> {Color,NodeSets}; defaultColoring([Reg|Regs], Color, NodeSets, Target) -> - Color1 = setColor(Reg,Target:physical_name(Reg), Color), + Color1 = setColor(Reg,physical_name(Reg,Target), Color), NodeSets1 = hipe_node_sets:add_colored(Reg, NodeSets), defaultColoring(Regs, Color1, NodeSets1, Target). @@ -1283,7 +1294,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) -> ?debug_msg("Testing nodes ~p and ~p for coalescing~n",[Dest,Source]), Alias_src = getAlias(Source, Alias), Alias_dst = getAlias(Dest, Alias), - {U,V} = case Target:is_precoloured(Alias_dst) of + {U,V} = case is_precoloured(Alias_dst, Target) of true -> {Alias_dst, Alias_src}; false -> {Alias_src, Alias_dst} end, @@ -1293,13 +1304,13 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) -> %% drop coalesced move Move {Moves0, IG, Alias, Worklists}; _ -> - case (Target:is_precoloured(V) orelse + case (is_precoloured(V, Target) orelse hipe_ig:nodes_are_adjacent(U, V, IG)) of true -> %% drop constrained move Move {Moves0, IG, Alias, Worklists}; false -> - case (case Target:is_precoloured(U) of + case (case is_precoloured(U, Target) of true -> AdjV = hipe_ig:node_adj_list(V, IG), all_adjacent_ok(AdjV, U, Worklists, IG, K, Target); @@ -1350,7 +1361,7 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) -> ?debug_msg("Testing nodes ~p and ~p for coalescing~n",[Dest,Source]), Alias_src = getAlias(Source, Alias), Alias_dst = getAlias(Dest, Alias), - {U,V} = case Target:is_precoloured(Alias_dst) of + {U,V} = case is_precoloured(Alias_dst, Target) of true -> {Alias_dst, Alias_src}; false -> {Alias_src, Alias_dst} end, @@ -1361,7 +1372,7 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) -> Worklists1 = add_worklist(Worklists, U, K, Moves1, IG, Target), {Moves1, IG, Worklists1, Alias}; _ -> - case (Target:is_precoloured(V) orelse + case (is_precoloured(V, Target) orelse hipe_ig:nodes_are_adjacent(U, V, IG)) of true -> Moves1 = Moves0, % drop constrained move Move @@ -1369,7 +1380,7 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) -> Worklists2 = add_worklist(Worklists1, V, K, Moves1, IG, Target), {Moves1, IG, Worklists2, Alias}; false -> - case (case Target:is_precoloured(U) of + case (case is_precoloured(U, Target) of true -> AdjV = hipe_ig:node_adj_list(V, IG), all_adjacent_ok(AdjV, U, Worklists, IG, K, Target); @@ -1405,7 +1416,8 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) -> %% K -- Number of registers %% Moves -- Current move information %% IG -- Interference graph -%% Target -- The containing the target-specific functions +%% Target -- The containing the target-specific functions, along with +%% its context data. %% %% Returns: %% Worklists (updated) @@ -1413,7 +1425,7 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) -> -ifdef(COMPARE_ITERATED_OPTIMISTIC). add_worklist(Worklists, U, K, Moves, IG, Target) -> - case (not(Target:is_precoloured(U)) + case (not(is_precoloured(U, Target)) andalso not(hipe_moves:move_related(U, Moves)) andalso (hipe_ig:is_trivially_colourable(U, K, IG))) of true -> @@ -1524,12 +1536,12 @@ combine(U, V, IG, Alias, Worklists, K, Target) -> combine_edges([], _U, IG, _Worklists, _K, _Target) -> IG; -combine_edges([T|Ts], U, IG, Worklists, K, Target) -> +combine_edges([T|Ts], U, IG, Worklists, K, Target={TgtMod,TgtCtx}) -> case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of true -> combine_edges(Ts, U, IG, Worklists, K, Target); _ -> - IG1 = hipe_ig:add_edge(T, U, IG, Target), - IG2 = case Target:is_precoloured(T) of + IG1 = hipe_ig:add_edge(T, U, IG, TgtMod, TgtCtx), + IG2 = case is_precoloured(T, Target) of true -> IG1; false -> hipe_ig:dec_node_degree(T, IG1) end, @@ -1559,7 +1571,7 @@ combine_edges([T|Ts], U, IG, Worklists, K, Target) -> -ifdef(COMPARE_ITERATED_OPTIMISTIC). combine_edges_O([], _U, IG, Worklists, Moves, _K, _Target) -> {IG, Worklists, Moves}; -combine_edges_O([T|Ts], U, IG, Worklists, Moves, K, Target) -> +combine_edges_O([T|Ts], U, IG, Worklists, Moves, K, Target={TgtMod,TgtCtx}) -> case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of true -> combine_edges_O(Ts, U, IG, Worklists, Moves, K, Target); _ -> @@ -1576,7 +1588,7 @@ combine_edges_O([T|Ts], U, IG, Worklists, Moves, K, Target) -> %% worklist, and that's where decrement_degree() expects to find it. %% This issue is not covered in the published algorithm. OldDegree = hipe_ig:get_node_degree(T, IG), - IG1 = hipe_ig:add_edge(T, U, IG, Target), + IG1 = hipe_ig:add_edge(T, U, IG, TgtMod, TgtCtx), NewDegree = hipe_ig:get_node_degree(T, IG1), Worklists0 = if NewDegree =:= K, OldDegree =:= K-1 -> @@ -1609,7 +1621,8 @@ combine_edges_O([T|Ts], U, IG, Worklists, Moves, K, Target) -> %% Alias -- The Alias vector before undoing %% SavedAdj -- Saved adjacency list %% IG -- Interference graph -%% Target -- The module containing the target-specific functions. +%% Target -- The module containing the target-specific functions, +%% along with its context data. %% %% Returns: %% list of primitive nodes, that is all nodes that were previously @@ -1676,7 +1689,8 @@ findPrimitiveNodes(Node, N, Alias, PrimitiveNodes) -> %% N -- Node that should be uncoalesced %% SavedAdj -- Saved adjacency list %% IG -- Interference graph -%% Target -- The module containing the target-specific functions. +%% Target -- The module containing the target-specific functions, along +%% with its context data. %% %% Returns: %% updated Interferece graph @@ -1702,16 +1716,16 @@ fixAdj(N, SavedAdj, IG, Target) -> removeAdj([], _N, _IG, _Target) -> true; -removeAdj([V| New], N, IG, Target) -> - hipe_ig:remove_edge(V, N, IG, Target), +removeAdj([V| New], N, IG, Target={TgtMod,TgtCtx}) -> + hipe_ig:remove_edge(V, N, IG, TgtMod, TgtCtx), removeAdj(New, N, IG, Target). %%restoreAdj([], _N, IG, _Alias, _Target) -> %% %%?debug_msg("adj_lists__after_restore_o ~n~p~n", [hipe_ig:adj_list(IG)]), %% IG; -%%restoreAdj([V| AdjToN], N, IG, Alias, Target) -> +%%restoreAdj([V| AdjToN], N, IG, Alias, Target={TgtMod,TgtCtx}) -> %% AliasToV = getAlias(V, Alias), -%% IG1 = hipe_ig:add_edge(N, AliasToV, IG, Target), +%% IG1 = hipe_ig:add_edge(N, AliasToV, IG, TgtMod, TgtCtx), %% restoreAdj(AdjToN, N, IG1, Alias, Target). %% XXX This is probably a clumsy way of doing it @@ -1744,7 +1758,8 @@ findNew([A| Adj], Saved, New) -> %% R -- Other node to test %% IG -- Interference graph %% K -- Number of registers -%% Target -- The module containing the target-specific functions +%% Target -- The module containing the target-specific functions, along +%% with its context data. %% %% Returns: %% true iff coalescing is OK @@ -1752,7 +1767,7 @@ findNew([A| Adj], Saved, New) -> ok(T, R, IG, K, Target) -> ((hipe_ig:is_trivially_colourable(T, K, IG)) - orelse Target:is_precoloured(T) + orelse is_precoloured(T, Target) orelse hipe_ig:nodes_are_adjacent(T, R, IG)). %%---------------------------------------------------------------------- @@ -1765,7 +1780,8 @@ ok(T, R, IG, K, Target) -> %% U -- Node to test for coalescing %% IG -- Interference graph %% K -- Number of registers -%% Target -- The module containing the target-specific functions +%% Target -- The module containing the target-specific functions, along +%% with its context data. %% %% Returns: %% true iff coalescing is OK for all nodes in the list @@ -2042,3 +2058,24 @@ freezeEm3(_U,V,_M,K,WorkLists,Moves,IG,_Alias) -> {WorkLists,Moves1} end. -endif. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to external functions. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +all_precoloured({TgtMod,TgtCtx}) -> + TgtMod:all_precoloured(TgtCtx). + +allocatable({TgtMod,TgtCtx}) -> + TgtMod:allocatable(TgtCtx). + +is_precoloured(R, {TgtMod,TgtCtx}) -> + TgtMod:is_precoloured(R,TgtCtx). + +number_of_temporaries(CFG, {TgtMod,TgtCtx}) -> + TgtMod:number_of_temporaries(CFG, TgtCtx). + +physical_name(R, {TgtMod,TgtCtx}) -> + TgtMod:physical_name(R,TgtCtx). diff --git a/lib/hipe/regalloc/hipe_ppc_specific.erl b/lib/hipe/regalloc/hipe_ppc_specific.erl index c49b1e510f..ed7a26de8c 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific.erl @@ -22,114 +22,123 @@ -module(hipe_ppc_specific). %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 - ,analyze/1 - ,labels/1 - ,all_precoloured/0 - ,bb/2 - ,liveout/2 - ,reg_nr/1 - ,def_use/1 - ,is_move/1 - ,is_precoloured/1 - ,var_range/1 - ,allocatable/0 - ,non_alloc/1 - ,physical_name/1 - ,reverse_postorder/1 - ,livein/2 - ,uses/1 - ,defines/1 +-export([number_of_temporaries/2 + ,analyze/2 + ,labels/2 + ,all_precoloured/1 + ,bb/3 + ,liveout/3 + ,reg_nr/2 + ,def_use/2 + ,is_move/2 + ,is_precoloured/2 + ,var_range/2 + ,allocatable/1 + ,non_alloc/2 + ,physical_name/2 + ,reverse_postorder/2 + ,livein/3 + ,uses/2 + ,defines/2 + ,defines_all_alloc/2 ]). %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]). %% for hipe_ls_regalloc: --export([args/1, is_arg/1, is_global/1, new_spill_index/1]). --export([breadthorder/1, postorder/1]). +-export([args/2, is_arg/2, is_global/2, new_spill_index/2]). +-export([breadthorder/2, postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3]). -defun_to_cfg(Defun) -> - hipe_ppc_cfg:init(Defun). +%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). -check_and_rewrite(Defun, Coloring) -> - hipe_ppc_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring, _) -> + hipe_ppc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_ppc_cfg:reverse_postorder(CFG). -non_alloc(CFG) -> - non_alloc(hipe_ppc_registers:nr_args(), hipe_ppc_cfg:params(CFG)). +non_alloc(CFG, no_context) -> + non_alloc_1(hipe_ppc_registers:nr_args(), hipe_ppc_cfg:params(CFG)). %% same as hipe_ppc_frame:fix_formals/2 -non_alloc(0, Rest) -> Rest; -non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); -non_alloc(_, []) -> []. +non_alloc_1(0, Rest) -> Rest; +non_alloc_1(N, [_|Rest]) -> non_alloc_1(N-1, Rest); +non_alloc_1(_, []) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> hipe_ppc_liveness_gpr:analyse(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) -> [X || X <- hipe_ppc_liveness_gpr:livein(Liveness,L), hipe_ppc:temp_is_allocatable(X)]. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) -> [X || X <- hipe_ppc_liveness_gpr:liveout(BB_in_out_liveness,Label), hipe_ppc:temp_is_allocatable(X)]. %% Registers stuff -allocatable() -> +allocatable(no_context) -> hipe_ppc_registers:allocatable_gpr(). -all_precoloured() -> +all_precoloured(no_context) -> hipe_ppc_registers:all_precoloured(). -is_precoloured(Reg) -> +is_precoloured(Reg, _) -> hipe_ppc_registers:is_precoloured_gpr(Reg). -is_fixed(R) -> +is_fixed(R, _) -> hipe_ppc_registers:is_fixed(R). -physical_name(Reg) -> +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_ppc_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) -> hipe_gensym:var_range(ppc). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(ppc), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) -> hipe_ppc_cfg:bb(CFG,L). +update_bb(CFG,L,BB,_) -> + hipe_ppc_cfg:bb_add(CFG,L,BB). + %% PowerPC stuff -def_use(Instruction) -> - {defines(Instruction), uses(Instruction)}. +def_use(Instruction, Ctx) -> + {defines(Instruction, Ctx), uses(Instruction, Ctx)}. -uses(I) -> +uses(I, _) -> [X || X <- hipe_ppc_defuse:insn_use_gpr(I), hipe_ppc:temp_is_allocatable(X)]. -defines(I) -> +defines(I, _) -> [X || X <- hipe_ppc_defuse:insn_def_gpr(I), hipe_ppc:temp_is_allocatable(X)]. -is_move(Instruction) -> +defines_all_alloc(I, _) -> + hipe_ppc_defuse:insn_defs_all_gpr(I). + +is_move(Instruction, _) -> case hipe_ppc:is_pseudo_move(Instruction) of true -> Dst = hipe_ppc:pseudo_move_dst(Instruction), @@ -142,28 +151,45 @@ is_move(Instruction) -> false -> false end. -reg_nr(Reg) -> +reg_nr(Reg, _) -> hipe_ppc:temp_reg(Reg). +new_reg_nr(_) -> + hipe_gensym:get_next_var(ppc). + +update_reg_nr(Nr, Temp, _) -> + hipe_ppc:mk_temp(Nr, hipe_ppc:temp_type(Temp)). + +subst_temps(SubstFun, Instr, _) -> + hipe_ppc_subst:insn_temps( + fun(Op) -> + case hipe_ppc:temp_is_allocatable(Op) + andalso hipe_ppc:temp_type(Op) =/= 'double' + of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + %%% Linear Scan stuff -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) -> SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_ppc_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_ppc_cfg:postorder(CFG). -is_global(R) -> +is_global(R, _) -> R =:= hipe_ppc_registers:temp1() orelse R =:= hipe_ppc_registers:temp2() orelse R =:= hipe_ppc_registers:temp3() orelse hipe_ppc_registers:is_fixed(R). -is_arg(R) -> +is_arg(R, _) -> hipe_ppc_registers:is_arg(R). -args(CFG) -> +args(CFG, _) -> hipe_ppc_registers:args(hipe_ppc_cfg:arity(CFG)). diff --git a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl index 454aa4c686..6daa624720 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl @@ -22,126 +22,152 @@ -module(hipe_ppc_specific_fp). %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 - ,analyze/1 - ,labels/1 - ,all_precoloured/0 - ,bb/2 - ,liveout/2 - ,reg_nr/1 - ,def_use/1 - ,is_move/1 - ,is_precoloured/1 - ,var_range/1 - ,allocatable/0 - ,non_alloc/1 - ,physical_name/1 - ,reverse_postorder/1 - ,livein/2 - ,uses/1 - ,defines/1 +-export([number_of_temporaries/2 + ,analyze/2 + ,labels/2 + ,all_precoloured/1 + ,bb/3 + ,liveout/3 + ,reg_nr/2 + ,def_use/2 + ,is_move/2 + ,is_precoloured/2 + ,var_range/2 + ,allocatable/1 + ,non_alloc/2 + ,physical_name/2 + ,reverse_postorder/2 + ,livein/3 + ,uses/2 + ,defines/2 + ,defines_all_alloc/2 ]). %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]). %% for hipe_ls_regalloc: -%%-export([args/1, is_arg/1, is_global, new_spill_index/1]). -%%-export([breadthorder/1, postorder/1]). +%%-export([args/2, is_arg/2, is_global, new_spill_index/2]). +%%-export([breadthorder/2, postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3]). -defun_to_cfg(Defun) -> - hipe_ppc_cfg:init(Defun). +%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). -check_and_rewrite(Defun, Coloring) -> - hipe_ppc_ra_postconditions_fp:check_and_rewrite(Defun, Coloring). +check_and_rewrite(CFG, Coloring, _) -> + hipe_ppc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_ppc_cfg:reverse_postorder(CFG). -non_alloc(_CFG) -> +non_alloc(_CFG, _) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> hipe_ppc_liveness_fpr:analyse(CFG). -livein(Liveness, L) -> +livein(Liveness, L, _) -> hipe_ppc_liveness_fpr:livein(Liveness, L). -liveout(BB_in_out_liveness, Label) -> +liveout(BB_in_out_liveness, Label, _) -> hipe_ppc_liveness_fpr:liveout(BB_in_out_liveness, Label). %% Registers stuff -allocatable() -> +allocatable(no_context) -> hipe_ppc_registers:allocatable_fpr(). -all_precoloured() -> - allocatable(). +all_precoloured(Ctx) -> + allocatable(Ctx). -is_precoloured(Reg) -> +is_precoloured(Reg, _) -> hipe_ppc_registers:is_precoloured_fpr(Reg). -is_fixed(_Reg) -> +is_fixed(_Reg, _) -> false. -physical_name(Reg) -> +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_ppc_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) -> hipe_gensym:var_range(ppc). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(ppc), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG, L) -> +bb(CFG, L, _) -> hipe_ppc_cfg:bb(CFG, L). +update_bb(CFG,L,BB,_) -> + hipe_ppc_cfg:bb_add(CFG,L,BB). + %% PowerPC stuff -def_use(I) -> - {defines(I), uses(I)}. +def_use(I, Ctx) -> + {defines(I, Ctx), uses(I, Ctx)}. -uses(I) -> +uses(I, _) -> hipe_ppc_defuse:insn_use_fpr(I). -defines(I) -> +defines(I, _) -> hipe_ppc_defuse:insn_def_fpr(I). -is_move(I) -> +defines_all_alloc(I, _) -> + hipe_ppc_defuse:insn_defs_all_fpr(I). + +is_move(I, _) -> hipe_ppc:is_pseudo_fmove(I). -reg_nr(Reg) -> +reg_nr(Reg, _) -> hipe_ppc:temp_reg(Reg). +new_reg_nr(_) -> + hipe_gensym:get_next_var(ppc). + +update_reg_nr(Nr, _Temp, _) -> + hipe_ppc:mk_temp(Nr, 'double'). + +subst_temps(SubstFun, Instr, _) -> + hipe_ppc_subst:insn_temps( + fun(Op) -> + case hipe_ppc:temp_is_allocatable(Op) + andalso hipe_ppc:temp_type(Op) =:= 'double' + of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + -ifdef(notdef). -new_spill_index(SpillIndex) -> +new_spill_index(SpillIndex, _) -> SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_ppc_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_ppc_cfg:postorder(CFG). -is_global(_R) -> +is_global(_R, _) -> false. -is_arg(_R) -> +is_arg(_R, _) -> false. -args(_CFG) -> +args(_CFG, _) -> []. -endif. diff --git a/lib/hipe/regalloc/hipe_reg_worklists.erl b/lib/hipe/regalloc/hipe_reg_worklists.erl index 88585f9f38..00679cf19c 100644 --- a/lib/hipe/regalloc/hipe_reg_worklists.erl +++ b/lib/hipe/regalloc/hipe_reg_worklists.erl @@ -30,8 +30,8 @@ -module(hipe_reg_worklists). -author(['Andreas Wallin', 'Thorild Selén']). --export([new/5, % only used by optimistic allocator - new/6, +-export([new/6, % only used by optimistic allocator + new/7, simplify/1, spill/1, freeze/1, @@ -90,29 +90,32 @@ %% %%%---------------------------------------------------------------------- -new(IG, Target, CFG, K, No_temporaries) -> % only used by optimistic allocator +%% only used by optimistic allocator +new(IG, TargetMod, TargetCtx, CFG, K, No_temporaries) -> CoalescedTo = hipe_bifs:array(No_temporaries, 'none'), - init(initial(Target, CFG), K, IG, empty(No_temporaries, CoalescedTo)). + init(initial(TargetMod, TargetCtx, CFG), K, IG, + empty(No_temporaries, CoalescedTo)). -new(IG, Target, CFG, Move_sets, K, No_temporaries) -> - init(initial(Target, CFG), K, IG, Move_sets, empty(No_temporaries, [])). +new(IG, TargetMod, TargetCtx, CFG, Move_sets, K, No_temporaries) -> + init(initial(TargetMod, TargetCtx, CFG), K, IG, Move_sets, + empty(No_temporaries, [])). -initial(Target, CFG) -> - {Min_temporary, Max_temporary} = Target:var_range(CFG), - NonAlloc = Target:non_alloc(CFG), - non_precoloured(Target, Min_temporary, Max_temporary, []) - -- [Target:reg_nr(X) || X <- NonAlloc]. +initial(TargetMod, TargetCtx, CFG) -> + {Min_temporary, Max_temporary} = TargetMod:var_range(CFG, TargetCtx), + NonAlloc = TargetMod:non_alloc(CFG, TargetCtx), + non_precoloured(TargetMod, TargetCtx, Min_temporary, Max_temporary, []) + -- [TargetMod:reg_nr(X, TargetCtx) || X <- NonAlloc]. -non_precoloured(Target, Current, Max_temporary, Initial) -> +non_precoloured(TargetMod, TargetCtx, Current, Max_temporary, Initial) -> if Current > Max_temporary -> Initial; true -> NewInitial = - case Target:is_precoloured(Current) of + case TargetMod:is_precoloured(Current, TargetCtx) of true -> Initial; false -> [Current|Initial] end, - non_precoloured(Target, Current+1, Max_temporary, NewInitial) + non_precoloured(TargetMod, TargetCtx, Current+1, Max_temporary, NewInitial) end. %% construct an empty initialized worklists data structure diff --git a/lib/hipe/regalloc/hipe_regalloc_loop.erl b/lib/hipe/regalloc/hipe_regalloc_loop.erl index d29615a3a0..3777f90534 100644 --- a/lib/hipe/regalloc/hipe_regalloc_loop.erl +++ b/lib/hipe/regalloc/hipe_regalloc_loop.erl @@ -21,38 +21,44 @@ %%% Common wrapper for graph_coloring and coalescing regallocs. -module(hipe_regalloc_loop). --export([ra/5, ra_fp/4]). +-export([ra/7, ra_fp/6]). %%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. -include("../main/hipe.hrl"). -ra(Defun, SpillIndex, Options, RegAllocMod, TargetMod) -> - {NewDefun, Coloring, _NewSpillIndex} = - ra_common(Defun, SpillIndex, Options, RegAllocMod, TargetMod), - {NewDefun, Coloring}. +ra(CFG, Liveness0, SpillIndex, Options, RegAllocMod, TargetMod, TargetCtx) -> + {NewCFG, Liveness, Coloring, _NewSpillIndex} = + ra_common(CFG, Liveness0, SpillIndex, Options, RegAllocMod, TargetMod, + TargetCtx), + {NewCFG, Liveness, Coloring}. -ra_fp(Defun, Options, RegAllocMod, TargetMod) -> - ra_common(Defun, 0, Options, RegAllocMod, TargetMod). +ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod, TargetCtx) -> + ra_common(CFG, Liveness, 0, Options, RegAllocMod, TargetMod, TargetCtx). -ra_common(Defun, SpillIndex, Options, RegAllocMod, TargetMod) -> +ra_common(CFG0, Liveness0, SpillIndex, Options, RegAllocMod, TargetMod, + TargetCtx) -> ?inc_counter(ra_calls_counter, 1), - CFG = TargetMod:defun_to_cfg(Defun), - SpillLimit = TargetMod:number_of_temporaries(CFG), - alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod). + SpillLimit0 = TargetMod:number_of_temporaries(CFG0, TargetCtx), + {Coloring, _, CFG, Liveness} = + call_allocator_initial(CFG0, Liveness0, SpillLimit0, SpillIndex, Options, + RegAllocMod, TargetMod, TargetCtx), + %% The first iteration, the hipe_regalloc_prepass may create new temps, these + %% should not end up above SpillLimit. + SpillLimit = TargetMod:number_of_temporaries(CFG, TargetCtx), + alloc(Coloring, CFG, Liveness, SpillLimit, SpillIndex, Options, + RegAllocMod, TargetMod, TargetCtx). -alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) -> +alloc(Coloring, CFG0, Liveness, SpillLimit, SpillIndex, Options, + RegAllocMod, TargetMod, TargetCtx) -> ?inc_counter(ra_iteration_counter, 1), - CFG = TargetMod:defun_to_cfg(Defun), - {Coloring, _NewSpillIndex} = - RegAllocMod:regalloc(CFG, SpillIndex, SpillLimit, TargetMod, Options), - {NewDefun, DidSpill} = TargetMod:check_and_rewrite(Defun, Coloring), + {CFG, DidSpill} = TargetMod:check_and_rewrite(CFG0, Coloring, TargetCtx), case DidSpill of false -> %% No new temps, we are done. ?add_spills(Options, _NewSpillIndex), - TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod), - {TempMap2, NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, - TargetMod, TempMap), + TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod, TargetCtx), + {TempMap2, NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG0, Liveness, [], SpillIndex, Options, + TargetMod, TargetCtx, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), %% case proplists:get_bool(verbose_spills, Options) of @@ -61,9 +67,38 @@ alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) -> %% false -> %% ok %% end, - {NewDefun, Coloring2, NewSpillIndex2}; + {CFG, Liveness, Coloring2, NewSpillIndex2}; _ -> %% Since SpillLimit is used as a low-water-mark %% the list of temps not to spill is uninteresting. - alloc(NewDefun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) + {NewColoring, _NewSpillIndex} = + call_allocator(CFG, Liveness, SpillLimit, SpillIndex, Options, + RegAllocMod, TargetMod, TargetCtx), + alloc(NewColoring, CFG, Liveness, SpillLimit, SpillIndex, Options, + RegAllocMod, TargetMod, TargetCtx) + end. + +call_allocator_initial(CFG, Liveness, SpillLimit, SpillIndex, Options, + RegAllocMod, TargetMod, TargetCtx) -> + case proplists:get_bool(ra_prespill, Options) of + true -> + hipe_regalloc_prepass:regalloc_initial( + RegAllocMod, CFG, Liveness, SpillIndex, SpillLimit, TargetMod, + TargetCtx, Options); + false -> + {C, SI} = RegAllocMod:regalloc(CFG, Liveness, SpillIndex, SpillLimit, + TargetMod, TargetCtx, Options), + {C, SI, CFG, Liveness} + end. + +call_allocator(CFG, Liveness, SpillLimit, SpillIndex, Options, RegAllocMod, + TargetMod, TargetCtx) -> + case proplists:get_bool(ra_prespill, Options) of + true -> + hipe_regalloc_prepass:regalloc( + RegAllocMod, CFG, Liveness, SpillIndex, SpillLimit, TargetMod, + TargetCtx, Options); + false -> + RegAllocMod:regalloc(CFG, Liveness, SpillIndex, SpillLimit, TargetMod, + TargetCtx, Options) end. diff --git a/lib/hipe/regalloc/hipe_regalloc_prepass.erl b/lib/hipe/regalloc/hipe_regalloc_prepass.erl new file mode 100644 index 0000000000..2f1597ffd1 --- /dev/null +++ b/lib/hipe/regalloc/hipe_regalloc_prepass.erl @@ -0,0 +1,1006 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2016. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% PREPASS FOR ITERATED REGISTER ALLOCATORS +%% +%% Implements a trivial partial but optimal fast register allocator to be used +%% as the first pass of the register allocation loop. +%% +%% The idea is to drastically reduce the number of temporaries, so as to speed +%% up the real register allocators. +%% +%% * Spills trivially unallocatable temps +%% This relies on the fact that calls intentionally clobber all registers. +%% Since this is the case, any temp that is alive over a call can't possibly +%% be allocated to anything but a spill slot. +%% +%% * Partitions the program at points where no pseudos that were not spiled are +%% live, and then do register allocation on these partitions independently. +%% These program points are commonly, but not exclusively, the call +%% instructions. +%% +%% TODO +%% * This module seems very successful at finding every single spill; register +%% allocation performance should be improved if we short-circuit the first +%% hipe_regalloc_loop iteration, skipping directly to rewrite without ever +%% calling RegAllocMod. +-module(hipe_regalloc_prepass). +-export([regalloc/8, regalloc_initial/8]). + +-ifndef(DEBUG). +-compile(inline). +-endif. + +%%-define(DO_ASSERT, 1). +-include("../main/hipe.hrl"). + +%%% TUNABLES + +%% Partitions with fewer than ?TUNE_TOO_FEW_BBS basic block halves are merged +%% together before register allocation. +-define(TUNE_TOO_FEW_BBS, 256). + +%% Ignore the ra_partitioned option (and do whole function RA instead) when +%% there are fewer than ?TUNE_MIN_SPLIT_BBS basic blocks. +-define(TUNE_MIN_SPLIT_BBS, 384). + +%% We present a "pseudo-target" to the register allocator we wrap. +-export([analyze/2, + all_precoloured/1, + allocatable/1, + args/2, + bb/3, + def_use/2, + defines/2, + is_fixed/2, % used by hipe_graph_coloring_regalloc + is_global/2, + is_move/2, + is_precoloured/2, + labels/2, + livein/3, + liveout/3, + non_alloc/2, + number_of_temporaries/2, + physical_name/2, + postorder/2, + reg_nr/2, + uses/2, + var_range/2, + reverse_postorder/2]). + +-record(prepass_ctx, + {target_mod :: module() + ,target_ctx :: target_context() + ,sub :: sub_map() % Translates temp numbers found in CFG and understood by + % Target to temp numbers passed to RegAllocMod. + ,inv :: inv_map() % Translates temp numbers passed to RegAllocMod + % to temp numbers found in CFG and understood by + % Target + ,max_phys :: temp() % Exclusive upper bound on physical registers + }). + +-record(cfg, + {cfg :: target_cfg() + ,bbs :: transformed_bbs() + ,max_reg :: temp() % Exclusive upper bound on temp numbers + ,rpostorder :: undefined % Only precomputed with partitioned cfg + | [label()] + }). + +-type bb() :: hipe_bb:bb(). % containing instr() +-type liveset() :: ordsets:ordset(temp()). +-record(transformed_bb, + {bb :: bb() + ,livein :: liveset() + ,liveout :: liveset() + }). +-type transformed_bb() :: #transformed_bb{}. +-type transformed_bbs() :: #{label() => transformed_bb()}. + +-record(instr, + {defuse :: {[temp()], [temp()]} + ,is_move :: boolean() + }). +-type instr() :: #instr{}. + +-type target_cfg() :: any(). +-type target_instr() :: any(). +-type target_temp() :: any(). +-type target_reg() :: non_neg_integer(). +-type target_liveness() :: any(). +-type target_liveset() :: ordsets:ordset(target_reg()). +-type target_context() :: any(). +-type spillno() :: non_neg_integer(). +-type temp() :: non_neg_integer(). +-type label() :: non_neg_integer(). + +-spec regalloc(module(), target_cfg(), target_liveness(), spillno(), spillno(), + module(), target_context(), proplists:proplist()) + -> {hipe_map(), spillno()}. +regalloc(RegAllocMod, CFG, Liveness, SpillIndex0, SpillLimit, TargetMod, + TargetCtx, Options) -> + {Coloring, SpillIndex, same} = + regalloc_1(RegAllocMod, CFG, SpillIndex0, SpillLimit, TargetMod, + TargetCtx, Options, Liveness), + {Coloring, SpillIndex}. + +%% regalloc_initial/7 is allowed to introduce new temporaries, unlike +%% regalloc/7. +%% In order for regalloc/7 to never introduce temporaries, regalloc/7 must never +%% choose to do split allocation unless regalloc_initial/7 does. This is the +%% reason that the splitting heuristic is solely based on the number of basic +%% blocks, which does not change during the register allocation loop. +-spec regalloc_initial(module(), target_cfg(), target_liveness(), spillno(), + spillno(), module(), target_context(), + proplists:proplist()) + -> {hipe_map(), spillno(), target_cfg(), + target_liveness()}. +regalloc_initial(RegAllocMod, CFG0, Liveness0, SpillIndex0, SpillLimit, + TargetMod, TargetCtx, Options) -> + {Coloring, SpillIndex, NewCFG} = + regalloc_1(RegAllocMod, CFG0, SpillIndex0, SpillLimit, TargetMod, TargetCtx, + Options, Liveness0), + {CFG, Liveness} = + case NewCFG of + same -> {CFG0, Liveness0}; + {rewritten, CFG1} -> {CFG1, TargetMod:analyze(CFG1, TargetCtx)} + end, + {Coloring, SpillIndex, CFG, Liveness}. + +regalloc_1(RegAllocMod, CFG0, SpillIndex0, SpillLimit, TargetMod, TargetCtx, + Options, Liveness) -> + {ScanBBs, Seen, SpillMap, SpillIndex1} = + scan_cfg(CFG0, Liveness, SpillIndex0, TargetMod, TargetCtx), + + {PartColoring, SpillIndex, NewCFG} = + case proplists:get_bool(ra_partitioned, Options) + andalso length(TargetMod:labels(CFG0, TargetCtx)) > ?TUNE_MIN_SPLIT_BBS + of + true -> + regalloc_partitioned(SpillMap, SpillIndex1, SpillLimit, ScanBBs, + CFG0, TargetMod, TargetCtx, RegAllocMod, Options); + _ -> + regalloc_whole(Seen, SpillMap, SpillIndex1, SpillLimit, ScanBBs, + CFG0, TargetMod, TargetCtx, RegAllocMod, Options) + end, + + SpillColors = [{T, {spill, S}} || {T, S} <- maps:to_list(SpillMap)], + Coloring = SpillColors ++ PartColoring, + + ?ASSERT(begin + AllPrecoloured = TargetMod:all_precoloured(TargetCtx), + MaxPhys = lists:max(AllPrecoloured) + 1, + Unused = unused(live_pseudos(Seen, SpillMap, MaxPhys), + SpillMap, CFG0, TargetMod, TargetCtx), + unused_unused(Unused, CFG0, TargetMod, TargetCtx) + end), + ?ASSERT(begin + CFG = + case NewCFG of + same -> CFG0; + {rewritten, CFG1} -> CFG1 + end, + check_coloring(Coloring, CFG, TargetMod, TargetCtx) + end), % Sanity-check + ?ASSERT(just_as_good_as(RegAllocMod, CFG, Liveness, SpillIndex0, SpillLimit, + TargetMod, TargetCtx, Options, SpillMap, Coloring, + Unused)), + {Coloring, SpillIndex, NewCFG}. + +regalloc_whole(Seen, SpillMap, SpillIndex0, SpillLimit, ScanBBs, + CFG, TargetMod, TargetCtx, RegAllocMod, Options) -> + AllPrecoloured = TargetMod:all_precoloured(TargetCtx), + MaxPhys = lists:max(AllPrecoloured) + 1, + LivePseudos = live_pseudos(Seen, SpillMap, MaxPhys), + {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit} = + number_and_map(AllPrecoloured, LivePseudos, SpillLimit), + BBs = transform_whole_cfg(ScanBBs, SubMap), + SubMod = #cfg{cfg=CFG, bbs=BBs, max_reg=MaxR}, + SubContext = #prepass_ctx{target_mod=TargetMod, target_ctx=TargetCtx, + max_phys=MaxPhys, inv=InvMap, sub=SubMap}, + {SubColoring, SpillIndex} = + RegAllocMod:regalloc(SubMod, SubMod, SpillIndex0, SubSpillLimit, ?MODULE, + SubContext, Options), + ?ASSERT(check_coloring(SubColoring, SubMod, ?MODULE, SubContext)), + {translate_coloring(SubColoring, InvMap), SpillIndex, same}. + +regalloc_partitioned(SpillMap, SpillIndex0, SpillLimit, ScanBBs, + CFG, TargetMod, TargetCtx, RegAllocMod, Options) -> + AllPrecoloured = TargetMod:all_precoloured(TargetCtx), + MaxPhys = lists:max(AllPrecoloured) + 1, + + DSets0 = initial_dsets(CFG, TargetMod, TargetCtx), + PartBBList = part_cfg(ScanBBs, SpillMap, MaxPhys), + DSets1 = join_whole_blocks(PartBBList, DSets0), + {PartBBsRLList, DSets2} = merge_small_parts(DSets1), + {PartBBs, DSets3} = merge_pointless_splits(PartBBList, ScanBBs, DSets2), + SeenMap = collect_seenmap(PartBBsRLList, PartBBs), + {RPostMap, _DSets4} = part_order(TargetMod:reverse_postorder(CFG, TargetCtx), + DSets3), + + {Allocations, SpillIndex} = + lists:mapfoldl( + fun({Root, Elems}, SpillIndex1) -> + #{Root := Seen} = SeenMap, + #{Root := RPost} = RPostMap, + LivePseudos = live_pseudos(Seen, SpillMap, MaxPhys), + {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit} = + number_and_map(AllPrecoloured, LivePseudos, SpillLimit), + BBs = transform_cfg(Elems, PartBBs, SubMap), + SubMod = #cfg{cfg=CFG, bbs=BBs, max_reg=MaxR, rpostorder=RPost}, + SubContext = #prepass_ctx{target_mod=TargetMod, target_ctx=TargetCtx, + max_phys=MaxPhys, inv=InvMap, sub=SubMap}, + {SubColoring, SpillIndex2} = + RegAllocMod:regalloc(SubMod, SubMod, SpillIndex1, SubSpillLimit, + ?MODULE, SubContext, Options), + ?ASSERT(check_coloring(SubColoring, SubMod, ?MODULE, SubContext)), + {{translate_coloring(SubColoring, InvMap), Elems}, SpillIndex2} + end, SpillIndex0, PartBBsRLList), + {Coloring, NewCFG} = + combine_allocations(Allocations, MaxPhys, PartBBs, TargetMod, TargetCtx, + CFG), + {Coloring, SpillIndex, NewCFG}. + +-spec number_and_map([target_reg()], target_liveset(), target_reg()) + -> {sub_map(), inv_map(), temp(), temp(), temp()}. +number_and_map(Phys, Pseud, SpillLimit) -> + MaxPhys = lists:max(Phys) + 1, + ?ASSERT(Pseud =:= [] orelse lists:min(Pseud) >= MaxPhys), + NrPseuds = length(Pseud), + MaxR = MaxPhys+NrPseuds, + PseudNrs = lists:zip(Pseud, lists:seq(MaxPhys, MaxR-1)), + MapList = lists:zip(Phys, Phys) % Physicals are identity-mapped + ++ PseudNrs, + ?ASSERT(MapList =:= lists:ukeysort(1, MapList)), + SubMap = {s,maps:from_list(MapList)}, + InvMap = {i,maps:from_list([{Fake, Real} || {Real, Fake} <- MapList])}, + SubSpillLimit = translate_spill_limit(MapList, SpillLimit), + {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit}. + +-spec translate_spill_limit([{target_reg(), temp()}], target_reg()) -> temp(). +translate_spill_limit([{Real,Fake}], SpillLimit) when Real < SpillLimit -> + Fake + 1; +translate_spill_limit([{Real,_}|Ps], SpillLimit) when Real < SpillLimit -> + translate_spill_limit(Ps, SpillLimit); +translate_spill_limit([{Real,Fake}|_], SpillLimit) when Real >= SpillLimit -> + Fake. + +-spec live_pseudos(seen(), spill_map(), target_reg()) -> target_liveset(). +live_pseudos(Seen, SpillMap, MaxPhys) -> + %% When SpillMap is much larger than Seen (which is typical in the partitioned + %% case), it is much more efficient doing it like this than making an ordset + %% of the spills and subtracting. + ordsets:from_list( + lists:filter(fun(R) -> R >= MaxPhys andalso not maps:is_key(R, SpillMap) + end, maps:keys(Seen))). + +-spec translate_coloring(hipe_map(), inv_map()) -> hipe_map(). +translate_coloring(SubColoring, InvMap) -> + lists:map(fun({T, P}) -> {imap_get(T, InvMap), P} end, SubColoring). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% First pass +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Spill trivially unallocatable temps, create internal target-independent +%% program representation, and collect a set of all used temps. +-record(spill_state, + {map :: spill_map() + ,ix :: spillno() + }). +-type spill_state() :: #spill_state{}. +-type spill_map() :: #{target_reg() => spillno()}. + +-spec scan_cfg(target_cfg(), target_liveness(), spillno(), module(), + target_context()) + -> {scan_bbs() + ,seen() + ,spill_map() + ,spillno() + }. +scan_cfg(CFG, Liveness, SpillIndex0, TgtMod, TgtCtx) -> + State0 = #spill_state{map=#{}, ix=SpillIndex0}, + {BBs, Seen, #spill_state{map=Spill, ix=SpillIndex}} = + scan_bbs(TgtMod:labels(CFG,TgtCtx), CFG, Liveness, #{}, State0, #{}, TgtMod, + TgtCtx), + {BBs, Seen, Spill, SpillIndex}. + +-type seen() :: #{target_reg() => []}. % set +-type scan_bb() :: {[instr()], target_liveset(), target_liveset()}. +-type scan_bbs() :: #{label() => scan_bb()}. + +-spec scan_bbs([label()], target_cfg(), target_liveness(), seen(), + spill_state(), scan_bbs(), module(), target_context()) + -> {scan_bbs(), seen(), spill_state()}. +scan_bbs([], _CFG, _Liveness, Seen, State, BBs, _TgtMod, _TgtCtx) -> + {BBs, Seen, State}; +scan_bbs([L|Ls], CFG, Liveness, Seen0, State0, BBs, TgtMod, TgtCtx) -> + Liveout = t_liveout(Liveness, L, TgtMod, TgtCtx), + {Code, Livein, Seen, State} = + scan_bb(lists:reverse(hipe_bb:code(TgtMod:bb(CFG, L, TgtCtx))), Liveout, + Seen0, State0, [], TgtMod, TgtCtx), + BB = {Code, Livein, Liveout}, + scan_bbs(Ls, CFG, Liveness, Seen, State, BBs#{L=>BB}, TgtMod, TgtCtx). + +-spec scan_bb([target_instr()], target_liveset(), seen(), spill_state(), + [instr()], module(), target_context()) + -> {[instr()] + ,target_liveset() + ,seen() + ,spill_state() + }. +scan_bb([], Live, Seen, State, IAcc, _TgtMod, _TgtCtx) -> + {IAcc, Live, Seen, State}; +scan_bb([I|Is], Live0, Seen0, State0, IAcc0, TgtMod, TgtCtx) -> + {TDef, TUse} = TgtMod:def_use(I,TgtCtx), + ?ASSERT(TDef =:= TgtMod:defines(I,TgtCtx)), + ?ASSERT(TUse =:= TgtMod:uses(I,TgtCtx)), + Def = ordsets:from_list(reg_names(TDef, TgtMod, TgtCtx)), + Use = ordsets:from_list(reg_names(TUse, TgtMod, TgtCtx)), + Live = ordsets:union(Use, ToSpill = ordsets:subtract(Live0, Def)), + Seen = add_seen(Def, add_seen(Use, Seen0)), + NewI = #instr{defuse={Def, Use}, is_move=TgtMod:is_move(I,TgtCtx)}, + IAcc = [NewI|IAcc0], + State = + case TgtMod:defines_all_alloc(I,TgtCtx) of + false -> State0; + true -> spill_all(ToSpill, TgtMod, TgtCtx, State0) + end, + %% We can drop "no-ops" here; where (if anywhere) is it worth it? + scan_bb(Is, Live, Seen, State, IAcc, TgtMod, TgtCtx). + +-spec t_liveout(target_liveness(), label(), module(), target_context()) -> + target_liveset(). +t_liveout(Liveness, L, TgtMod, TgtCtx) -> + %% FIXME: unnecessary sort; liveout is sorted, reg_names(...) should be sorted + %% or consist of a few sorted subsequences (per type) + ordsets:from_list(reg_names(TgtMod:liveout(Liveness, L, TgtCtx), TgtMod, + TgtCtx)). + +-spec reg_names([target_temp()], module(), target_context()) -> [target_reg()]. +reg_names(Regs, TgtMod, TgtCtx) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. + +-spec add_seen([target_reg()], seen()) -> seen(). +add_seen([], Seen) -> Seen; +add_seen([R|Rs], Seen) -> add_seen(Rs, Seen#{R=>[]}). + +-spec spill_all([target_reg()], module(), target_context(), spill_state()) -> + spill_state(). +spill_all([], _TgtMod, _TgtCtx, State) -> State; +spill_all([R|Rs], TgtMod, TgtCtx, State=#spill_state{map=Map, ix=Ix}) -> + case TgtMod:is_precoloured(R,TgtCtx) or maps:is_key(R, Map) of + true -> spill_all(Rs, TgtMod, TgtCtx, State); + false -> spill_all(Rs, TgtMod, TgtCtx, + State#spill_state{map=Map#{R=>Ix}, ix=Ix+1}) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Second pass (without split) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Rewrite CFG to the new temp names. +-spec transform_whole_cfg(scan_bbs(), sub_map()) -> transformed_bbs(). +transform_whole_cfg(BBs0, SubMap) -> + maps:map(fun(_, BB) -> transform_whole_bb(BB, SubMap) end, BBs0). + +-spec transform_whole_bb(scan_bb(), sub_map()) -> transformed_bb(). +transform_whole_bb({Code, Livein, Liveout}, SubMap) -> + #transformed_bb{ + bb=hipe_bb:mk_bb([I#instr{defuse={smap_get_all_partial(Def, SubMap), + smap_get_all_partial(Use, SubMap)}} + || I = #instr{defuse={Def,Use}} <- Code]) + %% Assume mapping preserves monotonicity + ,livein=smap_get_all_partial(Livein, SubMap) + ,liveout=smap_get_all_partial(Liveout, SubMap) + }. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Second pass (with split) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Discover program partitioning +%% Regretfully, this needs to be a separate pass, as having the global live set +%% is crucial to get a useful partitioning. + +%% Single-block parts are merged if there are multiple in a single block, as it +%% is judged to not be beneficial to make them too small. + +-type part_bb_part() :: {[instr()], target_liveset(), target_liveset()}. +-type part_bb() :: {single, part_bb_part()} + | {split, part_bb_part(), part_bb_part()}. +-type part_bb_list() :: [{label(), part_bb()}]. +-type part_bbs() :: #{label() => part_bb()}. +-type part_bb_sofar() :: single + | {split, [instr()], target_liveset()}. % , target_liveset() + +-spec part_cfg(scan_bbs(), spill_map(), target_reg()) -> part_bb_list(). +part_cfg(ScanBBs, SpillMap, MaxPhys) -> + Liveset = mk_part_liveset(SpillMap, MaxPhys), + lists:map(fun(BB) -> part_bb(BB, Liveset) end, maps:to_list(ScanBBs)). + +-spec part_bb({label(), scan_bb()}, part_liveset()) -> {label(), part_bb()}. +part_bb({L, BB0={Code0, Livein, Liveout}}, Liveset) -> + {Sofar, NewCode} = part_bb_1(lists:reverse(Code0), Liveset, Liveout, []), + BB = case Sofar of + single -> + ?ASSERT(Code0 =:= NewCode), + {single, BB0}; + {split, ExitCode, ExitLivein = EntryLiveout} -> + {split, {NewCode, Livein, EntryLiveout}, + {ExitCode, ExitLivein, Liveout}} + end, + {L, BB}. + +-spec part_bb_1([instr()], part_liveset(), target_liveset(), [instr()]) + -> {part_bb_sofar(), [instr()]}. +part_bb_1([], _Liveset, _Livein, IAcc) -> {single, IAcc}; +part_bb_1([I=#instr{defuse={Def,Use}}|Is], Liveset, Live0, IAcc0) -> + Live = ordsets:union(Use, ordsets:subtract(Live0, Def)), + IAcc = [I|IAcc0], + case part_none_live(Live, Liveset) of + false -> part_bb_1(Is, Liveset, Live, IAcc); + %% One split point will suffice + true -> {{split, IAcc, Live}, lists:reverse(Is)} + end. + +-spec part_none_live(target_liveset(), part_liveset()) -> boolean(). +part_none_live(Live, Liveset) -> + not lists:any(fun(R) -> part_liveset_is_live(R, Liveset) end, Live). + +-type part_liveset() :: {spill_map(), target_reg()}. + +-spec mk_part_liveset(spill_map(), target_reg()) -> part_liveset(). +mk_part_liveset(SpillMap, MaxPhys) -> {SpillMap, MaxPhys}. + +-spec part_liveset_is_live(target_reg(), part_liveset()) -> boolean(). +part_liveset_is_live(R, {SpillMap, MaxPhys}) when is_integer(R) -> + R >= MaxPhys andalso not maps:is_key(R, SpillMap). + +%% @doc Merges split blocks where entry and exit belong to the same DSet. +%% Does not change DSets +-spec merge_pointless_splits(part_bb_list(), scan_bbs(), bb_dsets()) + -> {part_bbs(), bb_dsets()}. +merge_pointless_splits(PartBBList0, ScanBBs, DSets0) -> + {PartBBList, DSets} = + merge_pointless_splits_1(PartBBList0, ScanBBs, DSets0, []), + {maps:from_list(PartBBList), DSets}. + +-spec merge_pointless_splits_1( + part_bb_list(), scan_bbs(), bb_dsets(), part_bb_list()) + -> {part_bb_list(), bb_dsets()}. +merge_pointless_splits_1([], _ScanBBs, DSets, Acc) -> {Acc, DSets}; +merge_pointless_splits_1([P={_,{single,_}}|Ps], ScanBBs, DSets, Acc) -> + merge_pointless_splits_1(Ps, ScanBBs, DSets, [P|Acc]); +merge_pointless_splits_1([P0={L,{split,_,_}}|Ps], ScanBBs, DSets0, Acc) -> + {EntryRoot, DSets1} = dsets_find({entry,L}, DSets0), + {ExitRoot, DSets} = dsets_find({exit,L}, DSets1), + case EntryRoot =:= ExitRoot of + false -> merge_pointless_splits_1(Ps, ScanBBs, DSets, [P0|Acc]); + true -> + %% Reuse the code list from ScanBBs rather than concatenating the split + %% parts + #{L := BB} = ScanBBs, + ?ASSERT(begin + {L,{split,{_EntryCode,_,_},{_ExitCode,_,_}}}=P0, % [_| + {_Code,_,_}=BB, + _Code =:= (_EntryCode ++ _ExitCode) + end), + merge_pointless_splits_1(Ps, ScanBBs, DSets, [{L,{single, BB}}|Acc]) + end. + +-spec merge_small_parts(bb_dsets()) -> {bb_dsets_rllist(), bb_dsets()}. +merge_small_parts(DSets0) -> + {RLList, DSets1} = dsets_to_rllist(DSets0), + RLLList = [{R, length(Elems), Elems} || {R, Elems} <- RLList], + merge_small_parts_1(RLLList, DSets1, []). + +-spec merge_small_parts_1( + [{bb_dset_key(), non_neg_integer(), [bb_dset_key()]}], + bb_dsets(), bb_dsets_rllist() + ) -> {bb_dsets_rllist(), bb_dsets()}. +merge_small_parts_1([], DSets, Acc) -> {Acc, DSets}; +merge_small_parts_1([{R, _, Es}], DSets, Acc) -> {[{R, Es}|Acc], DSets}; +merge_small_parts_1([{R, L, Es}|Ps], DSets, Acc) when L >= ?TUNE_TOO_FEW_BBS -> + merge_small_parts_1(Ps, DSets, [{R,Es}|Acc]); +merge_small_parts_1([Fst,{R, L, Es}|Ps], DSets, Acc) + when L >= ?TUNE_TOO_FEW_BBS -> + merge_small_parts_1([Fst|Ps], DSets, [{R,Es}|Acc]); +merge_small_parts_1([{R1,L1,Es1},{R2,L2,Es2}|Ps], DSets0, Acc) -> + ?ASSERT(L1 < ?TUNE_TOO_FEW_BBS andalso L2 < ?TUNE_TOO_FEW_BBS), + DSets1 = dsets_union(R1, R2, DSets0), + {R, DSets} = dsets_find(R1, DSets1), + merge_small_parts_1([{R,L2+L1,Es2++Es1}|Ps], DSets, Acc). + +%% @doc Partition an ordering over BBs into subsequences for the dsets that +%% contain them. +%% Does not change dsets. +-spec part_order([label()], bb_dsets()) + -> {#{bb_dset_key() => [label()]}, bb_dsets()}. +part_order(Lbs, DSets) -> part_order(Lbs, DSets, #{}). + +part_order([], DSets, Acc) -> {Acc, DSets}; +part_order([L|Ls], DSets0, Acc0) -> + {EntryRoot, DSets1} = dsets_find({entry,L}, DSets0), + {ExitRoot, DSets2} = dsets_find({exit,L}, DSets1), + Acc1 = map_append(EntryRoot, L, Acc0), + %% Only include the label once if both entry and exit is in same partition + Acc2 = case EntryRoot =:= ExitRoot of + true -> Acc1; + false -> map_append(ExitRoot, L, Acc1) + end, + part_order(Ls, DSets2, Acc2). + +map_append(Key, Elem, Map) -> + case Map of + #{Key := List} -> Map#{Key := [Elem|List]}; + #{} -> Map#{Key => [Elem]} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Interference graph partitioning +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% We partition the program + +%% The algorithm considers two kinds of components; those that are local to a +%% basic block, and those that are not. The key is that any basic block belongs +%% to at most two non-local components; one from the beginning to the first +%% split point, and one from the end to the last split point. + +-type bb_dset_key() :: {entry | exit, label()}. +-type bb_dsets() :: dsets(bb_dset_key()). +-type bb_dsets_rllist() :: [{bb_dset_key(), [bb_dset_key()]}]. + +-spec initial_dsets(target_cfg(), module(), target_context()) -> bb_dsets(). +initial_dsets(CFG, TgtMod, TgtCtx) -> + Labels = TgtMod:labels(CFG, TgtCtx), + DSets0 = dsets_new(lists:append([[{entry,L},{exit,L}] || L <- Labels])), + Edges = lists:append([[{L, S} || S <- hipe_gen_cfg:succ(CFG, L)] + || L <- Labels]), + lists:foldl(fun({X, Y}, DS) -> dsets_union({exit,X}, {entry,Y}, DS) end, + DSets0, Edges). + +-spec join_whole_blocks(part_bb_list(), bb_dsets()) -> bb_dsets(). +join_whole_blocks(PartBBList, DSets0) -> + lists:foldl(fun({L, {single, _}}, DS) -> dsets_union({entry,L}, {exit,L}, DS); + ({_, {split, _, _}}, DS) -> DS + end, DSets0, PartBBList). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The disjoint set forests data structure, for elements of arbitrary types. +%% Note that the find operation mutates the set. +%% +%% We could do this more efficiently if we restricted the elements to integers, +%% and used the (mutable) hipe arrays. For arbitrary terms ETS could be used, +%% for a persistent interface (which isn't that nice when even accessors return +%% modified copies), the array module could be used. +-type dsets(X) :: #{X => {node, X} | {root, non_neg_integer()}}. + +-spec dsets_new([E]) -> dsets(E). +dsets_new(Elems) -> maps:from_list([{E,{root,0}} || E <- Elems]). + +-spec dsets_find(E, dsets(E)) -> {E, dsets(E)}. +dsets_find(E, DS0) -> + case DS0 of + #{E := {root,_}} -> {E, DS0}; + #{E := {node,N}} -> + case dsets_find(N, DS0) of + {N, _}=T -> T; + {R, DS1} -> {R, DS1#{E := {node,R}}} + end + ;_ -> error(badarg, [E, DS0]) + end. + +-spec dsets_union(E, E, dsets(E)) -> dsets(E). +dsets_union(X, Y, DS0) -> + {XRoot, DS1} = dsets_find(X, DS0), + case dsets_find(Y, DS1) of + {XRoot, DS2} -> DS2; + {YRoot, DS2} -> + #{XRoot := {root,XRR}, YRoot := {root,YRR}} = DS2, + if XRR < YRR -> DS2#{XRoot := {node,YRoot}}; + XRR > YRR -> DS2#{YRoot := {node,XRoot}}; + true -> DS2#{YRoot := {node,XRoot}, XRoot := {root,XRR+1}} + end + end. + +-spec dsets_to_rllist(dsets(E)) -> {[{Root::E, Elems::[E]}], dsets(E)}. +dsets_to_rllist(DS0) -> + {Lists, DS} = dsets_to_rllist(maps:keys(DS0), #{}, DS0), + {maps:to_list(Lists), DS}. + +dsets_to_rllist([], Acc, DS) -> {Acc, DS}; +dsets_to_rllist([E|Es], Acc, DS0) -> + {ERoot, DS} = dsets_find(E, DS0), + dsets_to_rllist(Es, map_append(ERoot, E, Acc), DS). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Third pass +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Collect all referenced temps in each partition. + +%% Note: The temps could be collected during the partition pass for each +%% half-bb, and then combined here. Would that be beneficial? + +collect_seenmap(PartBBsRLList, PartBBs) -> + collect_seenmap(PartBBsRLList, #{}, PartBBs). + +collect_seenmap([], Acc, _PartBBs) -> Acc; +collect_seenmap([{R,Elems}|Ps], Acc, PartBBs) -> + Seen = collect_seen_part(Elems, #{}, PartBBs), + collect_seenmap(Ps, Acc#{R => Seen}, PartBBs). + +collect_seen_part([], Acc, _PartBBs) -> Acc; +collect_seen_part([{Half,L}|Es], Acc0, PartBBs) -> + BB = maps:get(L, PartBBs), + Code = case {Half, BB} of + {entry, {single, {C,_,_}}} -> C; + {entry, {split, {C,_,_}, _}} -> C; + {exit, {split, _, {C,_,_}}} -> C; + {exit, {single, _}} -> [] % Ignore; was collected by its entry half + end, + Acc = collect_seen_code(Code, Acc0), + collect_seen_part(Es, Acc, PartBBs). + +collect_seen_code([], Acc) -> Acc; +collect_seen_code([#instr{defuse={Def,Use}}|Is], Acc) -> + collect_seen_code(Is, add_seen(Def, add_seen(Use, Acc))). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fourth pass +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Rewrite CFG to the new temp names. +-spec transform_cfg([bb_dset_key()], part_bbs(), sub_map()) -> transformed_bbs(). + +transform_cfg(Elems, PartBBs, SubMap) -> + transform_cfg(Elems, PartBBs, SubMap, #{}). + +transform_cfg([], _PartBBs, _SubMap, Acc) -> Acc; +transform_cfg([{Half,L}|Es], PartBBs, SubMap, Acc0) -> + #{L := PBB} = PartBBs, + Acc = case {Half, PBB} of + {entry, {single,BB}} -> Acc0#{L=>transform_bb(BB, SubMap)}; + {entry, {split,BB,_}} -> Acc0#{L=>transform_bb(BB, SubMap)}; + {exit, {split,_,BB}} -> Acc0#{L=>transform_bb(BB, SubMap)}; + {exit, {single, _}} -> Acc0 % Was included by the entry half + end, + transform_cfg(Es, PartBBs, SubMap, Acc). + +-spec transform_bb(part_bb_part(), sub_map()) -> transformed_bb(). +transform_bb(BB, SubMap) -> + %% For now, part_bb_part() and split_bb() share representation + transform_whole_bb(BB, SubMap). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fifth pass +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Combine colorings and substitute temps in actual cfg if there were +%% collisions. + +%% A temp can sometimes appear in more than one partition. For example, defining +%% an unused value. If these are found by combine_allocations, we have to +%% rename this temp in one of the partitions on the real cfg. +%% +%% We optimistically assume that there will be no such collisions, and when +%% there are, we fix them up as they're found. + +-spec combine_allocations([{hipe_map(), [bb_dset_key()]}], target_reg(), + part_bbs(), module(), target_context(), target_cfg()) + -> {hipe_map(), same | {rewritten, target_cfg()}}. +combine_allocations([{A,_}|As], MaxPhys, PartBBs, TgtMod, TgtCtx, CFG) -> + {Phys, Pseuds} = lists:partition(fun({R,_}) -> R < MaxPhys end, A), + {Seen, _, []} = partition_by_seen(Pseuds, #{}, [], []), + combine_allocations(As, MaxPhys, PartBBs, TgtMod, TgtCtx, Phys, Seen, Pseuds, + {same, CFG}). + +-spec combine_allocations([{hipe_map(), [bb_dset_key()]}], target_reg(), + part_bbs(), module(), target_context(), hipe_map(), + seen(), hipe_map(), {same|rewritten, target_cfg()}) + -> {hipe_map(), same | {rewritten, target_cfg()}}. +combine_allocations([], _MaxPhys, _PartBBs, _TgtMod, _TgtCtx, Phys, _Seen, + Pseuds, CFGT) -> + {Phys ++ Pseuds, case CFGT of + {same, _} -> same; + {rewritten, _} -> CFGT + end}; +combine_allocations([{A,PartElems}|As], MaxPhys, PartBBs, TgtMod, TgtCtx, Phys, + Seen0, Acc, CFGT={_,CFG0}) -> + {Phys, Pseuds0} = lists:partition(fun({R,_}) -> R < MaxPhys end, A), + {Seen, Pseuds, Collisions} = partition_by_seen(Pseuds0, Seen0, [], []), + case Collisions of + [] -> combine_allocations(As, MaxPhys, PartBBs, TgtMod, TgtCtx, Phys, Seen, + Pseuds++Acc, CFGT); + _ -> + %% There were collisions; rename all the temp numbers in Collisions + {CFG, Renamed} = rename(Collisions, PartElems, PartBBs, TgtMod, TgtCtx, + CFG0), + combine_allocations(As, MaxPhys, PartBBs, TgtMod, TgtCtx, Phys, Seen, + Pseuds++Renamed++Acc, {rewritten,CFG}) + end. + +%% @doc Partitions a coloring on whether the registers are in the Seen set, +%% adding any new registers to the set. +-spec partition_by_seen(hipe_map(), seen(), hipe_map(), hipe_map()) + -> {seen(), hipe_map(), hipe_map()}. +partition_by_seen([], Seen, Acc, Collisions) -> {Seen, Acc, Collisions}; +partition_by_seen([C={R,_}|Cs], Seen, Acc, Colls) -> + case Seen of + #{R := _} -> partition_by_seen(Cs, Seen, Acc, [C|Colls]); + #{} -> partition_by_seen(Cs, Seen#{R => []}, [C|Acc], Colls) + end. + +-spec rename(hipe_map(), [bb_dset_key()], part_bbs(), module(), + target_context(), target_cfg()) + -> {target_cfg(), hipe_map()}. +rename(CollisionList, PartElems, PartBBs, TgtMod, TgtCtx, CFG0) -> + {Map, Renamed} = new_names(CollisionList, TgtMod, TgtCtx, #{}, []), + Fun = fun(I) -> + TgtMod:subst_temps( + fun(Temp) -> + N = TgtMod:reg_nr(Temp, TgtCtx), + case Map of + #{N := Subst} -> TgtMod:update_reg_nr(Subst, Temp, TgtCtx); + #{} -> Temp + end + end, I, TgtCtx) + end, + {rename_1(PartElems, PartBBs, TgtMod, TgtCtx, Fun, CFG0), Renamed}. + +-type rename_map() :: #{target_reg() => target_reg()}. +-type rename_fun() :: fun((target_instr()) -> target_instr()). + +-spec new_names(hipe_map(), module(), target_context(), rename_map(), + hipe_map()) + -> {rename_map(), hipe_map()}. +new_names([], _TgtMod, _TgtCtx, Map, Renamed) -> {Map, Renamed}; +new_names([{R,C}|As], TgtMod, TgtCtx, Map, Renamed) -> + Subst = TgtMod:new_reg_nr(TgtCtx), + new_names(As, TgtMod, TgtCtx, Map#{R => Subst}, [{Subst, C} | Renamed]). + +%% @doc Maps over all instructions in a partition on the original CFG. +-spec rename_1([bb_dset_key()], part_bbs(), module(), target_context(), + rename_fun(), target_cfg()) -> target_cfg(). +rename_1([], _PartBBs, _TgtMod, _TgtCtx, _Fun, CFG) -> CFG; +rename_1([{Half,L}|Es], PartBBs, TgtMod, TgtCtx, Fun, CFG0) -> + Code0 = hipe_bb:code(BB = TgtMod:bb(CFG0, L, TgtCtx)), + Code = case {Half, maps:get(L, PartBBs)} of + {entry, {single,_}} -> lists:map(Fun, Code0); + {entry, {split,PBBP,_}} -> + map_start(Fun, part_bb_part_len(PBBP), Code0); + {exit, {split,_,PBBP}} -> + map_end(Fun, part_bb_part_len(PBBP), Code0); + {exit, {single, _}} -> Code0 + end, + CFG = TgtMod:update_bb(CFG0, L, hipe_bb:code_update(BB, Code), TgtCtx), + rename_1(Es, PartBBs, TgtMod, TgtCtx, Fun, CFG). + +-spec part_bb_part_len(part_bb_part()) -> non_neg_integer(). +part_bb_part_len({Code, _Livein, _Liveout}) -> length(Code). + +%% @doc Map the first N elements of a list +-spec map_start(fun((X) -> Y), non_neg_integer(), [X]) -> [X|Y]. +map_start(_Fun, 0, List) -> List; +map_start(Fun, N, [E|Es]) -> + [Fun(E)|map_start(Fun, N-1, Es)]. + +%% @doc Map the last N elements of a list +-spec map_end(fun((X) -> Y), non_neg_integer(), [X]) -> [X|Y]. +map_end(Fun, N, List) -> + map_end(Fun, N, length(List), List). + +map_end(Fun, N, Len, [E|Es]) when Len > N -> [E|map_end(Fun, N, Len-1, Es)]; +map_end(Fun, N, Len, List) when Len =:= N -> lists:map(Fun, List). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Temp map ADT +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-type sub_map() :: {s,#{target_reg() => temp()}}. +-type inv_map() :: {i,#{temp() => target_reg()}}. + +-spec smap_get(target_reg(), sub_map()) -> temp(). +smap_get(Temp, {s,Map}) when is_integer(Temp) -> maps:get(Temp, Map). + +-spec imap_get(temp(), inv_map()) -> target_reg(). +imap_get(Temp, {i,Map}) when is_integer(Temp) -> maps:get(Temp, Map). + +-spec smap_get_all_partial([target_reg()], sub_map()) -> [temp()]. +smap_get_all_partial([], _) -> []; +smap_get_all_partial([T|Ts], SMap={s,Map}) when is_integer(T) -> + case Map of + #{T := R} -> [R|smap_get_all_partial(Ts, SMap)]; + #{} -> smap_get_all_partial(Ts, SMap) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Validation +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-ifdef(DO_ASSERT). +%%%%%%%%%%%%%%%%%%%% +%% Check that the coloring is correct (if the IG is correct): +%% + +%% Define these as 'ok' or 'report(X,Y)' depending on how much output you want. +-define(report0(X,Y), ?IF_DEBUG_LEVEL(0,?msg(X, Y),ok)). +-define(report(X,Y), ?IF_DEBUG_LEVEL(1,?msg(X, Y),ok)). +-define(report2(X,Y), ?IF_DEBUG_LEVEL(2,?msg(X, Y),ok)). +-define(report3(X,Y), ?IF_DEBUG_LEVEL(3,?msg(X, Y),ok)). + +check_coloring(Coloring, CFG, TgtMod, TgtCtx) -> + ?report0("checking coloring ~p~n",[Coloring]), + IG = hipe_ig:build(CFG, TgtMod:analyze(CFG,TgtCtx), TgtMod, TgtCtx), + check_cols(hipe_vectors:list(hipe_ig:adj_list(IG)), + init_coloring(Coloring, TgtMod, TgtCtx)). + +init_coloring(Xs, TgtMod, TgtCtx) -> + hipe_temp_map:cols2tuple(Xs, TgtMod, TgtCtx). + +check_color_of(X, Cols) -> + case hipe_temp_map:find(X, Cols) of + unknown -> + uncolored; + C -> + C + end. + +check_cols([], _Cols) -> + ?report("coloring valid~n",[]), + true; +check_cols([{X,Neighbours}|Xs], Cols) -> + Cs = [{N, check_color_of(N, Cols)} || N <- Neighbours], + C = check_color_of(X, Cols), + case valid_coloring(X, C, Cs) of + yes -> + check_cols(Xs, Cols); + {no,Invalids} -> + ?msg("node ~p has same color (~p) as ~p~n", [X,C,Invalids]), + check_cols(Xs, Cols) andalso false + end. + +valid_coloring(_X, _C, []) -> + yes; +valid_coloring(X, C, [{Y,C}|Ys]) -> + case valid_coloring(X, C, Ys) of + yes -> {no, [Y]}; + {no,Zs} -> {no, [Y|Zs]} + end; +valid_coloring(X, C, [_|Ys]) -> + valid_coloring(X, C, Ys). + +unused_unused(Unused, CFG, TgtMod, TgtCtx) -> + IG = hipe_ig:build(CFG, TgtMod:analyze(CFG,TgtCtx), TgtMod, TgtCtx), + lists:all(fun(R) -> case hipe_ig:get_node_degree(R, IG) of + 0 -> true; + Deg -> + ?msg("Temp ~w is in unused but has degree ~w~n", + [R, Deg]), + false + end end, Unused). + +%%%%%%%%%%%%%%%%%%%% +%% Check that no register allocation opportunities were missed due to ?MODULE +%% +just_as_good_as(RegAllocMod, CFG, Liveness, SpillIndex0, SpillLimit, TgtMod, + TgtCtx, Options, SpillMap, Coloring, Unused) -> + {CheckColoring, _} = + RegAllocMod:regalloc(CFG, Liveness, SpillIndex0, SpillLimit, TgtMod, TgtCtx, + Options), + Now = lists:sort([{R,Kind} || {R,{Kind,_}} <- Coloring, + not ordsets:is_element(R, Unused)]), + Check = lists:sort([{R,Kind} || {R,{Kind,_}} <- CheckColoring, + not ordsets:is_element(R, Unused)]), + CheckMap = maps:from_list(Check), + SaneSpills = all_spills_sane_1(CheckColoring, SpillMap), + case SaneSpills + andalso lists:all(fun({R, spill}) -> maps:get(R, CheckMap) =:= spill; + ({_,reg}) -> true + end, Now) + of + true -> true; + false -> + {NowRegs, _} = _NowCount = count(Now), + {CheckRegs, _} = _CheckCount = count(Check), + {M,F,A} = element(2, element(3, CFG)), + io:fwrite(standard_error, "Colorings differ (~w, ~w)!~n" + "MFA: ~w:~w/~w~n" + "Unused: ~w~n" + "Now:~w~nCorrect:~w~n", + [TgtMod, RegAllocMod, + M,F,A, + Unused, + Now -- Check, Check -- Now]), + SaneSpills andalso NowRegs >= CheckRegs + end. + +count(C) -> {length([[] || {_, reg} <- C]), + length([[] || {_, spill} <- C])}. + +unused(LivePseudos, SpillMap, CFG, TgtMod, TgtCtx) -> + {TMin, TMax} = TgtMod:var_range(CFG,TgtCtx), + SpillOSet = ordsets:from_list(maps:keys(SpillMap)), + PhysOSet = ordsets:from_list(TgtMod:all_precoloured(TgtCtx)), + Used = ordsets:union(LivePseudos, ordsets:union(PhysOSet, SpillOSet)), + ordsets:subtract(lists:seq(TMin, TMax), Used). + +%% Check that no temp that we wrote off was actually allocatable. +all_spills_sane_1(_, Empty) when map_size(Empty) =:= 0 -> true; +all_spills_sane_1([], _Nonempty) -> false; +all_spills_sane_1([{T, {reg, _}}|Cs], SpillMap) -> + not maps:is_key(T, SpillMap) andalso all_spills_sane_1(Cs, SpillMap); +all_spills_sane_1([{T, {spill, _}}|Cs], SpillMap) -> + all_spills_sane_1(Cs, maps:remove(T, SpillMap)). + +-endif. % DO_ASSERT + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Pseudo-target interface +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +analyze(Cfg, _ModRec) -> Cfg. +bb(Cfg=#cfg{bbs=BBs}, Ix, _ModRec) -> + case BBs of + #{Ix := #transformed_bb{bb=BB}} -> BB; + _ -> error(badarg, [Cfg, Ix]) + end. +args(Arity, #prepass_ctx{target_mod=TgtMod, target_ctx=TgtCtx, sub=SubM}) -> + smap_get(TgtMod:args(Arity,TgtCtx), SubM). +labels(#cfg{bbs=BBs}, _ModRec) -> maps:keys(BBs). +livein(#cfg{bbs=BBs}, Lb, _SubMod) -> + #{Lb := #transformed_bb{livein=Livein}} = BBs, + Livein. +liveout(#cfg{bbs=BBs}, Lb, _SubMod) -> + #{Lb := #transformed_bb{liveout=Liveout}} = BBs, + Liveout. +uses(I, MR) -> element(2, def_use(I, MR)). +defines(I, MR) -> element(1, def_use(I, MR)). +def_use(#instr{defuse=DefUse}, _ModRec) -> DefUse. +is_move(#instr{is_move=IM}, _ModRec) -> IM. +is_fixed(Reg, #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx,inv=InvM}) -> + TgtMod:is_fixed(imap_get(Reg, InvM),TgtCtx). % XXX: Is this hot? +is_global(Reg, #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx, + max_phys=MaxPhys}) when Reg < MaxPhys -> + TgtMod:is_global(Reg,TgtCtx). % assume id-map +is_precoloured(Reg, #prepass_ctx{max_phys=MaxPhys}) -> Reg < MaxPhys. +reg_nr(Reg, _ModRec) -> Reg. % After mapping (naturally) +non_alloc(#cfg{cfg=CFG}, #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx, + sub=SubM}) -> + smap_get_all_partial(reg_names(TgtMod:non_alloc(CFG,TgtCtx), TgtMod, TgtCtx), + SubM). +number_of_temporaries(#cfg{max_reg=MaxR}, _ModRec) -> MaxR. +allocatable(#prepass_ctx{target_mod=TgtMod, target_ctx=TgtCtx}) -> + TgtMod:allocatable(TgtCtx). % assume id-map +physical_name(Reg, _ModRec) -> Reg. +all_precoloured(#prepass_ctx{target_mod=TgtMod, target_ctx=TgtCtx}) -> + TgtMod:all_precoloured(TgtCtx). % dito +var_range(#cfg{cfg=_CFG, max_reg=MaxReg}, + #prepass_ctx{target_mod=_TgtMod, target_ctx=_TgtCtx}) -> + ?ASSERT(begin {TgtMin, _} = _TgtMod:var_range(_CFG,_TgtCtx), + TgtMin =:= 0 + end), + {0, MaxReg-1}. + +postorder(#cfg{cfg=CFG,rpostorder=undefined}, + #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx}) -> + TgtMod:postorder(CFG,TgtCtx); +postorder(#cfg{rpostorder=Labels}, _ModRec) when is_list(Labels) -> + lists:reverse(Labels). + +reverse_postorder(#cfg{cfg=CFG,rpostorder=undefined}, + #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx}) -> + TgtMod:reverse_postorder(CFG,TgtCtx); +reverse_postorder(#cfg{rpostorder=Labels}, _ModRec) when is_list(Labels) -> + Labels. diff --git a/lib/hipe/regalloc/hipe_sparc_specific.erl b/lib/hipe/regalloc/hipe_sparc_specific.erl index 8d34604f84..4c575c1c83 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific.erl @@ -22,114 +22,123 @@ -module(hipe_sparc_specific). %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 - ,analyze/1 - ,labels/1 - ,all_precoloured/0 - ,bb/2 - ,liveout/2 - ,reg_nr/1 - ,def_use/1 - ,is_move/1 - ,is_precoloured/1 - ,var_range/1 - ,allocatable/0 - ,non_alloc/1 - ,physical_name/1 - ,reverse_postorder/1 - ,livein/2 - ,uses/1 - ,defines/1 +-export([number_of_temporaries/2 + ,analyze/2 + ,labels/2 + ,all_precoloured/1 + ,bb/3 + ,liveout/3 + ,reg_nr/2 + ,def_use/2 + ,is_move/2 + ,is_precoloured/2 + ,var_range/2 + ,allocatable/1 + ,non_alloc/2 + ,physical_name/2 + ,reverse_postorder/2 + ,livein/3 + ,uses/2 + ,defines/2 + ,defines_all_alloc/2 ]). %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]). %% for hipe_ls_regalloc: --export([args/1, is_arg/1, is_global/1, new_spill_index/1]). --export([breadthorder/1, postorder/1]). +-export([args/2, is_arg/2, is_global/2, new_spill_index/2]). +-export([breadthorder/2, postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3]). -defun_to_cfg(Defun) -> - hipe_sparc_cfg:init(Defun). +%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). -check_and_rewrite(Defun, Coloring) -> - hipe_sparc_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring, no_context) -> + hipe_sparc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_sparc_cfg:reverse_postorder(CFG). -non_alloc(CFG) -> - non_alloc(hipe_sparc_registers:nr_args(), hipe_sparc_cfg:params(CFG)). +non_alloc(CFG, no_context) -> + non_alloc_1(hipe_sparc_registers:nr_args(), hipe_sparc_cfg:params(CFG)). %% same as hipe_sparc_frame:fix_formals/2 -non_alloc(0, Rest) -> Rest; -non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); -non_alloc(_, []) -> []. +non_alloc_1(0, Rest) -> Rest; +non_alloc_1(N, [_|Rest]) -> non_alloc_1(N-1, Rest); +non_alloc_1(_, []) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> hipe_sparc_liveness_gpr:analyse(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) -> [X || X <- hipe_sparc_liveness_gpr:livein(Liveness,L), hipe_sparc:temp_is_allocatable(X)]. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) -> [X || X <- hipe_sparc_liveness_gpr:liveout(BB_in_out_liveness,Label), hipe_sparc:temp_is_allocatable(X)]. %% Registers stuff -allocatable() -> +allocatable(no_context) -> hipe_sparc_registers:allocatable_gpr(). -all_precoloured() -> +all_precoloured(no_context) -> hipe_sparc_registers:all_precoloured(). -is_precoloured(Reg) -> +is_precoloured(Reg, _) -> hipe_sparc_registers:is_precoloured_gpr(Reg). -is_fixed(R) -> +is_fixed(R, _) -> hipe_sparc_registers:is_fixed(R). -physical_name(Reg) -> +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_sparc_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) -> hipe_gensym:var_range(sparc). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(sparc), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) -> hipe_sparc_cfg:bb(CFG,L). +update_bb(CFG,L,BB,_) -> + hipe_sparc_cfg:bb_add(CFG,L,BB). + %% SPARC stuff -def_use(Instruction) -> - {defines(Instruction), uses(Instruction)}. +def_use(Instruction, Ctx) -> + {defines(Instruction, Ctx), uses(Instruction, Ctx)}. -uses(I) -> +uses(I, _) -> [X || X <- hipe_sparc_defuse:insn_use_gpr(I), hipe_sparc:temp_is_allocatable(X)]. -defines(I) -> +defines(I, _) -> [X || X <- hipe_sparc_defuse:insn_def_gpr(I), hipe_sparc:temp_is_allocatable(X)]. -is_move(Instruction) -> +defines_all_alloc(I, _) -> + hipe_sparc_defuse:insn_defs_all_gpr(I). + +is_move(Instruction, _) -> case hipe_sparc:is_pseudo_move(Instruction) of true -> Dst = hipe_sparc:pseudo_move_dst(Instruction), @@ -142,28 +151,45 @@ is_move(Instruction) -> false -> false end. -reg_nr(Reg) -> +reg_nr(Reg, _) -> hipe_sparc:temp_reg(Reg). +new_reg_nr(_) -> + hipe_gensym:get_next_var(sparc). + +update_reg_nr(Nr, Temp, _) -> + hipe_sparc:mk_temp(Nr, hipe_sparc:temp_type(Temp)). + +subst_temps(SubstFun, Instr, _) -> + hipe_sparc_subst:insn_temps( + fun(Op) -> + case hipe_sparc:temp_is_allocatable(Op) + andalso hipe_sparc:temp_type(Op) =/= 'double' + of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + %%% Linear Scan stuff -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) -> SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_sparc_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_sparc_cfg:postorder(CFG). -is_global(R) -> +is_global(R, _) -> R =:= hipe_sparc_registers:temp1() orelse R =:= hipe_sparc_registers:temp2() orelse R =:= hipe_sparc_registers:temp3() orelse hipe_sparc_registers:is_fixed(R). -is_arg(R) -> +is_arg(R, _) -> hipe_sparc_registers:is_arg(R). -args(CFG) -> +args(CFG, _) -> hipe_sparc_registers:args(hipe_sparc_cfg:arity(CFG)). diff --git a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl index 2edd3cb47e..0334142b95 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl @@ -22,126 +22,152 @@ -module(hipe_sparc_specific_fp). %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 - ,analyze/1 - ,labels/1 - ,all_precoloured/0 - ,bb/2 - ,liveout/2 - ,reg_nr/1 - ,def_use/1 - ,is_move/1 - ,is_precoloured/1 - ,var_range/1 - ,allocatable/0 - ,non_alloc/1 - ,physical_name/1 - ,reverse_postorder/1 - ,livein/2 - ,uses/1 - ,defines/1 +-export([number_of_temporaries/2 + ,analyze/2 + ,labels/2 + ,all_precoloured/1 + ,bb/3 + ,liveout/3 + ,reg_nr/2 + ,def_use/2 + ,is_move/2 + ,is_precoloured/2 + ,var_range/2 + ,allocatable/1 + ,non_alloc/2 + ,physical_name/2 + ,reverse_postorder/2 + ,livein/3 + ,uses/2 + ,defines/2 + ,defines_all_alloc/2 ]). %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]). %% for hipe_ls_regalloc: -%%-export([args/1, is_arg/1, is_global, new_spill_index/1]). -%%-export([breadthorder/1, postorder/1]). +%%-export([args/2, is_arg/2, is_global, new_spill_index/2]). +%%-export([breadthorder/2, postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3]). -defun_to_cfg(Defun) -> - hipe_sparc_cfg:init(Defun). +%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). -check_and_rewrite(Defun, Coloring) -> - hipe_sparc_ra_postconditions_fp:check_and_rewrite(Defun, Coloring). +check_and_rewrite(CFG, Coloring, no_context) -> + hipe_sparc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_sparc_cfg:reverse_postorder(CFG). -non_alloc(_CFG) -> +non_alloc(_CFG, _) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> hipe_sparc_liveness_fpr:analyse(CFG). -livein(Liveness, L) -> +livein(Liveness, L, _) -> hipe_sparc_liveness_fpr:livein(Liveness, L). -liveout(BB_in_out_liveness, Label) -> +liveout(BB_in_out_liveness, Label, _) -> hipe_sparc_liveness_fpr:liveout(BB_in_out_liveness, Label). %% Registers stuff -allocatable() -> +allocatable(no_context) -> hipe_sparc_registers:allocatable_fpr(). -all_precoloured() -> - allocatable(). +all_precoloured(Ctx) -> + allocatable(Ctx). -is_precoloured(Reg) -> +is_precoloured(Reg, _) -> hipe_sparc_registers:is_precoloured_fpr(Reg). -is_fixed(_Reg) -> +is_fixed(_Reg, _) -> false. -physical_name(Reg) -> +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_sparc_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) -> hipe_gensym:var_range(sparc). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(sparc), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG, L) -> +bb(CFG, L, _) -> hipe_sparc_cfg:bb(CFG, L). +update_bb(CFG,L,BB,_) -> + hipe_sparc_cfg:bb_add(CFG,L,BB). + %% SPARC stuff -def_use(I) -> - {defines(I), uses(I)}. +def_use(I, Ctx) -> + {defines(I,Ctx), uses(I,Ctx)}. -uses(I) -> +uses(I, _) -> hipe_sparc_defuse:insn_use_fpr(I). -defines(I) -> +defines(I, _) -> hipe_sparc_defuse:insn_def_fpr(I). -is_move(I) -> +defines_all_alloc(I, _) -> + hipe_sparc_defuse:insn_defs_all_fpr(I). + +is_move(I, _) -> hipe_sparc:is_pseudo_fmove(I). -reg_nr(Reg) -> +reg_nr(Reg, _) -> hipe_sparc:temp_reg(Reg). +new_reg_nr(_) -> + hipe_gensym:get_next_var(sparc). + +update_reg_nr(Nr, _Temp, _) -> + hipe_sparc:mk_temp(Nr, 'double'). + +subst_temps(SubstFun, Instr, _) -> + hipe_sparc_subst:insn_temps( + fun(Op) -> + case hipe_sparc:temp_is_allocatable(Op) + andalso hipe_sparc:temp_type(Op) =:= 'double' + of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + -ifdef(notdef). -new_spill_index(SpillIndex)-> +new_spill_index(SpillIndex, _)-> SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_sparc_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_sparc_cfg:postorder(CFG). -is_global(_R) -> +is_global(_R, _) -> false. -is_arg(_R) -> +is_arg(_R, _) -> false. -args(_CFG) -> +args(_CFG, _) -> []. -endif. diff --git a/lib/hipe/regalloc/hipe_temp_map.erl b/lib/hipe/regalloc/hipe_temp_map.erl index 4085a0e1a7..b683d08054 100644 --- a/lib/hipe/regalloc/hipe_temp_map.erl +++ b/lib/hipe/regalloc/hipe_temp_map.erl @@ -33,10 +33,12 @@ -module(hipe_temp_map). --export([cols2tuple/2, is_spilled/2, to_substlist/1]). +-export([cols2tuple/3, find/2, is_spilled/2, to_substlist/1]). -include("../main/hipe.hrl"). +-type target_context() :: any(). + %%---------------------------------------------------------------------------- %% Convert a list of [{R0, C1}, {R1, C2}, ...] to a temp_map %% (Currently implemented as a tuple) tuple {C1, C2, ...}. @@ -47,34 +49,32 @@ %% element 1 %%---------------------------------------------------------------------------- --spec cols2tuple(hipe_map(), atom()) -> hipe_temp_map(). +-spec cols2tuple(hipe_map(), module(), target_context()) -> hipe_temp_map(). -cols2tuple(Map, Target) -> - ?ASSERT(check_list(Map)), - SortedMap = lists:keysort(1, Map), - cols2tuple(0, SortedMap, [], Target). +cols2tuple(Map, TgtMod, TgtCtx) -> + SortedMap = lists:keysort(1, Map), + cols2tuple(0, SortedMap, [], TgtMod, TgtCtx). -%% sorted_cols2tuple(Map, Target) -> -%% ?ASSERT(check_list(Map)), +%% sorted_cols2tuple(Map, TgtMod, TgtCtx) -> %% ?ASSERT(Map =:= lists:keysort(1, Map)), -%% cols2tuple(0, Map, [], Target). +%% cols2tuple(0, Map, [], TgtMod, TgtCtx). %% Build a dense mapping -cols2tuple(_, [], Vs, _) -> +cols2tuple(_, [], Vs, _, _) -> %% Done reverse the list and convert to tuple. list_to_tuple(lists:reverse(Vs)); -cols2tuple(N, [{R, C}|Ms], Vs, Target) when N =:= R -> +cols2tuple(N, [{R, C}|Ms], Vs, TgtMod, TgtCtx) when N =:= R -> %% N makes sure the mapping is dense. N is he next key. - cols2tuple(N+1, Ms, [C|Vs], Target); -cols2tuple(N, SourceMapping, Vs, Target) -> + cols2tuple(N+1, Ms, [C|Vs], TgtMod, TgtCtx); +cols2tuple(N, SourceMapping=[{R,_}|_], Vs, TgtMod, TgtCtx) when N < R -> %% The source was sparse, make up some placeholders... Val = - case Target:is_precoloured(N) of + case TgtMod:is_precoloured(N, TgtCtx) of %% If it is precoloured, we know what to map it to. true -> {reg, N}; false -> unknown end, - cols2tuple(N+1, SourceMapping, [Val|Vs], Target). + cols2tuple(N+1, SourceMapping, [Val|Vs], TgtMod, TgtCtx). %% %% True if temp Temp is spilled. @@ -82,7 +82,7 @@ cols2tuple(N, SourceMapping, Vs, Target) -> -spec is_spilled(non_neg_integer(), hipe_temp_map()) -> boolean(). is_spilled(Temp, Map) -> - case element(Temp+1, Map) of + case find(Temp, Map) of {reg, _R} -> false; {fp_reg, _R}-> false; {spill, _N} -> true; @@ -106,9 +106,10 @@ is_spilled(Temp, Map) -> %% {spill, _N} -> false; %% unknown -> false %% end. -%% -%% %% Returns the inf temp Temp is mapped to. -%% find(Temp, Map) -> element(Temp+1, Map). + +%% Returns the inf temp Temp is mapped to. +find(Temp, Map) when Temp < tuple_size(Map) -> element(Temp+1, Map); +find(_, Map) when is_tuple(Map) -> unknown. % consistency with cols2tuple/3 %% diff --git a/lib/hipe/regalloc/hipe_x86_specific.erl b/lib/hipe/regalloc/hipe_x86_specific.erl index 4edf8674b7..67c45cdca5 100644 --- a/lib/hipe/regalloc/hipe_x86_specific.erl +++ b/lib/hipe/regalloc/hipe_x86_specific.erl @@ -25,100 +25,105 @@ -define(HIPE_X86_REGISTERS, hipe_amd64_registers). -define(HIPE_X86_LIVENESS, hipe_amd64_liveness). -define(HIPE_X86_DEFUSE, hipe_amd64_defuse). +-define(HIPE_X86_SUBST, hipe_amd64_subst). -else. -define(HIPE_X86_SPECIFIC, hipe_x86_specific). -define(HIPE_X86_RA_POSTCONDITIONS, hipe_x86_ra_postconditions). -define(HIPE_X86_REGISTERS, hipe_x86_registers). -define(HIPE_X86_LIVENESS, hipe_x86_liveness). -define(HIPE_X86_DEFUSE, hipe_x86_defuse). +-define(HIPE_X86_SUBST, hipe_x86_subst). -endif. -module(?HIPE_X86_SPECIFIC). --export([number_of_temporaries/1]). +-export([number_of_temporaries/2]). %% The following exports are used as M:F(...) calls from other modules; %% e.g. hipe_x86_ra_ls. --export([analyze/1, - bb/2, - args/1, - labels/1, - livein/2, - liveout/2, - uses/1, - defines/1, - def_use/1, - is_arg/1, % used by hipe_ls_regalloc - is_move/1, - is_fixed/1, % used by hipe_graph_coloring_regalloc - is_global/1, - is_precoloured/1, - reg_nr/1, - non_alloc/1, - allocatable/0, - physical_name/1, - all_precoloured/0, - new_spill_index/1, % used by hipe_ls_regalloc - var_range/1, - breadthorder/1, - postorder/1, - reverse_postorder/1]). +-export([analyze/2, + bb/3, + args/2, + labels/2, + livein/3, + liveout/3, + uses/2, + defines/2, + defines_all_alloc/2, + def_use/2, + is_arg/2, % used by hipe_ls_regalloc + is_move/2, + is_fixed/2, % used by hipe_graph_coloring_regalloc + is_global/2, + is_precoloured/2, + reg_nr/2, + non_alloc/2, + allocatable/1, + physical_name/2, + all_precoloured/1, + new_spill_index/2, % used by hipe_ls_regalloc + var_range/2, + breadthorder/2, + postorder/2, + reverse_postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3]). -defun_to_cfg(Defun) -> - hipe_x86_cfg:init(Defun). +%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). -check_and_rewrite(Defun, Coloring) -> - ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring, _) -> + ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(CFG, Coloring, 'normal'). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_x86_cfg:reverse_postorder(CFG). -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_x86_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_x86_cfg:postorder(CFG). %% Globally defined registers for linear scan -is_global(R) -> +is_global(R, _) -> ?HIPE_X86_REGISTERS:temp1() =:= R orelse ?HIPE_X86_REGISTERS:temp0() =:= R orelse ?HIPE_X86_REGISTERS:is_fixed(R). -is_fixed(R) -> +is_fixed(R, _) -> ?HIPE_X86_REGISTERS:is_fixed(R). -is_arg(R) -> +is_arg(R, _) -> ?HIPE_X86_REGISTERS:is_arg(R). -args(CFG) -> +args(CFG, _) -> ?HIPE_X86_REGISTERS:args(hipe_x86_cfg:arity(CFG)). -non_alloc(CFG) -> - non_alloc(?HIPE_X86_REGISTERS:nr_args(), hipe_x86_cfg:params(CFG)). +non_alloc(CFG, _) -> + non_alloc_1(?HIPE_X86_REGISTERS:nr_args(), hipe_x86_cfg:params(CFG)). %% same as hipe_x86_frame:fix_formals/2 -non_alloc(0, Rest) -> Rest; -non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); -non_alloc(_, []) -> []. +non_alloc_1(0, Rest) -> Rest; +non_alloc_1(N, [_|Rest]) -> non_alloc_1(N-1, Rest); +non_alloc_1(_, []) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> ?HIPE_X86_LIVENESS:analyze(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) -> [X || X <- ?HIPE_X86_LIVENESS:livein(Liveness,L), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_reg(X) =/= ?HIPE_X86_REGISTERS:fcalls(), hipe_x86:temp_reg(X) =/= ?HIPE_X86_REGISTERS:heap_limit(), hipe_x86:temp_type(X) =/= 'double']. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) -> [X || X <- ?HIPE_X86_LIVENESS:liveout(BB_in_out_liveness,Label), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_reg(X) =/= ?HIPE_X86_REGISTERS:fcalls(), @@ -127,37 +132,40 @@ liveout(BB_in_out_liveness,Label) -> %% Registers stuff -allocatable() -> +allocatable(_) -> ?HIPE_X86_REGISTERS:allocatable(). -all_precoloured() -> +all_precoloured(_) -> ?HIPE_X86_REGISTERS:all_precoloured(). -is_precoloured(Reg) -> +is_precoloured(Reg,_) -> ?HIPE_X86_REGISTERS:is_precoloured(Reg). -physical_name(Reg) -> +physical_name(Reg,_) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG,_) -> hipe_x86_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG,_) -> hipe_gensym:var_range(x86). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG,_) -> Highest_temporary = hipe_gensym:get_var(x86), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) -> hipe_x86_cfg:bb(CFG,L). +update_bb(CFG,L,BB,_) -> + hipe_x86_cfg:bb_add(CFG,L,BB). + %% X86 stuff -def_use(Instruction) -> +def_use(Instruction,_) -> {[X || X <- ?HIPE_X86_DEFUSE:insn_def(Instruction), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =/= 'double'], @@ -166,17 +174,19 @@ def_use(Instruction) -> hipe_x86:temp_type(X) =/= 'double'] }. -uses(I) -> +uses(I,_) -> [X || X <- ?HIPE_X86_DEFUSE:insn_use(I), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =/= 'double']. -defines(I) -> +defines(I,_) -> [X || X <- ?HIPE_X86_DEFUSE:insn_def(I), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =/= 'double']. -is_move(Instruction) -> +defines_all_alloc(I,_) -> ?HIPE_X86_DEFUSE:insn_defs_all(I). + +is_move(Instruction,_) -> case hipe_x86:is_move(Instruction) of true -> Src = hipe_x86:move_src(Instruction), @@ -197,8 +207,25 @@ is_move(Instruction) -> false -> false end. -reg_nr(Reg) -> +reg_nr(Reg,_) -> hipe_x86:temp_reg(Reg). -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +new_reg_nr(_) -> + hipe_gensym:get_next_var(x86). + +update_reg_nr(Nr, Temp, _) -> + hipe_x86:mk_temp(Nr, hipe_x86:temp_type(Temp)). + +subst_temps(SubstFun, Instr, _) -> + ?HIPE_X86_SUBST:insn_temps( + fun(Op) -> + case hipe_x86:temp_is_allocatable(Op) + andalso hipe_x86:temp_type(Op) =/= 'double' + of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) -> SpillIndex+1. diff --git a/lib/hipe/regalloc/hipe_x86_specific_x87.erl b/lib/hipe/regalloc/hipe_x86_specific_x87.erl index ece07cb2f9..85923f8f44 100644 --- a/lib/hipe/regalloc/hipe_x86_specific_x87.erl +++ b/lib/hipe/regalloc/hipe_x86_specific_x87.erl @@ -32,110 +32,118 @@ -endif. -module(?HIPE_X86_SPECIFIC_X87). --export([allocatable/0, - is_precoloured/1, - %% var_range/1, - %% def_use/1, - %% is_fixed/1, - is_arg/1, - %% non_alloc/1, - new_spill_index/1, - number_of_temporaries/1 +-export([allocatable/2, + is_precoloured/2, + %% var_range/2, + %% def_use/2, + %% is_fixed/2, + is_arg/2, + %% non_alloc/2, + new_spill_index/2, + number_of_temporaries/2 ]). %% The following exports are used as M:F(...) calls from other modules; %% e.g. hipe_x86_ra_ls. --export([analyze/1, - bb/2, - args/1, - labels/1, - livein/2, - liveout/2, - uses/1, - defines/1, - is_global/1, - reg_nr/1, - physical_name/1, - breadthorder/1, - postorder/1, - reverse_postorder/1]). - -breadthorder(CFG) -> +-export([analyze/2, + bb/3, + args/2, + labels/2, + livein/3, + liveout/3, + uses/2, + defines/2, + defines_all_alloc/2, + is_global/2, + reg_nr/2, + physical_name/2, + breadthorder/2, + postorder/2, + reverse_postorder/2]). + +%% callbacks for hipe_x86_ra_ls +-export([check_and_rewrite/4]). + +%% Rewrite happens in hipe_x86_ra_finalise:finalise/4 +check_and_rewrite(CFG, _Coloring, 'linearscan', _) -> + {CFG, false}. + +breadthorder(CFG, _) -> hipe_x86_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_x86_cfg:postorder(CFG). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_x86_cfg:reverse_postorder(CFG). -is_global(_) -> +is_global(_, _) -> false. -ifdef(notdef). -is_fixed(_) -> +is_fixed(_, _) -> false. -endif. -is_arg(_) -> +is_arg(_, _) -> false. -args(_) -> +args(_, _) -> []. -ifdef(notdef). -non_alloc(_) -> +non_alloc(_, _) -> []. -endif. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> ?HIPE_X86_LIVENESS:analyze(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) -> [X || X <- ?HIPE_X86_LIVENESS:livein(Liveness,L), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double']. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) -> [X || X <- ?HIPE_X86_LIVENESS:liveout(BB_in_out_liveness,Label), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double']. %% Registers stuff -allocatable() -> +allocatable('linearscan', _) -> ?HIPE_X86_REGISTERS:allocatable_x87(). -is_precoloured(Reg) -> +is_precoloured(Reg, _) -> ?HIPE_X86_REGISTERS:is_precoloured_x87(Reg). -physical_name(Reg) -> +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_x86_cfg:labels(CFG). -ifdef(notdef). -var_range(_CFG) -> +var_range(_CFG, _) -> {Min,Max} = hipe_gensym:var_range(x86), %% io:format("Var_range: ~w\n",[{Min,Max}]), {Min,Max}. -endif. -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(x86), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) -> hipe_x86_cfg:bb(CFG,L). %% X86 stuff -ifdef(notdef). -def_use(Instruction) -> +def_use(Instruction, _) -> {[X || X <- ?HIPE_X86_DEFUSE:insn_def(Instruction), hipe_x86:temp_is_allocatable(X), temp_is_double(X)], @@ -145,21 +153,23 @@ def_use(Instruction) -> }. -endif. -uses(I) -> +uses(I, _) -> [X || X <- ?HIPE_X86_DEFUSE:insn_use(I), hipe_x86:temp_is_allocatable(X), temp_is_double(X)]. -defines(I) -> +defines(I, _) -> [X || X <- ?HIPE_X86_DEFUSE:insn_def(I), hipe_x86:temp_is_allocatable(X), temp_is_double(X)]. +defines_all_alloc(I, _) -> hipe_amd64_defuse:insn_defs_all(I). + temp_is_double(Temp) -> hipe_x86:temp_type(Temp) =:= 'double'. -reg_nr(Reg) -> +reg_nr(Reg, _) -> hipe_x86:temp_reg(Reg). -new_spill_index(SpillIndex) -> +new_spill_index(SpillIndex, _) -> SpillIndex+1. diff --git a/lib/hipe/rtl/hipe_rtl_binary.erl b/lib/hipe/rtl/hipe_rtl_binary.erl index fb9c0c196d..ad23df80d2 100644 --- a/lib/hipe/rtl/hipe_rtl_binary.erl +++ b/lib/hipe/rtl/hipe_rtl_binary.erl @@ -106,10 +106,20 @@ create_lbls(0) -> %%------------------------------------------------------------------------------ get_word_integer(Var, Register, SystemLimitLblName, FalseLblName) -> - [EndLbl] = create_lbls(1), - EndName = hipe_rtl:label_name(EndLbl), - get_word_integer(Var, Register,SystemLimitLblName, FalseLblName, EndName, EndName, - [EndLbl]). + case hipe_rtl:is_imm(Var) of + true -> + TaggedVal = hipe_rtl:imm_value(Var), + true = hipe_tagscheme:is_fixnum(TaggedVal), + Val = hipe_tagscheme:fixnum_val(TaggedVal), + if Val < 0 -> [hipe_rtl:mk_goto(FalseLblName)]; + true -> [hipe_rtl:mk_move(Register, hipe_rtl:mk_imm(Val))] + end; + false -> + [EndLbl] = create_lbls(1), + EndName = hipe_rtl:label_name(EndLbl), + get_word_integer(Var, Register,SystemLimitLblName, FalseLblName, + EndName, EndName, [EndLbl]) + end. get_word_integer(Var, Register, SystemLimitLblName, FalseLblName, TrueLblName, BigLblName, Tail) -> diff --git a/lib/hipe/rtl/hipe_rtl_primops.erl b/lib/hipe/rtl/hipe_rtl_primops.erl index 062fab842f..835f489ec0 100644 --- a/lib/hipe/rtl/hipe_rtl_primops.erl +++ b/lib/hipe/rtl/hipe_rtl_primops.erl @@ -845,7 +845,7 @@ gen_free_vars([], _, _, _, AccCode) -> AccCode. %% call_fun (also handles enter_fun when Continuation = []) gen_call_fun(Dst, ArgsAndFun, Continuation, Fail) -> - NAddressReg = hipe_rtl:mk_new_reg(), + NCNAddressReg = hipe_rtl:mk_new_reg(), ArityReg = hipe_rtl:mk_new_reg_gcsafe(), [Fun|RevArgs] = lists:reverse(ArgsAndFun), @@ -856,7 +856,7 @@ gen_call_fun(Dst, ArgsAndFun, Continuation, Fail) -> BadFunLabName = hipe_rtl:label_name(NonClosureLabel), BadFunCode = [NonClosureLabel, - hipe_rtl:mk_call([NAddressReg], + hipe_rtl:mk_call([NCNAddressReg], 'nonclosure_address', [Fun, hipe_rtl:mk_imm(length(Args))], hipe_rtl:label_name(CallNonClosureLabel), @@ -865,25 +865,26 @@ gen_call_fun(Dst, ArgsAndFun, Continuation, Fail) -> CallNonClosureLabel, case Continuation of [] -> - hipe_rtl:mk_enter(NAddressReg, Args, not_remote); + hipe_rtl:mk_enter(NCNAddressReg, Args, not_remote); _ -> - hipe_rtl:mk_call(Dst, NAddressReg, Args, + hipe_rtl:mk_call(Dst, NCNAddressReg, Args, Continuation, Fail, not_remote) end], {BadArityLabName, BadArityCode} = gen_fail_code(Fail, {badarity, Fun}), - CheckGetCode = - hipe_tagscheme:if_fun_get_arity_and_address(ArityReg, NAddressReg, + CNAddressReg = hipe_rtl:mk_new_reg(), + CheckGetCode = + hipe_tagscheme:if_fun_get_arity_and_address(ArityReg, CNAddressReg, Fun, BadFunLabName, 0.9), CheckArityCode = check_arity(ArityReg, length(RevArgs), BadArityLabName), CallCode = case Continuation of [] -> %% This is a tailcall - [hipe_rtl:mk_enter(NAddressReg, ArgsAndFun, not_remote)]; + [hipe_rtl:mk_enter(CNAddressReg, ArgsAndFun, not_remote)]; _ -> %% Ordinary call - [hipe_rtl:mk_call(Dst, NAddressReg, ArgsAndFun, + [hipe_rtl:mk_call(Dst, CNAddressReg, ArgsAndFun, Continuation, Fail, not_remote)] end, [CheckGetCode, CheckArityCode, CallCode, BadFunCode, BadArityCode]. diff --git a/lib/hipe/sparc/Makefile b/lib/hipe/sparc/Makefile index 0e36a43d8e..ac1230df7c 100644 --- a/lib/hipe/sparc/Makefile +++ b/lib/hipe/sparc/Makefile @@ -63,7 +63,8 @@ MODULES=hipe_rtl_to_sparc \ hipe_sparc_ra_naive \ hipe_sparc_ra_postconditions \ hipe_sparc_ra_postconditions_fp \ - hipe_sparc_registers + hipe_sparc_registers \ + hipe_sparc_subst HRL_FILES=hipe_sparc.hrl ERL_FILES=$(MODULES:%=%.erl) diff --git a/lib/hipe/sparc/hipe_rtl_to_sparc.erl b/lib/hipe/sparc/hipe_rtl_to_sparc.erl index f9c043eafe..e170fec3d6 100644 --- a/lib/hipe/sparc/hipe_rtl_to_sparc.erl +++ b/lib/hipe/sparc/hipe_rtl_to_sparc.erl @@ -625,7 +625,7 @@ conv_return(I, Map, Data) -> {I2, Map0, Data}. conv_store(I, Map, Data) -> - {Base1, Map0} = conv_dst(hipe_rtl:store_base(I), Map), % no immediates allowed + {Base1, Map0} = conv_src(hipe_rtl:store_base(I), Map), {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {Base2, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), StOp = conv_stop(hipe_rtl:store_size(I)), @@ -649,13 +649,27 @@ mk_store(StOp, Src, Base1, Base2) -> end. mk_store2(StOp, Src, Base1, Base2) -> - case hipe_sparc:is_temp(Base2) of + case hipe_sparc:is_temp(Base1) of true -> - mk_store_rr(StOp, Src, Base1, Base2); + case hipe_sparc:is_temp(Base2) of + true -> + mk_store_rr(StOp, Src, Base1, Base2); + _ -> + mk_store_ri(StOp, Src, Base1, Base2) + end; _ -> - mk_store_ri(StOp, Src, Base1, Base2) + case hipe_sparc:is_temp(Base2) of + true -> + mk_store_ri(StOp, Src, Base2, Base1); + _ -> + mk_store_ii(StOp, Src, Base1, Base2) + end end. +mk_store_ii(StOp, Src, Base, Disp) -> + Tmp = new_untagged_temp(), + mk_set(Base, Tmp, mk_store_ri(StOp, Src, Tmp, Disp)). + mk_store_ri(StOp, Src, Base, Disp) -> hipe_sparc:mk_store(StOp, Src, Base, Disp, 'new', []). diff --git a/lib/hipe/sparc/hipe_sparc_cfg.erl b/lib/hipe/sparc/hipe_sparc_cfg.erl index 0b2c77f27b..957c8a0d24 100644 --- a/lib/hipe/sparc/hipe_sparc_cfg.erl +++ b/lib/hipe/sparc/hipe_sparc_cfg.erl @@ -24,6 +24,7 @@ -export([init/1, labels/1, start_label/1, succ/2, + map_bbs/2, fold_bbs/3, bb/2, bb_add/3]). -export([postorder/1, reverse_postorder/1]). -export([linearise/1]). diff --git a/lib/hipe/sparc/hipe_sparc_defuse.erl b/lib/hipe/sparc/hipe_sparc_defuse.erl index 4f66299f1d..4b5a19a19d 100644 --- a/lib/hipe/sparc/hipe_sparc_defuse.erl +++ b/lib/hipe/sparc/hipe_sparc_defuse.erl @@ -23,6 +23,7 @@ -export([insn_def_all/1, insn_use_all/1]). -export([insn_def_gpr/1, insn_use_gpr/1]). -export([insn_def_fpr/1, insn_use_fpr/1]). +-export([insn_defs_all_gpr/1, insn_defs_all_fpr/1]). -include("hipe_sparc.hrl"). %%% @@ -51,6 +52,12 @@ insn_def_gpr(I) -> _ -> [] end. +insn_defs_all_gpr(I) -> + case I of + #pseudo_call{} -> true; + _ -> false + end. + call_clobbered_gpr() -> [hipe_sparc:mk_temp(R, T) || {R,T} <- hipe_sparc_registers:call_clobbered() ++ all_fp_pseudos()]. @@ -115,6 +122,12 @@ insn_def_fpr(I) -> _ -> [] end. +insn_defs_all_fpr(I) -> + case I of + #pseudo_call{} -> true; + _ -> false + end. + call_clobbered_fpr() -> [hipe_sparc:mk_temp(R, 'double') || R <- hipe_sparc_registers:allocatable_fpr()]. diff --git a/lib/hipe/sparc/hipe_sparc_frame.erl b/lib/hipe/sparc/hipe_sparc_frame.erl index a42c1983f4..37f29e660a 100644 --- a/lib/hipe/sparc/hipe_sparc_frame.erl +++ b/lib/hipe/sparc/hipe_sparc_frame.erl @@ -25,16 +25,14 @@ -include("hipe_sparc.hrl"). -include("../rtl/hipe_literals.hrl"). -frame(Defun) -> - Formals = fix_formals(hipe_sparc:defun_formals(Defun)), - Temps0 = all_temps(hipe_sparc:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG) -> + Formals = fix_formals(hipe_sparc_cfg:params(CFG)), + Temps0 = all_temps(CFG, Formals), + MinFrame = defun_minframe(CFG), Temps = ensure_minframe(MinFrame, Temps0), - ClobbersRA = clobbers_ra(hipe_sparc:defun_code(Defun)), - CFG0 = hipe_sparc_cfg:init(Defun), - Liveness = hipe_sparc_liveness_all:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersRA), - hipe_sparc_cfg:linearise(CFG1). + ClobbersRA = clobbers_ra(CFG), + Liveness = hipe_sparc_liveness_all:analyse(CFG), + do_body(CFG, Liveness, Formals, Temps, ClobbersRA). fix_formals(Formals) -> fix_formals(hipe_sparc_registers:nr_args(), Formals). @@ -550,29 +548,41 @@ temp_is_pseudo(Temp) -> %%% Detect if a Defun's body clobbers RA. %%% -clobbers_ra(Insns) -> - case Insns of - [#pseudo_call{}|_] -> true; - %% moves to RA cannot occur yet - [_|Rest] -> clobbers_ra(Rest); - [] -> false +clobbers_ra(CFG) -> + any_insn(fun(#pseudo_call{}) -> true; + (_) -> false + end, CFG). + +any_insn(Pred, CFG) -> + %% Abuse fold to do an efficient "any"-operation using nonlocal control flow + FoundSatisfying = make_ref(), + try fold_insns(fun (I, _) -> + case Pred(I) of + true -> throw(FoundSatisfying); + false -> false + end + end, false, CFG) + of _ -> false + catch FoundSatisfying -> true end. %%% %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_sparc_defuse:insn_def_all(I)), - S2 = tset_add_list(S1, hipe_sparc_defuse:insn_use_all(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_sparc_defuse:insn_use_all(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_sparc_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). tset_empty() -> gb_sets:new(). @@ -601,16 +611,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_sparc:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_sparc:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_sparc_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/sparc/hipe_sparc_main.erl b/lib/hipe/sparc/hipe_sparc_main.erl index c16751c7bd..8e9c560bb2 100644 --- a/lib/hipe/sparc/hipe_sparc_main.erl +++ b/lib/hipe/sparc/hipe_sparc_main.erl @@ -24,12 +24,14 @@ rtl_to_sparc(MFA, RTL, Options) -> Defun1 = hipe_rtl_to_sparc:translate(RTL), + CFG1 = hipe_sparc_cfg:init(Defun1), %% io:format("~w: after translate\n", [?MODULE]), %% hipe_sparc_pp:pp(Defun1), - Defun2 = hipe_sparc_ra:ra(Defun1, Options), + CFG2 = hipe_sparc_ra:ra(CFG1, Options), %% io:format("~w: after regalloc\n", [?MODULE]), - %% hipe_sparc_pp:pp(Defun2), - Defun3 = hipe_sparc_frame:frame(Defun2), + %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG2)), + CFG3 = hipe_sparc_frame:frame(CFG2), + Defun3 = hipe_sparc_cfg:linearise(CFG3), %% io:format("~w: after frame\n", [?MODULE]), %% hipe_sparc_pp:pp(Defun3), Defun4 = hipe_sparc_finalise:finalise(Defun3), diff --git a/lib/hipe/sparc/hipe_sparc_ra.erl b/lib/hipe/sparc/hipe_sparc_ra.erl index afea8c9b4c..c4b909528d 100644 --- a/lib/hipe/sparc/hipe_sparc_ra.erl +++ b/lib/hipe/sparc/hipe_sparc_ra.erl @@ -22,36 +22,39 @@ -module(hipe_sparc_ra). -export([ra/2]). -ra(Defun0, Options) -> - %% hipe_sparc_pp:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} +ra(CFG0, Options) -> + %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG0)), + {CFG1, _FPLiveness1, Coloring_fp, SpillIndex} = case proplists:get_bool(inline_fp, Options) of true -> - hipe_regalloc_loop:ra_fp(Defun0, Options, + FPLiveness0 = hipe_sparc_specific_fp:analyze(CFG0, no_context), + hipe_regalloc_loop:ra_fp(CFG0, FPLiveness0, Options, hipe_coalescing_regalloc, - hipe_sparc_specific_fp); + hipe_sparc_specific_fp, no_context); false -> - {Defun0,[],0} + {CFG0,undefined,[],0} end, - %% hipe_sparc_pp:pp(Defun1), - {Defun2, Coloring} + %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG1)), + GPLiveness1 = hipe_sparc_specific:analyze(CFG1, no_context), + {CFG2, _GPLiveness2, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_graph_coloring_regalloc); linear_scan -> - hipe_sparc_ra_ls:ra(Defun1, SpillIndex, Options); + hipe_sparc_ra_ls:ra(CFG1, GPLiveness1, SpillIndex, Options); naive -> - hipe_sparc_ra_naive:ra(Defun1, Coloring_fp, Options); + hipe_sparc_ra_naive:ra(CFG1, GPLiveness1, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, - %% hipe_sparc_pp:pp(Defun2), - hipe_sparc_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). + %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG2)), + hipe_sparc_ra_finalise:finalise(CFG2, Coloring, Coloring_fp). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_sparc_specific). +ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, + hipe_sparc_specific, no_context). diff --git a/lib/hipe/sparc/hipe_sparc_ra_finalise.erl b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl index dc1e69c101..5d6056071c 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_finalise.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl @@ -23,13 +23,14 @@ -export([finalise/3]). -include("hipe_sparc.hrl"). -finalise(Defun, TempMap, FPMap0) -> - Code = hipe_sparc:defun_code(Defun), - {_, SpillLimit} = hipe_sparc:defun_var_range(Defun), +finalise(CFG, TempMap, FPMap0) -> + {_, SpillLimit} = hipe_gensym:var_range(sparc), Map = mk_ra_map(TempMap, SpillLimit), FPMap1 = mk_ra_map_fp(FPMap0, SpillLimit), - NewCode = ra_code(Code, Map, FPMap1, []), - Defun#defun{code=NewCode}. + hipe_sparc_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FPMap1) end, CFG). + +ra_bb(BB, Map, FpMap) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap, [])). ra_code([I|Insns], Map, FPMap, Accum) -> ra_code(Insns, Map, FPMap, [ra_insn(I, Map, FPMap) | Accum]); diff --git a/lib/hipe/sparc/hipe_sparc_ra_ls.erl b/lib/hipe/sparc/hipe_sparc_ra_ls.erl index 19e7c92d2f..7019937737 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_ls.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_ls.erl @@ -21,37 +21,35 @@ %% Linear Scan register allocator for SPARC -module(hipe_sparc_ra_ls). --export([ra/3]). +-export([ra/4]). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_sparc_cfg:init(NewDefun), - SpillLimit = hipe_sparc_specific:number_of_temporaries(CFG), - alloc(NewDefun, SpillIndex, SpillLimit, Options). +ra(CFG, Liveness, SpillIndex, Options) -> + SpillLimit = hipe_sparc_specific:number_of_temporaries(CFG, no_context), + alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). -alloc(Defun, SpillIndex, SpillLimit, Options) -> - CFG = hipe_sparc_cfg:init(Defun), +alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) -> {Coloring, _NewSpillIndex} = regalloc( - CFG, + CFG, Liveness, hipe_sparc_registers:allocatable_gpr()-- [hipe_sparc_registers:temp3(), hipe_sparc_registers:temp2(), hipe_sparc_registers:temp1()], [hipe_sparc_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, - hipe_sparc_specific), - {NewDefun, _DidSpill} = + hipe_sparc_specific, no_context), + {NewCFG, _DidSpill} = hipe_sparc_ra_postconditions:check_and_rewrite( - Defun, Coloring, 'linearscan'), - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific), + CFG, Coloring, 'linearscan'), + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific, no_context), {TempMap2,_NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, - hipe_sparc_specific, TempMap), + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + hipe_sparc_specific, no_context, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), - {NewDefun, Coloring2}. + {NewCFG, Liveness, Coloring2}. -regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> - hipe_ls_regalloc:regalloc( - CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target). +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, + TgtMod, TgtCtx) -> + hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, + DontSpill, Options, TgtMod, TgtCtx). diff --git a/lib/hipe/sparc/hipe_sparc_ra_naive.erl b/lib/hipe/sparc/hipe_sparc_ra_naive.erl index b6c33dec6c..745e44f2f9 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_naive.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_naive.erl @@ -20,11 +20,11 @@ %% -module(hipe_sparc_ra_naive). --export([ra/3]). +-export([ra/4]). -include("hipe_sparc.hrl"). -ra(Defun, _Coloring_fp, _Options) -> % -> {Defun, Coloring} - {NewDefun,_DidSpill} = - hipe_sparc_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), - {NewDefun, []}. +ra(CFG, Liveness, _Coloring_fp, _Options) -> % -> {CFG, Liveness, Coloring} + {NewCFG,_DidSpill} = + hipe_sparc_ra_postconditions:check_and_rewrite2(CFG, [], 'naive'), + {NewCFG, Liveness, []}. diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl index ab31b3c8d9..e8e231e35c 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl @@ -25,17 +25,13 @@ -include("hipe_sparc.hrl"). -check_and_rewrite(Defun, Coloring, Allocator) -> - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific), - check_and_rewrite2(Defun, TempMap, Allocator). +check_and_rewrite(CFG, Coloring, Allocator) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific, no_context), + check_and_rewrite2(CFG, TempMap, Allocator). -check_and_rewrite2(Defun, TempMap, Allocator) -> +check_and_rewrite2(CFG, TempMap, Allocator) -> Strategy = strategy(Allocator), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - VarRange = {0, hipe_gensym:get_var(sparc)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. + do_bbs(hipe_sparc_cfg:labels(CFG), TempMap, Strategy, CFG, false). strategy(Allocator) -> case Allocator of @@ -44,6 +40,13 @@ strategy(Allocator) -> 'naive' -> 'fixed' end. +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_sparc_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_sparc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). + do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl index d893ac26e9..544b8b05a8 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl @@ -25,13 +25,17 @@ -include("hipe_sparc.hrl"). -check_and_rewrite(Defun, Coloring) -> - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific_fp), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, [], false), - VarRange = {0, hipe_gensym:get_var(sparc)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. +check_and_rewrite(CFG, Coloring) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific_fp, + no_context), + do_bbs(hipe_sparc_cfg:labels(CFG), TempMap, CFG, false). + +do_bbs([], _TempMap, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_sparc_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, [], DidSpill0), + CFG = hipe_sparc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, CFG, DidSpill). do_insns([I|Insns], TempMap, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap), diff --git a/lib/hipe/sparc/hipe_sparc_registers.erl b/lib/hipe/sparc/hipe_sparc_registers.erl index 6681a10070..20138836dd 100644 --- a/lib/hipe/sparc/hipe_sparc_registers.erl +++ b/lib/hipe/sparc/hipe_sparc_registers.erl @@ -249,6 +249,8 @@ is_arg(R) -> _ -> false end. +%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is +%% hard-coded in hipe_sparc_defuse:insn_defs_all_gpr/1 call_clobbered() -> % does the RA strip the type or not? [%% ?G0 is the non-allocatable constant zero {?G1,tagged},{?G1,untagged}, diff --git a/lib/hipe/sparc/hipe_sparc_subst.erl b/lib/hipe/sparc/hipe_sparc_subst.erl new file mode 100644 index 0000000000..e5cd244985 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_subst.erl @@ -0,0 +1,85 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2016. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_sparc_subst). +-export([insn_temps/2]). +-include("hipe_sparc.hrl"). + +%% These should be moved to hipe_sparc and exported +-type temp() :: #sparc_temp{}. +-type src2() :: temp() | #sparc_simm13{}. +-type src2b() :: src2() | #sparc_uimm5{}. +-type funv() :: #sparc_mfa{} | #sparc_prim{} | temp(). +-type arg() :: temp() | integer(). +-type insn() :: tuple(). % for now + +-type subst_fun() :: fun((temp()) -> temp()). + +%% @doc Maps over the temporaries in an instruction +-spec insn_temps(subst_fun(), insn()) -> insn(). +insn_temps(T, I) -> + S2 = fun(O) -> src2_temps(T, O) end, + S2B = fun(O) -> src2b_temps(T, O) end, + Arg = fun(O) -> arg_temps(T, O) end, + case I of + #alu{src1=L,src2=R,dst=D} -> I#alu{src1=T(L),src2=S2B(R),dst=T(D)}; + #bp{} -> I; + #comment{} -> I; + #jmp{src1=L,src2=R} -> I#jmp{src1=T(L),src2=S2(R)}; + #label{} -> I; + #pseudo_bp{} -> I; + #pseudo_call{funv=F} -> I#pseudo_call{funv=funv_temps(T,F)}; + #pseudo_call_prepare{} -> I; + #pseudo_move{src=S,dst=D} -> I#pseudo_move{src=T(S),dst=T(D)}; + #pseudo_ret{} -> I; + #pseudo_set{dst=D}-> I#pseudo_set{dst=T(D)}; + #pseudo_tailcall{funv=F,stkargs=Stk} -> + I#pseudo_tailcall{funv=funv_temps(T,F),stkargs=lists:map(Arg,Stk)}; + #pseudo_tailcall_prepare{} -> I; + #rdy{dst=D} -> I#rdy{dst=T(D)}; + #sethi{dst=D} -> I#sethi{dst=T(D)}; + #store{src=S,base=B,disp=D} -> I#store{src=T(S),base=T(B),disp=S2(D)}; + #fp_binary{src1=L,src2=R,dst=D} -> + I#fp_binary{src1=T(L),src2=T(R),dst=T(D)}; + #fp_unary{src=S,dst=D} -> I#fp_unary{src=T(S),dst=T(D)}; + #pseudo_fload{base=B,disp=Di,dst=Ds} -> + I#pseudo_fload{base=T(B),disp=S2(Di),dst=T(Ds)}; + #pseudo_fmove{src=S,dst=D} -> I#pseudo_fmove{src=T(S),dst=T(D)}; + #pseudo_fstore{src=S,base=B,disp=D} -> + I#pseudo_fstore{src=T(S),base=T(B),disp=S2(D)} + end. + +-spec src2_temps(subst_fun(), src2()) -> src2(). +src2_temps(_SubstTemp, I=#sparc_simm13{}) -> I; +src2_temps(SubstTemp, T=#sparc_temp{}) -> SubstTemp(T). + +-spec src2b_temps(subst_fun(), src2b()) -> src2b(). +src2b_temps(_SubstTemp, I=#sparc_uimm5{}) -> I; +src2b_temps(SubstTemp, Op) -> src2_temps(SubstTemp, Op). + +-spec funv_temps(subst_fun(), funv()) -> funv(). +funv_temps(_SubstTemp, M=#sparc_mfa{}) -> M; +funv_temps(_SubstTemp, P=#sparc_prim{}) -> P; +funv_temps(SubstTemp, T=#sparc_temp{}) -> SubstTemp(T). + +-spec arg_temps(subst_fun(), arg()) -> arg(). +arg_temps(_SubstTemp, Imm) when is_integer(Imm) -> Imm; +arg_temps(SubstTemp, T=#sparc_temp{}) -> SubstTemp(T). diff --git a/lib/hipe/ssa/hipe_ssa_liveness.inc b/lib/hipe/ssa/hipe_ssa_liveness.inc index 78488c65fc..46df8b66ad 100644 --- a/lib/hipe/ssa/hipe_ssa_liveness.inc +++ b/lib/hipe/ssa/hipe_ssa_liveness.inc @@ -40,6 +40,15 @@ ssa_liveness__livein/2]). %% ssa_liveness__livein/3], %% ssa_liveness__liveout/2]). +-type set(E) :: gb_sets:set(E). +-type liveness(Label, Var) :: + #{Label => {{Gen :: set(Var), + Kill :: set(Var), + {TotalDirGen :: set(Var), + DirGen :: gb_trees:tree(Label, set(Var))}}, + LiveIn :: set(Var), + LiveOut :: set(Var), + Successors :: [Label]}}. -endif. %% -ifdef(DEBUG_LIVENESS). %% -export([pp_liveness/1]). @@ -262,21 +271,15 @@ update_directed_gen({Pred, Var}, Map)-> %% %% liveness %% +-compile({inline, [liveness_lookup/2, liveness_update/3]}). liveness_init(List) -> - liveness_init1(List, gb_trees:empty()). + maps:from_list(List). -liveness_init1([{Label, Info}|Left], Map) -> - liveness_init1(Left, gb_trees:insert(Label, Info, Map)); -liveness_init1([], Map) -> - Map. - -liveness_lookup(Label, Map) -> - {value, Info} = gb_trees:lookup(Label, Map), - Info. - -liveness_update(Label, NewInfo, Map) -> - gb_trees:update(Label, NewInfo, Map). +liveness_lookup(Label, Liveness) -> + maps:get(Label, Liveness). +liveness_update(Label, Val, Liveness) -> + maps:update(Label, Val, Liveness). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/lib/hipe/test/hipe_testsuite_driver.erl b/lib/hipe/test/hipe_testsuite_driver.erl index a3048d907e..88576775ca 100644 --- a/lib/hipe/test/hipe_testsuite_driver.erl +++ b/lib/hipe/test/hipe_testsuite_driver.erl @@ -99,7 +99,7 @@ write_suite(Suite) -> write_header(#suite{suitename = SuiteName, outputfile = OutputFile, testcases = TestCases}) -> Exports = format_export(TestCases), - TimeLimit = 5, %% with 1 or 2 it fails on some slow machines... + TimeLimit = 6, %% with 1, 2, or 3 it fails on some slow machines... io:format(OutputFile, "%% ATTENTION!\n" "%% This is an automatically generated file. Do not edit.\n\n" @@ -168,6 +168,10 @@ run(TestCase, Dir, _OutDir) -> HiPEOpts = try TestCase:hipe_options() catch error:undef -> [] end, {ok, TestCase} = hipe:c(TestCase, HiPEOpts), ok = TestCase:test(), + {ok, TestCase} = hipe:c(TestCase, [o1|HiPEOpts]), + ok = TestCase:test(), + {ok, TestCase} = hipe:c(TestCase, [o0|HiPEOpts]), + ok = TestCase:test(), ToLLVM = try TestCase:to_llvm() catch error:undef -> true end, case ToLLVM andalso hipe:llvm_support_available() of true -> diff --git a/lib/hipe/tools/hipe_timer.erl b/lib/hipe/tools/hipe_timer.erl index 72aa25d440..90bd0bb81f 100644 --- a/lib/hipe/tools/hipe_timer.erl +++ b/lib/hipe/tools/hipe_timer.erl @@ -51,7 +51,7 @@ empty_time() -> {A,_} = erlang:statistics(runtime), WTB = erlang:monotonic_time(), {B,_} = erlang:statistics(runtime), - {(WTB-WTA)/erlang:convert_time_unit(1, seconds, native),B-A}. + {(WTB-WTA)/erlang:convert_time_unit(1, second, native),B-A}. time(F) -> WTA = erlang:monotonic_time(), @@ -59,7 +59,7 @@ time(F) -> F(), WTB = erlang:monotonic_time(), {B,_} = erlang:statistics(runtime), - {(WTB-WTA)/erlang:convert_time_unit(1, seconds, native),B-A}. + {(WTB-WTA)/erlang:convert_time_unit(1, second, native),B-A}. timer(F) -> WTA = erlang:monotonic_time(), @@ -67,7 +67,7 @@ timer(F) -> R = F(), WTB = erlang:monotonic_time(), {B,_} = erlang:statistics(runtime), - {R,{(WTB-WTA)/erlang:convert_time_unit(1, seconds, native),B-A}}. + {R,{(WTB-WTA)/erlang:convert_time_unit(1, second, native),B-A}}. advanced(_Fun, I) when I < 2 -> false; advanced(Fun, Iterations) -> diff --git a/lib/hipe/util/Makefile b/lib/hipe/util/Makefile index 66e9421c25..04de7f7823 100644 --- a/lib/hipe/util/Makefile +++ b/lib/hipe/util/Makefile @@ -113,4 +113,3 @@ release_docs_spec: $(EBIN)/hipe_timing.beam: ../main/hipe.hrl -$(EBIN)/hipe_vectors.beam: hipe_vectors.hrl diff --git a/lib/hipe/util/hipe_vectors.erl b/lib/hipe/util/hipe_vectors.erl index 7f6c8e91c2..90d736d02c 100644 --- a/lib/hipe/util/hipe_vectors.erl +++ b/lib/hipe/util/hipe_vectors.erl @@ -33,11 +33,25 @@ %% list_to_vector/1, list/1]). --include("hipe_vectors.hrl"). +%%-define(USE_TUPLES, true). +%%-define(USE_GBTREES, true). +-define(USE_ARRAYS, true). + +-type vector() :: vector(_). +-export_type([vector/0, vector/1]). + +-spec new(non_neg_integer(), V) -> vector(E) when V :: E. +-spec set(vector(E), non_neg_integer(), V :: E) -> vector(E). +-spec get(vector(E), non_neg_integer()) -> E. +-spec size(vector(_)) -> non_neg_integer(). +-spec vector_to_list(vector(E)) -> [E]. +%% -spec list_to_vector([E]) -> vector(E). +-spec list(vector(E)) -> [{non_neg_integer(), E}]. %% --------------------------------------------------------------------- -ifdef(USE_TUPLES). +-opaque vector(_) :: tuple(). new(N, V) -> erlang:make_tuple(N, V). @@ -68,8 +82,8 @@ get(Vec, Ix) -> element(Ix+1, Vec). %% --------------------------------------------------------------------- -ifdef(USE_GBTREES). +-opaque vector(E) :: gb_trees:tree(non_neg_integer(), E). --spec new(non_neg_integer(), _) -> hipe_vector(). new(N, V) when is_integer(N), N >= 0 -> gb_trees:from_orddict(mklist(N, V)). @@ -81,14 +95,11 @@ mklist(M, N, V) when M < N -> mklist(_, _, _) -> []. --spec size(hipe_vector()) -> non_neg_integer(). size(V) -> gb_trees:size(V). --spec list(hipe_vector()) -> [{_, _}]. list(Vec) -> gb_trees:to_list(Vec). -%% -spec list_to_vector([_]) -> hipe_vector(). %% list_to_vector(Xs) -> %% gb_trees:from_orddict(index(Xs, 0)). %% @@ -97,16 +108,29 @@ list(Vec) -> %% index([],_) -> %% []. --spec vector_to_list(hipe_vector()) -> [_]. vector_to_list(V) -> gb_trees:values(V). --spec set(hipe_vector(), non_neg_integer(), _) -> hipe_vector(). set(Vec, Ix, V) -> gb_trees:update(Ix, V, Vec). --spec get(hipe_vector(), non_neg_integer()) -> any(). get(Vec, Ix) -> gb_trees:get(Ix, Vec). -endif. %% ifdef USE_GBTREES + +%% --------------------------------------------------------------------- + +-ifdef(USE_ARRAYS). +%%-opaque vector(E) :: array:array(E). +-type vector(E) :: array:array(E). % Work around dialyzer bug + +new(N, V) -> array:new(N, {default, V}). +size(V) -> array:size(V). +list(Vec) -> array:to_orddict(Vec). +%% list_to_vector(Xs) -> array:from_list(Xs). +vector_to_list(V) -> array:to_list(V). +set(Vec, Ix, V) -> array:set(Ix, V, Vec). +get(Vec, Ix) -> array:get(Ix, Vec). + +-endif. %% ifdef USE_ARRAYS diff --git a/lib/hipe/util/hipe_vectors.hrl b/lib/hipe/util/hipe_vectors.hrl deleted file mode 100644 index d4556e9dc4..0000000000 --- a/lib/hipe/util/hipe_vectors.hrl +++ /dev/null @@ -1,29 +0,0 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% -%%-define(USE_TUPLES, true). --define(USE_GBTREES, true). - --ifdef(USE_TUPLES). --type hipe_vector() :: tuple(). --endif. - --ifdef(USE_GBTREES). --type hipe_vector() :: gb_trees:tree(). --endif. diff --git a/lib/hipe/x86/Makefile b/lib/hipe/x86/Makefile index 93f8b955dd..84edeaebe7 100644 --- a/lib/hipe/x86/Makefile +++ b/lib/hipe/x86/Makefile @@ -60,9 +60,9 @@ MODULES=hipe_rtl_to_x86 \ hipe_x86_ra_ls \ hipe_x86_ra_naive \ hipe_x86_ra_postconditions \ - hipe_x86_ra_x87_ls \ hipe_x86_registers \ hipe_x86_spill_restore \ + hipe_x86_subst \ hipe_x86_x87 HRL_FILES=hipe_x86.hrl @@ -133,7 +133,6 @@ $(EBIN)/hipe_x86_ra: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_dummy.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_ls.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_postconditions.beam: ../main/hipe.hrl -$(EBIN)/hipe_x86_ra_x87_ls.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_registers.beam: ../rtl/hipe_literals.hrl $(EBIN)/hipe_x86_spill_restore.beam: ../main/hipe.hrl ../flow/cfg.hrl $(EBIN)/hipe_x86_x87.beam: ../main/hipe.hrl diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl index d13f63b1d9..4c8c98551c 100644 --- a/lib/hipe/x86/hipe_rtl_to_x86.erl +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -85,7 +85,7 @@ conv_insn(I, Map, Data) -> true -> conv_shift(Dst, Src1, BinOp, Src2); false -> - conv_alu(Dst, Src1, BinOp, Src2, []) + conv_alu_nocc(Dst, Src1, BinOp, Src2, []) end, {FixSrc1++FixSrc2++I2, Map2, Data}; #alub{} -> @@ -144,7 +144,7 @@ conv_insn(I, Map, Data) -> {I2, Map, Data}; #load{} -> {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map), - {FixSrc, Src, Map1} = conv_src(hipe_rtl:load_src(I), Map0), + {FixSrc, Src, Map1} = conv_src_noimm(hipe_rtl:load_src(I), Map0), {FixOff, Off, Map2} = conv_src(hipe_rtl:load_offset(I), Map1), I2 = case {hipe_rtl:load_size(I), hipe_rtl:load_sign(I)} of {byte, signed} -> @@ -171,6 +171,7 @@ conv_insn(I, Map, Data) -> Src = hipe_x86:mk_imm_from_atom(hipe_rtl:load_atom_atom(I)), I2 = [hipe_x86:mk_move(Src, Dst)], {I2, Map0, Data}; + #move{src=Dst, dst=Dst} -> {[], Map, Data}; #move{} -> {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map), {FixSrc, Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0), @@ -182,11 +183,11 @@ conv_insn(I, Map, Data) -> I2 = move_retvals(Args, [hipe_x86:mk_ret(-1)]), {FixArgs++I2, Map0, Data}; #store{} -> - {Ptr, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {FixPtr, Ptr, Map0} = conv_src_noimm(hipe_rtl:store_base(I), Map), {FixSrc, Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {FixOff, Off, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), I2 = mk_store(hipe_rtl:store_size(I), Src, Ptr, Off), - {FixSrc++FixOff++I2, Map2, Data}; + {FixPtr++FixSrc++FixOff++I2, Map2, Data}; #switch{} -> % this one also updates Data :-( %% from hipe_rtl2sparc, but we use a hairy addressing mode %% instead of doing the arithmetic manually @@ -206,7 +207,7 @@ conv_insn(I, Map, Data) -> {I2, Map1, NewData}; #fload{} -> {Dst, Map0} = conv_dst(hipe_rtl:fload_dst(I), Map), - {[], Src, Map1} = conv_src(hipe_rtl:fload_src(I), Map0), + {[], Src, Map1} = conv_src_noimm(hipe_rtl:fload_src(I), Map0), {[], Off, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1), I2 = [hipe_x86:mk_fmove(hipe_x86:mk_mem(Src, Off, 'double'),Dst)], {I2, Map2, Data}; @@ -249,6 +250,22 @@ conv_insn(I, Map, Data) -> %%% Finalise the conversion of a 3-address ALU operation, taking %%% care to not introduce more temps and moves than necessary. +conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) -> + case (not same_opnd(Dst, Src1)) andalso (not same_opnd(Dst, Src2)) + andalso (hipe_x86:is_temp(Src1) orelse hipe_x86:is_temp(Src2)) + of + false -> conv_alu(Dst, Src1, 'add', Src2, Tail); + true -> % Use LEA + Type = typeof_dst(Dst), + Mem = case hipe_x86:is_temp(Src1) of + true -> hipe_x86:mk_mem(Src1, Src2, Type); + false -> hipe_x86:mk_mem(Src2, Src1, Type) + end, + [hipe_x86:mk_lea(Mem, Dst) | Tail] + end; +conv_alu_nocc(Dst, Src1, BinOp, Src2, Tail) -> + conv_alu(Dst, Src1, BinOp, Src2, Tail). + conv_alu(Dst, Src1, 'imul', Src2, Tail) -> mk_imul(Src1, Src2, Dst, Tail); conv_alu(Dst, Src1, BinOp, Src2, Tail) -> @@ -572,6 +589,16 @@ conv_fun(Fun, Map) -> end end. +conv_src_noimm(Opnd, Map) -> + R={FixSrc0, Src, NewMap} = conv_src(Opnd, Map), + case hipe_x86:is_imm(Src) of + false -> R; + true -> + Tmp = new_untagged_temp(), + {FixSrc0 ++ [hipe_x86:mk_move(Src, Tmp)], + Tmp, NewMap} + end. + %%% Convert an RTL source operand (imm/var/reg). conv_src(Opnd, Map) -> diff --git a/lib/hipe/x86/hipe_x86_cfg.erl b/lib/hipe/x86/hipe_x86_cfg.erl index ab40b68580..b9f9c711f3 100644 --- a/lib/hipe/x86/hipe_x86_cfg.erl +++ b/lib/hipe/x86/hipe_x86_cfg.erl @@ -24,7 +24,7 @@ -export([init/1, labels/1, start_label/1, succ/2, pred/2, - bb/2, bb_add/3]). + bb/2, bb_add/3, map_bbs/2, fold_bbs/3]). -export([postorder/1, reverse_postorder/1]). -export([linearise/1, params/1, arity/1, redirect_jmp/3]). @@ -33,6 +33,7 @@ -define(BREADTH_ORDER,true). -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -include("hipe_x86.hrl"). -include("../flow/cfg.hrl"). @@ -107,7 +108,7 @@ mk_goto(Label) -> hipe_x86:mk_jmp_label(Label). is_label(I) -> - hipe_x86:is_label(I). + case I of #label{} -> true; _ -> false end. label_name(Label) -> hipe_x86:label_label(Label). diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl index 9cba6cbe4b..4455def74e 100644 --- a/lib/hipe/x86/hipe_x86_defuse.erl +++ b/lib/hipe/x86/hipe_x86_defuse.erl @@ -35,7 +35,7 @@ -endif. -module(?HIPE_X86_DEFUSE). --export([insn_def/1, insn_use/1]). %% src_use/1]). +-export([insn_def/1, insn_defs_all/1, insn_use/1]). %% src_use/1]). -include("../x86/hipe_x86.hrl"). %%% @@ -64,6 +64,16 @@ insn_def(I) -> _ -> [] end. + +%% @doc Answers whether instruction I defines all allocatable registers. Used by +%% hipe_regalloc_prepass. +-spec insn_defs_all(_) -> boolean(). +insn_defs_all(I) -> + case I of + #pseudo_call{} -> true; + _ -> false + end. + dst_def(Dst) -> case Dst of #x86_temp{} -> [Dst]; diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl index 8851ead250..fc782571bf 100644 --- a/lib/hipe/x86/hipe_x86_frame.erl +++ b/lib/hipe/x86/hipe_x86_frame.erl @@ -46,15 +46,13 @@ -include("../x86/hipe_x86.hrl"). -include("../rtl/hipe_literals.hrl"). -frame(Defun, _Options) -> - Formals = fix_formals(hipe_x86:defun_formals(Defun)), - Temps0 = all_temps(hipe_x86:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG0, _Options) -> + Formals = fix_formals(hipe_x86_cfg:params(CFG0)), + Temps0 = all_temps(CFG0, Formals), + MinFrame = defun_minframe(CFG0), Temps = ensure_minframe(MinFrame, Temps0), - CFG0 = hipe_x86_cfg:init(Defun), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps), - hipe_x86_cfg:linearise(CFG1). + do_body(CFG0, Liveness, Formals, Temps). fix_formals(Formals) -> fix_formals(?HIPE_X86_REGISTERS:nr_args(), Formals). @@ -69,23 +67,14 @@ do_body(CFG0, Liveness, Formals, Temps) -> do_prologue(CFG1, Context). do_blocks(CFG, Context) -> - Labels = hipe_x86_cfg:labels(CFG), - do_blocks(Labels, CFG, Context). + hipe_x86_cfg:map_bbs(fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG). -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) -> Liveness = context_liveness(Context), LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label), - Block = hipe_x86_cfg:bb(CFG, Label), Code = hipe_bb:code(Block), - NewCode = do_block(Code, LiveOut, Context), - NewBlock = hipe_bb:code_update(Block, NewCode), - NewCFG = hipe_x86_cfg:bb_add(CFG, Label, NewBlock), - do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> - CFG. - -do_block(Insns, LiveOut, Context) -> - do_block(Insns, LiveOut, Context, context_framesize(Context), []). + NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), + hipe_bb:code_update(Block, NewCode). do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), @@ -609,39 +598,46 @@ temp_is_pseudo(Temp) -> %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), S2 = tset_filter(S1, fun(T) -> temp_is_pseudo(T) end), S2. -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_x86_defuse:insn_def(I)), - S2 = tset_add_list(S1, hipe_x86_defuse:insn_use(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_x86_defuse:insn_use(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_x86_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). + +-compile({inline, [tset_empty/0, tset_size/1, tset_insert/2, + tset_filter/2, tset_to_list/1]}). tset_empty() -> - gb_sets:new(). + #{}. tset_size(S) -> - gb_sets:size(S). + map_size(S). tset_insert(S, T) -> - gb_sets:add_element(T, S). + S#{T => []}. -tset_add_list(S, Ts) -> - gb_sets:union(S, gb_sets:from_list(Ts)). +tset_add_list(S, []) -> S; +tset_add_list(S, [T|Ts]) -> + tset_add_list(S#{T => []}, Ts). -tset_del_list(S, Ts) -> - gb_sets:subtract(S, gb_sets:from_list(Ts)). +tset_del_list(S, []) -> S; +tset_del_list(S, [T|Ts]) -> + tset_del_list(maps:remove(T,S), Ts). tset_filter(S, F) -> - gb_sets:filter(F, S). + maps:filter(fun(K, _V) -> F(K) end, S). tset_to_list(S) -> - gb_sets:to_list(S). + maps:keys(S). %%% %%% Compute minimum permissible frame size, ignoring spilled temps. @@ -649,16 +645,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_x86:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_x86:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_x86_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/x86/hipe_x86_main.erl b/lib/hipe/x86/hipe_x86_main.erl index 13b0bb6b28..341269b698 100644 --- a/lib/hipe/x86/hipe_x86_main.erl +++ b/lib/hipe/x86/hipe_x86_main.erl @@ -53,19 +53,23 @@ ?RTL_TO_X86(MFA, RTL, Options) -> Translated = ?option_time(?HIPE_RTL_TO_X86:translate(RTL), "RTL-to-"?X86STR, Options), - SpillRest = + TransCFG = ?option_time(hipe_x86_cfg:init(Translated), + ?X86STR" to cfg", Options), + SpillRestCFG = case proplists:get_bool(caller_save_spill_restore, Options) of true -> - ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(Translated, Options), + ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(TransCFG, Options), ?X86STR" spill restore", Options); false -> - Translated + TransCFG end, - Allocated = ?option_time(?HIPE_X86_RA:ra(SpillRest, Options), - ?X86STR" register allocation", Options), - Framed = ?option_time(?HIPE_X86_FRAME:frame(Allocated, Options), - ?X86STR" frame", Options), - Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options), - ?X86STR" finalise", Options), + AllocatedCFG = ?option_time(?HIPE_X86_RA:ra(SpillRestCFG, Options), + ?X86STR" register allocation", Options), + FramedCFG = ?option_time(?HIPE_X86_FRAME:frame(AllocatedCFG, Options), + ?X86STR" frame", Options), + Framed = ?option_time(hipe_x86_cfg:linearise(FramedCFG), + ?X86STR" linearise", Options), + Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options), + ?X86STR" finalise", Options), ?HIPE_X86_PP:optional_pp(Finalised, MFA, Options), {native, ?X86TAG, {unprofiled, Finalised}}. diff --git a/lib/hipe/x86/hipe_x86_pp.erl b/lib/hipe/x86/hipe_x86_pp.erl index 9352cf5dbf..ff26a31877 100644 --- a/lib/hipe/x86/hipe_x86_pp.erl +++ b/lib/hipe/x86/hipe_x86_pp.erl @@ -171,7 +171,7 @@ pp_insn(Dev, I, Pre) -> #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage} -> io:format(Dev, "\tpseudo_tailcall ", []), pp_fun(Dev, Fun), - io:format(Dev, "~w (", [Arity]), + io:format(Dev, " ~w (", [Arity]), pp_args(Dev, StkArgs), io:format(Dev, ") ~w\n", [Linkage]); #pseudo_tailcall_prepare{} -> diff --git a/lib/hipe/x86/hipe_x86_ra.erl b/lib/hipe/x86/hipe_x86_ra.erl index f66961a7a7..b64c22a76c 100644 --- a/lib/hipe/x86/hipe_x86_ra.erl +++ b/lib/hipe/x86/hipe_x86_ra.erl @@ -41,60 +41,83 @@ %%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. -include("../main/hipe.hrl"). -ra(Defun0, Options) -> - %% ?HIPE_X86_PP:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} = ra_fp(Defun0, Options), - %% ?HIPE_X86_PP:pp(Defun1), +-ifdef(HIPE_INSTRUMENT_COMPILER). +code_size(CFG) -> + hipe_x86_cfg:fold_bbs(fun(_, BB, Acc) -> Acc + length(hipe_bb:code(BB)) end, + 0, CFG). +-endif. %% ifdef(HIPE_INSTRUMENT_COMPILER) + +ra(CFG0, Options) -> + %% hipe_x86_cfg:pp(CFG0), + Liveness0 = ?HIPE_X86_SPECIFIC:analyze(CFG0, no_context), + {CFG1, Liveness, Coloring_fp, SpillIndex} = ra_fp(CFG0, Liveness0, Options), + %% hipe_x86_cfg:pp(CFG1), ?start_ra_instrumentation(Options, - length(hipe_x86:defun_code(Defun1)), - element(2,hipe_x86:defun_var_range(Defun1))), - {Defun2, Coloring} + code_size(CFG1), + element(2,hipe_gensym:var_range(x86))), + {CFG2, _, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, Liveness, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, Liveness, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, Liveness, SpillIndex, Options, hipe_graph_coloring_regalloc); linear_scan -> - ?HIPE_X86_RA_LS:ra(Defun1, SpillIndex, Options); + ?HIPE_X86_RA_LS:ra(CFG1, Liveness, SpillIndex, Options); naive -> - ?HIPE_X86_RA_NAIVE:ra(Defun1, Coloring_fp, Options); + ?HIPE_X86_RA_NAIVE:ra(CFG1, Liveness, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, ?stop_ra_instrumentation(Options, - length(hipe_x86:defun_code(Defun2)), - element(2,hipe_x86:defun_var_range(Defun2))), - %% ?HIPE_X86_PP:pp(Defun2), - ?HIPE_X86_RA_FINALISE:finalise(Defun2, Coloring, Coloring_fp, Options). + code_size(CFG2), + element(2,hipe_gensym:var_range(x86))), + %% hipe_x86_cfg:pp(CFG2), + ?HIPE_X86_RA_FINALISE:finalise(CFG2, Coloring, Coloring_fp, Options). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, ?HIPE_X86_SPECIFIC). +ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, + ?HIPE_X86_SPECIFIC, no_context). -ifdef(HIPE_AMD64). -ra_fp(Defun, Options) -> - case proplists:get_bool(inline_fp, Options) and - (proplists:get_value(regalloc, Options) =/= naive) of - true -> - case proplists:get_bool(x87, Options) of - true -> - hipe_amd64_ra_x87_ls:ra(Defun, Options); - false -> - hipe_regalloc_loop:ra_fp(Defun, Options, - hipe_coalescing_regalloc, - hipe_amd64_specific_sse2) - end; - false -> - {Defun,[],0} +ra_fp(CFG, Liveness, Options) -> + Regalloc0 = proplists:get_value(regalloc, Options), + {Regalloc, TargetMod} = + case proplists:get_bool(inline_fp, Options) and (Regalloc0 =/= naive) of + false -> {naive, undefined}; + true -> + case proplists:get_bool(x87, Options) of + true -> {linear_scan, hipe_amd64_specific_x87}; + false -> {Regalloc0, hipe_amd64_specific_sse2} + end + end, + case Regalloc of + coalescing -> + ra_fp(CFG, Liveness, Options, hipe_coalescing_regalloc, TargetMod); + optimistic -> + ra_fp(CFG, Liveness, Options, hipe_optimistic_regalloc, TargetMod); + graph_color -> + ra_fp(CFG, Liveness, Options, hipe_graph_coloring_regalloc, TargetMod); + linear_scan -> hipe_amd64_ra_ls:ra_fp(CFG, Liveness, Options, TargetMod, + no_context); + naive -> {CFG,Liveness,[],0}; + _ -> + exit({unknown_regalloc_compiler_option, + proplists:get_value(regalloc,Options)}) end. + +ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod) -> + hipe_regalloc_loop:ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod, + no_context). -else. -ra_fp(Defun, Options) -> +ra_fp(CFG, Liveness, Options) -> case proplists:get_bool(inline_fp, Options) of true -> - hipe_x86_ra_x87_ls:ra(Defun, Options); + hipe_x86_ra_ls:ra_fp(CFG, Liveness, Options, hipe_x86_specific_x87, + no_context); false -> - {Defun,[],0} + {CFG,Liveness,[],0} end. -endif. diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl index 5dd75cb7ae..edfd7b332c 100644 --- a/lib/hipe/x86/hipe_x86_ra_finalise.erl +++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl @@ -25,23 +25,36 @@ -define(HIPE_X86_RA_FINALISE, hipe_amd64_ra_finalise). -define(HIPE_X86_REGISTERS, hipe_amd64_registers). -define(HIPE_X86_X87, hipe_amd64_x87). +-define(HIPE_X86_SSE2, hipe_amd64_sse2). +-define(IF_HAS_SSE2(Expr), Expr). -else. -define(HIPE_X86_RA_FINALISE, hipe_x86_ra_finalise). -define(HIPE_X86_REGISTERS, hipe_x86_registers). -define(HIPE_X86_X87, hipe_x86_x87). +-define(IF_HAS_SSE2(Expr),). -endif. -module(?HIPE_X86_RA_FINALISE). -export([finalise/4]). -include("../x86/hipe_x86.hrl"). -finalise(Defun, TempMap, FpMap, Options) -> - Defun1 = finalise_ra(Defun, TempMap, FpMap, Options), +finalise(CFG0, TempMap, FpMap, Options) -> + CFG1 = finalise_ra(CFG0, TempMap, FpMap, Options), case proplists:get_bool(x87, Options) of true -> - ?HIPE_X86_X87:map(Defun1); + ?HIPE_X86_X87:map(CFG1); _ -> - Defun1 + case + proplists:get_bool(inline_fp, Options) + and (proplists:get_value(regalloc, Options) =:= linear_scan) + of + %% Ugly, but required to avoid Dialyzer complaints about "Unknown + %% function" hipe_x86_sse2:map/1 + ?IF_HAS_SSE2(true -> + ?HIPE_X86_SSE2:map(CFG1);) + false -> + CFG1 + end end. %%% @@ -50,15 +63,16 @@ finalise(Defun, TempMap, FpMap, Options) -> %%% but I just want this to work now) %%% -finalise_ra(Defun, [], [], _Options) -> - Defun; -finalise_ra(Defun, TempMap, FpMap, Options) -> - Code = hipe_x86:defun_code(Defun), - {_, SpillLimit} = hipe_x86:defun_var_range(Defun), +finalise_ra(CFG, [], [], _Options) -> + CFG; +finalise_ra(CFG, TempMap, FpMap, Options) -> + {_, SpillLimit} = hipe_gensym:var_range(x86), Map = mk_ra_map(TempMap, SpillLimit), FpMap0 = mk_ra_map_fp(FpMap, SpillLimit, Options), - NewCode = ra_code(Code, Map, FpMap0), - Defun#defun{code=NewCode}. + hipe_x86_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FpMap0) end, CFG). + +ra_bb(BB, Map, FpMap) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap)). ra_code(Code, Map, FpMap) -> [ra_insn(I, Map, FpMap) || I <- Code]. @@ -230,49 +244,27 @@ mk_ra_map(TempMap, SpillLimit) -> gb_trees:empty(), TempMap). -conv_ra_maplet(MapLet = {From,To}, SpillLimit, IsPrecoloured) -> +conv_ra_maplet({From,To}, SpillLimit, IsPrecoloured) + when is_integer(From), From =< SpillLimit -> %% From should be a pseudo, or a hard reg mapped to itself. - if is_integer(From), From =< SpillLimit -> - case ?HIPE_X86_REGISTERS:IsPrecoloured(From) of - false -> []; - _ -> - case To of - {reg, From} -> []; - _ -> exit({?MODULE,conv_ra_maplet,MapLet}) - end - end; - true -> exit({?MODULE,conv_ra_maplet,MapLet}) + case ?HIPE_X86_REGISTERS:IsPrecoloured(From) of + false -> ok; + _ -> To = {reg, From}, ok end, %% end of From check case To of - {reg, NewReg} -> + {reg, NewReg} when is_integer(NewReg) -> %% NewReg should be a hard reg, or a pseudo mapped %% to itself (formals are handled this way). - if is_integer(NewReg) -> - case ?HIPE_X86_REGISTERS:IsPrecoloured(NewReg) of - true -> []; - _ -> if From =:= NewReg -> []; - true -> - exit({?MODULE,conv_ra_maplet,MapLet}) - end - end; - true -> exit({?MODULE,conv_ra_maplet,MapLet}) - end, - %% end of NewReg check + true = (?HIPE_X86_REGISTERS:IsPrecoloured(NewReg) orelse From =:= NewReg), {From, NewReg}; - {spill, SpillIndex} -> - %% SpillIndex should be >= 0. - if is_integer(SpillIndex), SpillIndex >= 0 -> []; - true -> exit({?MODULE,conv_ra_maplet,MapLet}) - end, - %% end of SpillIndex check + {spill, SpillIndex} when is_integer(SpillIndex), SpillIndex >= 0 -> ToTempNum = SpillLimit+SpillIndex+1, MaxTempNum = hipe_gensym:get_var(x86), if MaxTempNum >= ToTempNum -> ok; true -> hipe_gensym:set_var(x86, ToTempNum) end, - {From, ToTempNum}; - _ -> exit({?MODULE,conv_ra_maplet,MapLet}) + {From, ToTempNum} end. mk_ra_map_x87(FpMap, SpillLimit) -> diff --git a/lib/hipe/x86/hipe_x86_ra_ls.erl b/lib/hipe/x86/hipe_x86_ra_ls.erl index 3e34433111..34ce50d494 100644 --- a/lib/hipe/x86/hipe_x86_ra_ls.erl +++ b/lib/hipe/x86/hipe_x86_ra_ls.erl @@ -35,41 +35,64 @@ -endif. -module(?HIPE_X86_RA_LS). --export([ra/3,regalloc/7]). +-export([ra/4,ra_fp/5]). -define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. -include("../main/hipe.hrl"). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_x86_cfg:init(NewDefun), - +ra(CFG, Liveness, SpillIndex, Options) -> SpillLimit = ?HIPE_X86_SPECIFIC:number_of_temporaries( - CFG), + CFG, no_context), + ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), + alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). + +ra_fp(CFG, Liveness, Options, TargetMod, TargetCtx) -> + ?inc_counter(ra_calls_counter,1), + %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), + SpillIndex = 0, + SpillLimit = TargetMod:number_of_temporaries(CFG, TargetCtx), ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), - alloc(NewDefun, SpillIndex, SpillLimit, Options). + ?inc_counter(ra_iteration_counter,1), + %% ?HIPE_X86_PP:pp(Defun), + + {Coloring,NewSpillIndex} = + regalloc(CFG, Liveness, + TargetMod:allocatable('linearscan', TargetCtx), + [hipe_x86_cfg:start_label(CFG)], + SpillIndex, SpillLimit, Options, + TargetMod, TargetCtx), + + {NewCFG, _DidSpill} = + TargetMod:check_and_rewrite(CFG, Coloring, 'linearscan', TargetCtx), + TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod, TargetCtx), + {TempMap2, NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + TargetMod, TargetCtx, TempMap), + Coloring2 = + hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), + ?add_spills(Options, NewSpillIndex), + {NewCFG, Liveness, Coloring2, NewSpillIndex2}. -alloc(Defun, SpillIndex, SpillLimit, Options) -> +alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) -> ?inc_counter(ra_iteration_counter,1), %% ?HIPE_X86_PP:pp(Defun), - CFG = hipe_x86_cfg:init(Defun), - {Coloring, NewSpillIndex} = + {Coloring, NewSpillIndex} = regalloc( - CFG, + CFG, Liveness, ?HIPE_X86_REGISTERS:allocatable()-- [?HIPE_X86_REGISTERS:temp1(), ?HIPE_X86_REGISTERS:temp0()], [hipe_x86_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, - ?HIPE_X86_SPECIFIC), - {NewDefun, _DidSpill} = + ?HIPE_X86_SPECIFIC, no_context), + {NewCFG, _DidSpill} = ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite( - Defun, Coloring, 'linearscan'), + CFG, Coloring, 'linearscan'), %% ?HIPE_X86_PP:pp(NewDefun), - TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), - {TempMap2,NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, - ?HIPE_X86_SPECIFIC, TempMap), + TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC, no_context), + {TempMap2,NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + ?HIPE_X86_SPECIFIC, no_context, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), case proplists:get_bool(verbose_spills, Options) of @@ -79,8 +102,9 @@ alloc(Defun, SpillIndex, SpillLimit, Options) -> ok end, ?add_spills(Options, NewSpillIndex), - {NewDefun, Coloring2}. + {NewCFG, Liveness, Coloring2}. -regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, DontSpill, Options, Target) -> - hipe_ls_regalloc:regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, - DontSpill, Options, Target). +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, + TgtMod, TgtCtx) -> + hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, + DontSpill, Options, TgtMod, TgtCtx). diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl index 0ef4ef0a04..35de692e07 100644 --- a/lib/hipe/x86/hipe_x86_ra_naive.erl +++ b/lib/hipe/x86/hipe_x86_ra_naive.erl @@ -33,15 +33,14 @@ -endif. -module(?HIPE_X86_RA_NAIVE). --export([ra/3]). +-export([ra/4]). -include("../x86/hipe_x86.hrl"). -define(HIPE_INSTRUMENT_COMPILER, true). % enable instrumentation -include("../main/hipe.hrl"). -ra(X86Defun, Coloring_fp, Options) -> - #defun{code=Code0} = X86Defun, - Code1 = do_insns(Code0), +ra(CFG0, Liveness, Coloring_fp, Options) -> + CFG = hipe_x86_cfg:map_bbs(fun do_bb/2, CFG0), NofSpilledFloats = count_non_float_spills(Coloring_fp), NofFloats = length(Coloring_fp), ?add_spills(Options, hipe_gensym:get_var(x86) - @@ -49,15 +48,17 @@ ra(X86Defun, Coloring_fp, Options) -> NofSpilledFloats - NofFloats), TempMap = [], - {X86Defun#defun{code=Code1, - var_range={0, hipe_gensym:get_var(x86)}}, + {CFG, Liveness, TempMap}. +do_bb(_Lbl, BB) -> + hipe_bb:code_update(BB, do_insns(hipe_bb:code(BB))). + count_non_float_spills(Coloring_fp) -> count_non_float_spills(Coloring_fp, 0). count_non_float_spills([{_,To}|Tail], Num) -> - case ?HIPE_X86_SPECIFIC_FP:is_precoloured(To) of + case ?HIPE_X86_SPECIFIC_FP:is_precoloured(To, no_context) of true -> count_non_float_spills(Tail, Num); false -> diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl index 0a70bd1d22..f496b71828 100644 --- a/lib/hipe/x86/hipe_x86_ra_postconditions.erl +++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl @@ -40,14 +40,18 @@ -include("../main/hipe.hrl"). -define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). -check_and_rewrite(Defun, Coloring, Strategy) -> +check_and_rewrite(CFG, Coloring, Strategy) -> %% io:format("Converting\n"), - TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), + TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC, no_context), %% io:format("Rewriting\n"), - #defun{code=Code0} = Defun, - {Code1, DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - {Defun#defun{code=Code1,var_range={0,hipe_gensym:get_var(x86)}}, - DidSpill}. + do_bbs(hipe_x86_cfg:labels(CFG), TempMap, Strategy, CFG, false). + +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_x86_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_x86_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), @@ -169,14 +173,22 @@ do_jmp_switch(I, TempMap, Strategy) -> %%% Fix a lea op. do_lea(I, TempMap, Strategy) -> - #lea{temp=Temp} = I, - case is_spilled(Temp, TempMap) of - false -> - {[I], false}; - true -> - NewTmp = spill_temp('untagged', Strategy), - {[I#lea{temp=NewTmp}, hipe_x86:mk_move(NewTmp, Temp)], - true} + #lea{mem=Mem0,temp=Temp0} = I, + {FixMem, Mem, DidSpill1} = fix_mem_operand(Mem0, TempMap, temp1(Strategy)), + case Mem of + #x86_mem{base=Base, off=#x86_imm{value=0}} -> + %% We've decayed into a move due to both operands being memory (there's an + %% 'add' in FixMem). + {FixMem ++ [hipe_x86:mk_move(Base, Temp0)], DidSpill1}; + #x86_mem{} -> + {StoreTemp, Temp, DidSpill2} = + case is_mem_opnd(Temp0, TempMap) of + false -> {[], Temp0, false}; + true -> + Temp1 = clone2(Temp0, temp0(Strategy)), + {[hipe_x86:mk_move(Temp1, Temp0)], Temp1, true} + end, + {FixMem ++ [I#lea{mem=Mem,temp=Temp} | StoreTemp], DidSpill1 or DidSpill2} end. %%% Fix a move op. @@ -377,19 +389,12 @@ is_mem_opnd(Opnd, TempMap) -> Reg = hipe_x86:temp_reg(Opnd), case hipe_x86:temp_is_allocatable(Opnd) of true -> - case tuple_size(TempMap) > Reg of + case + hipe_temp_map:is_spilled(Reg, TempMap) of true -> - case - hipe_temp_map:is_spilled(Reg, TempMap) of - true -> - ?count_temp(Reg), - true; - false -> false - end; - _ -> - %% impossible, but was true in ls post and false in normal post - exit({?MODULE,is_mem_opnd,Reg}), - false + ?count_temp(Reg), + true; + false -> false end; false -> true end; @@ -404,15 +409,10 @@ is_spilled(Temp, TempMap) -> case hipe_x86:temp_is_allocatable(Temp) of true -> Reg = hipe_x86:temp_reg(Temp), - case tuple_size(TempMap) > Reg of + case hipe_temp_map:is_spilled(Reg, TempMap) of true -> - case hipe_temp_map:is_spilled(Reg, TempMap) of - true -> - ?count_temp(Reg), - true; - false -> - false - end; + ?count_temp(Reg), + true; false -> false end; @@ -429,14 +429,14 @@ clone(Dst, Strategy) -> end, spill_temp(Type, Strategy). -spill_temp0(Type, 'normal') -> +spill_temp0(Type, 'normal') when Type =/= double -> hipe_x86:mk_new_temp(Type); -spill_temp0(Type, 'linearscan') -> +spill_temp0(Type, 'linearscan') when Type =/= double -> hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), Type). -spill_temp(Type, 'normal') -> +spill_temp(Type, 'normal') when Type =/= double -> hipe_x86:mk_new_temp(Type); -spill_temp(Type, 'linearscan') -> +spill_temp(Type, 'linearscan') when Type =/= double -> hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), Type). %%% Make a certain reg into a clone of Dst @@ -448,6 +448,6 @@ clone2(Dst, RegOpt) -> #x86_temp{} -> hipe_x86:temp_type(Dst) end, case RegOpt of - [] -> hipe_x86:mk_new_temp(Type); + [] when Type =/= double -> hipe_x86:mk_new_temp(Type); Reg -> hipe_x86:mk_temp(Reg, Type) end. diff --git a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl b/lib/hipe/x86/hipe_x86_ra_x87_ls.erl deleted file mode 100644 index 1ee76e5948..0000000000 --- a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl +++ /dev/null @@ -1,64 +0,0 @@ -%% $Id$ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2006-2016. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% - -%% Linear Scan register allocator for x87 - --ifdef(HIPE_AMD64). --define(HIPE_X86_RA_X87_LS, hipe_amd64_ra_x87_ls). --define(HIPE_X86_SPECIFIC_X87, hipe_amd64_specific_x87). --define(HIPE_X86_PP, hipe_amd64_pp). --define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). --else. --define(HIPE_X86_RA_X87_LS, hipe_x86_ra_x87_ls). --define(HIPE_X86_SPECIFIC_X87, hipe_x86_specific_x87). --define(HIPE_X86_PP, hipe_x86_pp). --define(HIPE_X86_RA_LS, hipe_x86_ra_ls). --endif. - --module(?HIPE_X86_RA_X87_LS). --export([ra/2]). - -%%-define(DEBUG,1). - --define(HIPE_INSTRUMENT_COMPILER, false). %% Turn off instrumentation. --include("../main/hipe.hrl"). - -ra(Defun, Options) -> - ?inc_counter(ra_calls_counter,1), - CFG = hipe_x86_cfg:init(Defun), - %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), - SpillIndex = 0, - SpillLimit = ?HIPE_X86_SPECIFIC_X87:number_of_temporaries(CFG), - ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), - - ?inc_counter(ra_iteration_counter,1), - %% ?HIPE_X86_PP:pp(Defun), - Cfg = hipe_x86_cfg:init(Defun), % XXX: didn't we just compute this above? - - {Coloring,NewSpillIndex} = - ?HIPE_X86_RA_LS:regalloc(Cfg, - ?HIPE_X86_SPECIFIC_X87:allocatable(), - [hipe_x86_cfg:start_label(Cfg)], - SpillIndex, SpillLimit, Options, - ?HIPE_X86_SPECIFIC_X87), - - ?add_spills(Options, NewSpillIndex), - {Defun, Coloring, NewSpillIndex}. diff --git a/lib/hipe/x86/hipe_x86_registers.erl b/lib/hipe/x86/hipe_x86_registers.erl index 179d734501..f00bbfb280 100644 --- a/lib/hipe/x86/hipe_x86_registers.erl +++ b/lib/hipe/x86/hipe_x86_registers.erl @@ -224,6 +224,8 @@ ret(N) -> exit({?MODULE, ret, N}) end. +%% Note: the fact that (allocatable() UNION allocatable_x87()) is a subset of +%% call_clobbered() is hard-coded in hipe_x86_defuse:insn_defs_all/1 call_clobbered() -> [{?EAX,tagged},{?EAX,untagged}, % does the RA strip the type or not? {?EDX,tagged},{?EDX,untagged}, diff --git a/lib/hipe/x86/hipe_x86_spill_restore.erl b/lib/hipe/x86/hipe_x86_spill_restore.erl index f17b91f33b..32b1eb7b40 100644 --- a/lib/hipe/x86/hipe_x86_spill_restore.erl +++ b/lib/hipe/x86/hipe_x86_spill_restore.erl @@ -25,13 +25,11 @@ -ifdef(HIPE_AMD64). -define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore). -define(HIPE_X86_LIVENESS, hipe_amd64_liveness). --define(HIPE_X86_SPECIFIC, hipe_amd64_specific). -define(HIPE_X86_REGISTERS, hipe_amd64_registers). -define(X86STR, "amd64"). -else. -define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore). -define(HIPE_X86_LIVENESS, hipe_x86_liveness). --define(HIPE_X86_SPECIFIC, hipe_x86_specific). -define(HIPE_X86_REGISTERS, hipe_x86_registers). -define(X86STR, "x86"). -endif. @@ -51,15 +49,13 @@ -include("../flow/cfg.hrl"). % Added for the definition of #cfg{} %% Main function -spill_restore(Defun, Options) -> - CFG = ?option_time(firstPass(Defun), ?X86STR" First Pass", Options), - CFGFinal = ?option_time(secondPass(CFG), ?X86STR" Second Pass", Options), - hipe_x86_cfg:linearise(CFGFinal). +spill_restore(CFG0, Options) -> + CFG1 = ?option_time(firstPass(CFG0), ?X86STR" First Pass", Options), + ?option_time(secondPass(CFG1), ?X86STR" Second Pass", Options). %% Performs the first pass of the algorithm. %% By working bottom up, introduce the pseudo_spills. -firstPass(Defun) -> - CFG0 = ?HIPE_X86_SPECIFIC:defun_to_cfg(Defun), +firstPass(CFG0) -> %% get the labels bottom up Labels = hipe_x86_cfg:postorder(CFG0), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), diff --git a/lib/hipe/x86/hipe_x86_subst.erl b/lib/hipe/x86/hipe_x86_subst.erl new file mode 100644 index 0000000000..5e642d1d06 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_subst.erl @@ -0,0 +1,94 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2016. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_SUBST, hipe_amd64_subst). +-else. +-define(HIPE_X86_SUBST, hipe_x86_subst). +-endif. + +-module(?HIPE_X86_SUBST). +-export([insn_temps/2]). +-include("../x86/hipe_x86.hrl"). + +%% These should be moved to hipe_x86 and exported +-type temp() :: #x86_temp{}. +-type oper() :: temp() | #x86_imm{} | #x86_mem{}. +-type mfarec() :: #x86_mfa{}. +-type prim() :: #x86_prim{}. +-type funv() :: mfarec() | prim() | temp(). +-type insn() :: tuple(). % for now + +-type subst_fun() :: fun((temp()) -> temp()). + +%% @doc Maps over the temporaries in an instruction +-spec insn_temps(subst_fun(), insn()) -> insn(). +insn_temps(SubstTemp, I) -> + O = fun(O) -> oper_temps(SubstTemp, O) end, + case I of + #alu {src=S, dst=D} -> I#alu {src=O(S), dst=O(D)}; + #cmovcc {src=S, dst=D} -> I#cmovcc {src=O(S), dst=O(D)}; + #cmp {src=S, dst=D} -> I#cmp {src=O(S), dst=O(D)}; + #fmove {src=S, dst=D} -> I#fmove {src=O(S), dst=O(D)}; + #fp_binop{src=S, dst=D} -> I#fp_binop{src=O(S), dst=O(D)}; + #imul {src=S, temp=T} -> I#imul {src=O(S), temp=O(T)}; + #lea {mem=M, temp=T} -> I#lea {mem=O(M), temp=O(T)}; + #move {src=S, dst=D} -> I#move {src=O(S), dst=O(D)}; + #movsx {src=S, dst=D} -> I#movsx {src=O(S), dst=O(D)}; + #movzx {src=S, dst=D} -> I#movzx {src=O(S), dst=O(D)}; + #shift {src=S, dst=D} -> I#shift {src=O(S), dst=O(D)}; + #test {src=S, dst=D} -> I#test {src=O(S), dst=O(D)}; + #fp_unop{arg=A} -> I#fp_unop{arg=O(A)}; + #move64 {dst=D} -> I#move64 {dst=O(D)}; + #push {src=S} -> I#push {src=O(S)}; + #pop {dst=D} -> I#pop {dst=O(D)}; + #jmp_switch{temp=T, jtab=J} -> + I#jmp_switch{temp=O(T), jtab=jtab_temps(SubstTemp, J)}; + #pseudo_call{'fun'=F} -> + I#pseudo_call{'fun'=funv_temps(SubstTemp, F)}; + #pseudo_tailcall{'fun'=F, stkargs=Stk} -> + I#pseudo_tailcall{'fun'=funv_temps(SubstTemp, F), + stkargs=lists:map(O, Stk)}; + #comment{} -> I; + #jmp_label{} -> I; + #pseudo_tailcall_prepare{} -> I; + #pseudo_jcc{} -> I; + #ret{} -> I + end. + +-spec oper_temps(subst_fun(), oper()) -> oper(). +oper_temps(_SubstTemp, I=#x86_imm{}) -> I; +oper_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T); +oper_temps(SubstTemp, M=#x86_mem{base=Base,off=Off}) -> + M#x86_mem{base=oper_temps(SubstTemp, Base), + off =oper_temps(SubstTemp, Off)}. + +-spec funv_temps(subst_fun(), funv()) -> funv(). +funv_temps(_SubstTemp, MFA=#x86_mfa{}) -> MFA; +funv_temps(_SubstTemp, P=#x86_prim{}) -> P; +funv_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T). + +%% TODO: Undo this ifdeffery at the source (make jtab an #x86_imm{} on x86) +-ifdef(HIPE_AMD64). +jtab_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T). +-else. +jtab_temps(_SubstTemp, DataLbl) when is_integer(DataLbl) -> DataLbl. +-endif. diff --git a/lib/hipe/x86/hipe_x86_x87.erl b/lib/hipe/x86/hipe_x86_x87.erl index e874490252..10bb6aa75c 100644 --- a/lib/hipe/x86/hipe_x86_x87.erl +++ b/lib/hipe/x86/hipe_x86_x87.erl @@ -41,13 +41,12 @@ %%---------------------------------------------------------------------- -map(Defun) -> - CFG0 = hipe_x86_cfg:init(Defun), +map(CFG0) -> %% hipe_x86_cfg:pp(CFG0), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), StartLabel = hipe_x86_cfg:start_label(CFG0), {CFG1,_} = do_blocks([], [StartLabel], CFG0, Liveness, [], gb_trees:empty()), - hipe_x86_cfg:linearise(CFG1). + CFG1. do_blocks(Pred, [Lbl|Lbls], CFG, Liveness, Map, BlockMap) -> case gb_trees:lookup(Lbl, BlockMap) of |