Diffstat (limited to 'lib/hipe')
87 files changed, 3603 insertions, 481 deletions
diff --git a/lib/hipe/amd64/Makefile b/lib/hipe/amd64/Makefile index 617f6749ac..d0da8cdff6 100644 --- a/lib/hipe/amd64/Makefile +++ b/lib/hipe/amd64/Makefile @@ -128,6 +128,7 @@ $(EBIN)/hipe_amd64_ra_postconditions.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl $(EBIN)/hipe_amd64_ra_sse2_postconditions.beam: ../main/hipe.hrl $(EBIN)/hipe_amd64_registers.beam: ../rtl/hipe_literals.hrl $(EBIN)/hipe_amd64_spill_restore.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../flow/cfg.hrl ../x86/hipe_x86_spill_restore.erl +$(EBIN)/hipe_amd64_subst.beam: ../x86/hipe_x86_subst.erl $(EBIN)/hipe_amd64_x87.beam: ../x86/hipe_x86_x87.erl $(EBIN)/hipe_amd64_sse2.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl $(EBIN)/hipe_rtl_to_amd64.beam: ../x86/hipe_rtl_to_x86.erl ../rtl/hipe_rtl.hrl diff --git a/lib/hipe/amd64/hipe_amd64_encode.erl b/lib/hipe/amd64/hipe_amd64_encode.erl index f8cc0c7d83..bda2824ffc 100644 --- a/lib/hipe/amd64/hipe_amd64_encode.erl +++ b/lib/hipe/amd64/hipe_amd64_encode.erl @@ -1316,6 +1316,7 @@ dotest1(OS) -> RM64 = {rm64,rm_reg(?EDX)}, RM32 = {rm32,rm_reg(?EDX)}, RM16 = {rm16,rm_reg(?EDX)}, + RM16REX = {rm16,rm_reg(?R13)}, RM8 = {rm8,rm_reg(?EDX)}, RM8REX = {rm8,rm_reg(?SIL)}, Rel32 = {rel32,Word32}, @@ -1479,6 +1480,7 @@ dotest1(OS) -> t(OS,'test',{RM8,Imm8}), t(OS,'test',{RM8REX,Imm8}), t(OS,'test',{RM16,Imm16}), + t(OS,'test',{RM16REX,Imm16}), t(OS,'test',{RM32,Imm32}), t(OS,'test',{RM64,Imm32}), t(OS,'test',{RM32,Reg32}), diff --git a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl index 8a3ea92156..891c874a15 100644 --- a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl +++ b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl @@ -53,6 +53,8 @@ do_insn(I, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill} do_fp_unop(I, TempMap, Strategy); #fp_binop{} -> do_fp_binop(I, TempMap, Strategy); + #pseudo_spill_fmove{} -> + do_pseudo_spill_fmove(I, TempMap, Strategy); _ -> %% All non sse2 ops {[I], false} @@ -95,8 +97,13 @@ do_fmove(I, TempMap, Strategy) -> of true -> Tmp = spill_temp(double, Strategy), - {[#fmove{src=Src, dst=Tmp},I#fmove{src=Tmp,dst=Dst}], - true}; + %% pseudo_spill_fmove allows spill slot move coalescing, but must not + %% contain memory operands (except for spilled temps) + Is = case is_float_temp(Src) andalso is_float_temp(Dst) of + true -> [#pseudo_spill_fmove{src=Src, temp=Tmp, dst=Dst}]; + false -> [#fmove{src=Src, dst=Tmp},I#fmove{src=Tmp,dst=Dst}] + end, + {Is, true}; false -> {[I], false} end. @@ -104,6 +111,12 @@ do_fmove(I, TempMap, Strategy) -> is_float_temp(#x86_temp{type=Type}) -> Type =:= double; is_float_temp(#x86_mem{}) -> false. +%%% Fix an pseudo_spill_fmove op. +do_pseudo_spill_fmove(I = #pseudo_spill_fmove{temp=Temp}, TempMap, _Strategy) -> + %% Temp is above the low water mark and must not have been spilled + false = is_mem_opnd(Temp, TempMap), + {[I], false}. % nothing to do + %%% Check if an operand denotes a memory cell (mem or pseudo). is_mem_opnd(Opnd, TempMap) -> diff --git a/lib/hipe/amd64/hipe_amd64_registers.erl b/lib/hipe/amd64/hipe_amd64_registers.erl index a4cb71a106..a5cecef5a1 100644 --- a/lib/hipe/amd64/hipe_amd64_registers.erl +++ b/lib/hipe/amd64/hipe_amd64_registers.erl @@ -207,19 +207,14 @@ allocatable_x87() -> nr_args() -> ?AMD64_NR_ARG_REGS. 
-arg(N) -> - if N < ?AMD64_NR_ARG_REGS -> - case N of - 0 -> ?ARG0; - 1 -> ?ARG1; - 2 -> ?ARG2; - 3 -> ?ARG3; - 4 -> ?ARG4; - 5 -> ?ARG5; - _ -> exit({?MODULE, arg, N}) - end; - true -> - exit({?MODULE, arg, N}) +arg(N) when N < ?AMD64_NR_ARG_REGS -> + case N of + 0 -> ?ARG0; + 1 -> ?ARG1; + 2 -> ?ARG2; + 3 -> ?ARG3; + 4 -> ?ARG4; + 5 -> ?ARG5 end. is_arg(R) -> @@ -240,11 +235,7 @@ args(Arity) when is_integer(Arity), Arity >= 0 -> args(I, Rest) when I < 0 -> Rest; args(I, Rest) -> args(I-1, [arg(I) | Rest]). -ret(N) -> - case N of - 0 -> ?RAX; - _ -> exit({?MODULE, ret, N}) - end. +ret(0) -> ?RAX. %% Note: the fact that (allocatable() UNION allocatable_x87() UNION %% allocatable_sse2()) is a subset of call_clobbered() is hard-coded in diff --git a/lib/hipe/arm/hipe_arm.erl b/lib/hipe/arm/hipe_arm.erl index e34a00f561..3b090b501a 100644 --- a/lib/hipe/arm/hipe_arm.erl +++ b/lib/hipe/arm/hipe_arm.erl @@ -79,6 +79,9 @@ pseudo_move_dst/1, pseudo_move_src/1, + mk_pseudo_spill_move/3, + is_pseudo_spill_move/1, + mk_pseudo_switch/3, mk_pseudo_tailcall/4, @@ -250,6 +253,10 @@ is_pseudo_move(I) -> case I of #pseudo_move{} -> true; _ -> false end. pseudo_move_dst(#pseudo_move{dst=Dst}) -> Dst. pseudo_move_src(#pseudo_move{src=Src}) -> Src. +mk_pseudo_spill_move(Dst, Temp, Src) -> + #pseudo_spill_move{dst=Dst, temp=Temp, src=Src}. +is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move). + mk_pseudo_switch(JTab, Index, Labels) -> #pseudo_switch{jtab=JTab, index=Index, labels=Labels}. diff --git a/lib/hipe/arm/hipe_arm.hrl b/lib/hipe/arm/hipe_arm.hrl index 67bc07634e..be06b1ebd7 100644 --- a/lib/hipe/arm/hipe_arm.hrl +++ b/lib/hipe/arm/hipe_arm.hrl @@ -101,6 +101,7 @@ -record(pseudo_call_prepare, {nrstkargs}). -record(pseudo_li, {dst, imm, label}). % pre-generated label for use by the assembler -record(pseudo_move, {dst, src}). +-record(pseudo_spill_move, {dst, temp, src}). -record(pseudo_switch, {jtab, index, labels}). -record(pseudo_tailcall, {funv, arity, stkargs, linkage}). -record(pseudo_tailcall_prepare, {}). diff --git a/lib/hipe/arm/hipe_arm_assemble.erl b/lib/hipe/arm/hipe_arm_assemble.erl index 713c148742..9aa730afa9 100644 --- a/lib/hipe/arm/hipe_arm_assemble.erl +++ b/lib/hipe/arm/hipe_arm_assemble.erl @@ -31,7 +31,7 @@ assemble(CompiledCode, Closures, Exports, Options) -> || {MFA, Defun} <- CompiledCode], %% {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = - hipe_pack_constants:pack_constants(Code, 4), + hipe_pack_constants:pack_constants(Code), %% {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = encode(translate(Code, ConstMap), Options), diff --git a/lib/hipe/arm/hipe_arm_cfg.erl b/lib/hipe/arm/hipe_arm_cfg.erl index ea6da67317..0bc3df30b9 100644 --- a/lib/hipe/arm/hipe_arm_cfg.erl +++ b/lib/hipe/arm/hipe_arm_cfg.erl @@ -24,6 +24,7 @@ -export([params/1, reverse_postorder/1]). -export([arity/1]). % for linear scan %%-export([redirect_jmp/3]). +-export([branch_preds/1]). %%% these tell cfg.inc what to define (ugly as hell) -define(BREADTH_ORDER,true). % for linear scan @@ -75,6 +76,26 @@ branch_successors(Branch) -> #pseudo_tailcall{} -> [] end. 
+branch_preds(Branch) -> + case Branch of + #pseudo_bc{true_label=TrueLab,false_label=FalseLab,pred=Pred} -> + [{FalseLab, 1.0-Pred}, {TrueLab, Pred}]; + #pseudo_call{contlab=ContLab, sdesc=#arm_sdesc{exnlab=[]}} -> + %% A function can still cause an exception, even if we won't catch it + [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}]; + #pseudo_call{contlab=ContLab, sdesc=#arm_sdesc{exnlab=ExnLab}} -> + CallExnPred = hipe_bb_weights:call_exn_pred(), + [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}]; + #pseudo_switch{labels=Labels} -> + Prob = 1.0/length(Labels), + [{L, Prob} || L <- Labels]; + _ -> + case branch_successors(Branch) of + [] -> []; + [Single] -> [{Single, 1.0}] + end + end. + -ifdef(REMOVE_TRIVIAL_BBS_NEEDED). fails_to(_Instr) -> []. -endif. diff --git a/lib/hipe/arm/hipe_arm_defuse.erl b/lib/hipe/arm/hipe_arm_defuse.erl index 0e62070c6c..652299a514 100644 --- a/lib/hipe/arm/hipe_arm_defuse.erl +++ b/lib/hipe/arm/hipe_arm_defuse.erl @@ -40,6 +40,7 @@ insn_def_gpr(I) -> #pseudo_call{} -> call_clobbered_gpr(); #pseudo_li{dst=Dst} -> [Dst]; #pseudo_move{dst=Dst} -> [Dst]; + #pseudo_spill_move{dst=Dst, temp=Temp} -> [Dst, Temp]; #pseudo_tailcall_prepare{} -> tailcall_clobbered_gpr(); #smull{dstlo=DstLo,dsthi=DstHi,src1=Src1} -> %% ARM requires DstLo, DstHi, and Src1 to be distinct. @@ -83,6 +84,7 @@ insn_use_gpr(I) -> #pseudo_call{funv=FunV,sdesc=#arm_sdesc{arity=Arity}} -> funv_use(FunV, arity_use_gpr(Arity)); #pseudo_move{src=Src} -> [Src]; + #pseudo_spill_move{src=Src} -> [Src]; #pseudo_switch{jtab=JTabR,index=IndexR} -> addtemp(JTabR, [IndexR]); #pseudo_tailcall{funv=FunV,arity=Arity,stkargs=StkArgs} -> addargs(StkArgs, addtemps(tailcall_clobbered_gpr(), funv_use(FunV, arity_use_gpr(Arity)))); diff --git a/lib/hipe/arm/hipe_arm_frame.erl b/lib/hipe/arm/hipe_arm_frame.erl index e323907e31..a1004fb609 100644 --- a/lib/hipe/arm/hipe_arm_frame.erl +++ b/lib/hipe/arm/hipe_arm_frame.erl @@ -69,6 +69,8 @@ do_insn(I, LiveOut, Context, FPoff) -> do_pseudo_call_prepare(I, FPoff); #pseudo_move{} -> {do_pseudo_move(I, Context, FPoff), FPoff}; + #pseudo_spill_move{} -> + {do_pseudo_spill_move(I, Context, FPoff), FPoff}; #pseudo_tailcall{} -> {do_pseudo_tailcall(I, Context), context_framesize(Context)}; _ -> @@ -100,6 +102,26 @@ pseudo_offset(Temp, FPoff, Context) -> FPoff + context_offset(Context, Temp). %%% +%%% Moves from one spill slot to another +%%% + +do_pseudo_spill_move(I, Context, FPoff) -> + #pseudo_spill_move{dst=Dst, temp=Temp, src=Src} = I, + case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of + false -> % Register allocator changed its mind, turn back to move + do_pseudo_move(hipe_arm:mk_pseudo_move(Dst, Src), Context, FPoff); + true -> + SrcOffset = pseudo_offset(Src, FPoff, Context), + DstOffset = pseudo_offset(Dst, FPoff, Context), + case SrcOffset =:= DstOffset of + true -> []; % omit move-to-self + false -> + mk_load('ldr', Temp, SrcOffset, mk_sp(), + mk_store('str', Temp, DstOffset, mk_sp(), [])) + end + end. + +%%% %%% Return - deallocate frame and emit 'ret $N' insn. %%% diff --git a/lib/hipe/arm/hipe_arm_ra_finalise.erl b/lib/hipe/arm/hipe_arm_ra_finalise.erl index 9bfe0a9a83..80cd470708 100644 --- a/lib/hipe/arm/hipe_arm_ra_finalise.erl +++ b/lib/hipe/arm/hipe_arm_ra_finalise.erl @@ -25,11 +25,17 @@ ra_bb(BB, Map) -> hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, [])). ra_code([I|Insns], Map, Accum) -> - ra_code(Insns, Map, [ra_insn(I, Map) | Accum]); + ra_code(Insns, Map, ra_insn(I, Map, Accum)); ra_code([], _Map, Accum) -> lists:reverse(Accum). 
-ra_insn(I, Map) -> +ra_insn(I, Map, Accum) -> + case I of + #pseudo_move{} -> ra_pseudo_move(I, Map, Accum); + _ -> [ra_insn_1(I, Map) | Accum] + end. + +ra_insn_1(I, Map) -> case I of #alu{} -> ra_alu(I, Map); #cmp{} -> ra_cmp(I, Map); @@ -38,7 +44,7 @@ ra_insn(I, Map) -> #move{} -> ra_move(I, Map); #pseudo_call{} -> ra_pseudo_call(I, Map); #pseudo_li{} -> ra_pseudo_li(I, Map); - #pseudo_move{} -> ra_pseudo_move(I, Map); + #pseudo_spill_move{} -> ra_pseudo_spill_move(I, Map); #pseudo_switch{} -> ra_pseudo_switch(I, Map); #pseudo_tailcall{} -> ra_pseudo_tailcall(I, Map); #smull{} -> ra_smull(I, Map); @@ -80,10 +86,19 @@ ra_pseudo_li(I=#pseudo_li{dst=Dst}, Map) -> NewDst = ra_temp(Dst, Map), I#pseudo_li{dst=NewDst}. -ra_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, Map) -> +ra_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, Map, Accum) -> + NewDst = ra_temp(Dst, Map), + NewSrc = ra_temp(Src, Map), + case NewSrc#arm_temp.reg =:= NewDst#arm_temp.reg of + true -> Accum; + false -> [I#pseudo_move{dst=NewDst,src=NewSrc} | Accum] + end. + +ra_pseudo_spill_move(I=#pseudo_spill_move{dst=Dst,temp=Temp,src=Src}, Map) -> NewDst = ra_temp(Dst, Map), + NewTemp = ra_temp(Temp, Map), NewSrc = ra_temp(Src, Map), - I#pseudo_move{dst=NewDst,src=NewSrc}. + I#pseudo_spill_move{dst=NewDst, temp=NewTemp, src=NewSrc}. ra_pseudo_switch(I=#pseudo_switch{jtab=JTab,index=Index}, Map) -> NewJTab = ra_temp(JTab, Map), diff --git a/lib/hipe/arm/hipe_arm_ra_postconditions.erl b/lib/hipe/arm/hipe_arm_ra_postconditions.erl index 8d1ee1cb94..23c305511f 100644 --- a/lib/hipe/arm/hipe_arm_ra_postconditions.erl +++ b/lib/hipe/arm/hipe_arm_ra_postconditions.erl @@ -56,6 +56,7 @@ do_insn(I, TempMap, Strategy) -> #pseudo_call{} -> do_pseudo_call(I, TempMap, Strategy); #pseudo_li{} -> do_pseudo_li(I, TempMap, Strategy); #pseudo_move{} -> do_pseudo_move(I, TempMap, Strategy); + #pseudo_spill_move{} -> do_pseudo_spill_move(I, TempMap, Strategy); #pseudo_switch{} -> do_pseudo_switch(I, TempMap, Strategy); #pseudo_tailcall{} -> do_pseudo_tailcall(I, TempMap, Strategy); #smull{} -> do_smull(I, TempMap, Strategy); @@ -108,18 +109,25 @@ do_pseudo_li(I=#pseudo_li{dst=Dst}, TempMap, Strategy) -> do_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, TempMap, Strategy) -> %% Either Dst or Src (but not both) may be a pseudo temp. - %% pseudo_move and pseudo_tailcall are special cases: in - %% all other instructions, all temps must be non-pseudos - %% after register allocation. - case temp_is_spilled(Dst, TempMap) of - true -> % Src must not be a pseudo - {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy), - NewI = I#pseudo_move{src=NewSrc}, - {FixSrc ++ [NewI], DidSpill}; + %% pseudo_move, pseudo_spill_move, and pseudo_tailcall + %% are special cases: in all other instructions, all + %% temps must be non-pseudos after register allocation. + case temp_is_spilled(Dst, TempMap) + andalso temp_is_spilled(Dst, TempMap) + of + true -> % Turn into pseudo_spill_move + Temp = clone(Src, temp1(Strategy)), + NewI = #pseudo_spill_move{dst=Dst, temp=Temp, src=Src}, + {[NewI], true}; _ -> {[I], false} end. +do_pseudo_spill_move(I = #pseudo_spill_move{temp=Temp}, TempMap, _Strategy) -> + %% Temp is above the low water mark and must not have been spilled + false = temp_is_spilled(Temp, TempMap), + {[I], false}. 
% nothing to do + do_pseudo_switch(I=#pseudo_switch{jtab=JTab,index=Index}, TempMap, Strategy) -> {FixJTab,NewJTab,DidSpill1} = fix_src1(JTab, TempMap, Strategy), {FixIndex,NewIndex,DidSpill2} = fix_src2(Index, TempMap, Strategy), diff --git a/lib/hipe/arm/hipe_arm_subst.erl b/lib/hipe/arm/hipe_arm_subst.erl index 7510c197bd..4ff245f414 100644 --- a/lib/hipe/arm/hipe_arm_subst.erl +++ b/lib/hipe/arm/hipe_arm_subst.erl @@ -13,7 +13,7 @@ %% limitations under the License. -module(hipe_arm_subst). --export([insn_temps/2]). +-export([insn_temps/2, insn_lbls/2]). -include("hipe_arm.hrl"). %% These should be moved to hipe_arm and exported @@ -31,6 +31,7 @@ -type am3() :: #am3{}. -type arg() :: temp() | integer(). -type funv() :: #arm_mfa{} | #arm_prim{} | temp(). +-type label() :: non_neg_integer(). -type insn() :: tuple(). % for now -type subst_fun() :: fun((temp()) -> temp()). @@ -58,6 +59,8 @@ insn_temps(T, I) -> #pseudo_call{funv=F} -> I#pseudo_call{funv=funv_temps(T, F)}; #pseudo_call_prepare{} -> I; #pseudo_li{dst=D} -> I#pseudo_li{dst=T(D)}; + #pseudo_spill_move{dst=D,temp=U,src=S} -> + I#pseudo_spill_move{dst=T(D),temp=T(U),src=T(S)}; #pseudo_switch{jtab=J=#arm_temp{},index=Ix=#arm_temp{}} -> I#pseudo_switch{jtab=T(J),index=T(Ix)}; #pseudo_tailcall{funv=F,stkargs=Stk} -> @@ -103,3 +106,22 @@ funv_temps(SubstTemp, T=#arm_temp{}) -> SubstTemp(T). -spec arg_temps(subst_fun(), arg()) -> arg(). arg_temps(_SubstTemp, Imm) when is_integer(Imm) -> Imm; arg_temps(SubstTemp, T=#arm_temp{}) -> SubstTemp(T). + +-type lbl_subst_fun() :: fun((label()) -> label()). + +%% @doc Maps over the branch targets in an instruction +-spec insn_lbls(lbl_subst_fun(), insn()) -> insn(). +insn_lbls(SubstLbl, I) -> + case I of + #b_label{label=Label} -> + I#b_label{label=SubstLbl(Label)}; + #pseudo_bc{true_label=T, false_label=F} -> + I#pseudo_bc{true_label=SubstLbl(T), false_label=SubstLbl(F)}; + #pseudo_call{sdesc=Sdesc, contlab=Contlab} -> + I#pseudo_call{sdesc=sdesc_lbls(SubstLbl, Sdesc), + contlab=SubstLbl(Contlab)} + end. + +sdesc_lbls(_SubstLbl, Sdesc=#arm_sdesc{exnlab=[]}) -> Sdesc; +sdesc_lbls(SubstLbl, Sdesc=#arm_sdesc{exnlab=Exnlab}) -> + Sdesc#arm_sdesc{exnlab=SubstLbl(Exnlab)}. diff --git a/lib/hipe/cerl/erl_bif_types.erl b/lib/hipe/cerl/erl_bif_types.erl index 8c96e60229..9321750d44 100644 --- a/lib/hipe/cerl/erl_bif_types.erl +++ b/lib/hipe/cerl/erl_bif_types.erl @@ -2029,17 +2029,14 @@ arith_rem(Min1, Max1, Min2, Max2) -> Min1_geq_zero = infinity_geq(Min1, 0), Max1_leq_zero = infinity_geq(0, Max1), Max_range2 = infinity_max([infinity_abs(Min2), infinity_abs(Max2)]), - Max_range2_leq_zero = infinity_geq(0, Max_range2), - New_min = + New_min = if Min1_geq_zero -> 0; Max_range2 =:= 0 -> 0; - Max_range2_leq_zero -> infinity_add(Max_range2, 1); true -> infinity_add(infinity_inv(Max_range2), 1) end, New_max = if Max1_leq_zero -> 0; Max_range2 =:= 0 -> 0; - Max_range2_leq_zero -> infinity_add(infinity_inv(Max_range2), -1); true -> infinity_add(Max_range2, -1) end, {New_min, New_max}. diff --git a/lib/hipe/doc/src/notes.xml b/lib/hipe/doc/src/notes.xml index 314fd55ba3..58ca0b2138 100644 --- a/lib/hipe/doc/src/notes.xml +++ b/lib/hipe/doc/src/notes.xml @@ -31,6 +31,26 @@ </header> <p>This document describes the changes made to HiPE.</p> +<section><title>Hipe 3.15.4</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> Fix a bug concerning parameterized opaque types. 
</p> + <p> + Own Id: OTP-14130</p> + </item> + <item> + <p> + Fixed xml issues in old release notes</p> + <p> + Own Id: OTP-14269</p> + </item> + </list> + </section> + +</section> + <section><title>Hipe 3.15.3</title> <section><title>Fixed Bugs and Malfunctions</title> @@ -130,12 +150,12 @@ </item> <item> <p> - Various fixes and improvements to the HiPE LLVM backend. + Various fixes and improvements to the HiPE LLVM backend.</p> <list> <item>Add support for LLVM 3.7 and 3.8 in the HiPE/LLVM x86_64 backend</item> <item>Reinstate support for the LLVM backend on x86 (works OK for LLVM 3.5 to 3.7 -- LLVM 3.8 has a bug that prevents it from generating - correct native code on x86)</item> </list></p> + correct native code on x86)</item> </list> <p> Own Id: OTP-13626</p> </item> @@ -191,7 +211,7 @@ <item> <p> Fix various binary construction inconsistencies for hipe - compiled code. <list> <item>Passing bad field sizes to + compiled code.</p> <list> <item>Passing bad field sizes to binary constructions would throw <c>badarith</c> rather than <c>badarg</c>. Worse, in guards, when the unit size of the field was 1, the exception would leak rather than @@ -211,7 +231,7 @@ missing check for unit size match when inserting a binary. For example, a faulty expression like <c><<<<1:7>>/binary>></c> would - succeed.</item> </list></p> + succeed.</item> </list> <p> Own Id: OTP-13272</p> </item> diff --git a/lib/hipe/flow/ebb.inc b/lib/hipe/flow/ebb.inc index 58213e44d5..e4b7fd0efb 100644 --- a/lib/hipe/flow/ebb.inc +++ b/lib/hipe/flow/ebb.inc @@ -40,12 +40,14 @@ %% | {ebb_leaf, SuccesorLabel} %%-------------------------------------------------------------------- -%% XXX: Cheating big time! no recursive types --type ebb() :: {ebb_node, icode_lbl(), _} - | {ebb_leaf, icode_lbl()}. +-type ebb() :: ebb_node() + | ebb_leaf(). -record(ebb_node, {label :: icode_lbl(), successors :: [ebb()]}). +-type ebb_node() :: #ebb_node{}. + -record(ebb_leaf, {successor :: icode_lbl()}). +-type ebb_leaf() :: #ebb_leaf{}. %%-------------------------------------------------------------------- %% Returns a list of extended basic blocks. @@ -193,7 +195,7 @@ add_succ([Lbl|Lbls], Visited, Node, MkFun, EBBs, CFG) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% --spec mk_node(icode_lbl(), [ebb()]) -> #ebb_node{}. +-spec mk_node(icode_lbl(), [ebb()]) -> ebb_node(). mk_node(Label, Successors) -> #ebb_node{label=Label, successors=Successors}. -spec node_label(#ebb_node{}) -> icode_lbl(). @@ -202,11 +204,11 @@ node_label(#ebb_node{label=Label}) -> Label. -spec node_successors(#ebb_node{}) -> [ebb()]. node_successors(#ebb_node{successors=Successors}) -> Successors. --spec mk_leaf(icode_lbl()) -> #ebb_leaf{}. +-spec mk_leaf(icode_lbl()) -> ebb_leaf(). mk_leaf(NextEbb) -> #ebb_leaf{successor=NextEbb}. %% leaf_next(Leaf) -> Leaf#ebb_leaf.successor. --spec type(#ebb_node{}) -> 'node' ; (#ebb_leaf{}) -> 'leaf'. +-spec type(ebb_node()) -> 'node' ; (ebb_leaf()) -> 'leaf'. type(#ebb_node{}) -> node; type(#ebb_leaf{}) -> leaf. 
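Illustration only (not part of the patch): with the ebb.inc change above, ebb() is now a properly recursive type built from ebb_node() and ebb_leaf(), so a small extended basic block can be written directly as a term. The labels below are made up.

    %% An EBB rooted at label 1 whose two branches leave the EBB at
    %% labels 2 and 3.
    Ebb = #ebb_node{label = 1,
                    successors = [#ebb_leaf{successor = 2},
                                  #ebb_leaf{successor = 3}]}.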
diff --git a/lib/hipe/icode/hipe_beam_to_icode.erl b/lib/hipe/icode/hipe_beam_to_icode.erl index 100bc0b0e2..2abecf7f18 100644 --- a/lib/hipe/icode/hipe_beam_to_icode.erl +++ b/lib/hipe/icode/hipe_beam_to_icode.erl @@ -148,7 +148,8 @@ trans_mfa_code(M,F,A, FunBeamCode, ClosureInfo) -> {Code3,_Env3} = mk_debug_calltrace(MFA, Env1, Code2), {Code3,_Env3} = {Code2,Env1}), %% For stack optimization - Leafness = leafness(Code3), + IsClosure = get_closure_info(MFA, ClosureInfo) =/= not_a_closure, + Leafness = leafness(Code3, IsClosure), IsLeaf = is_leaf_code(Leafness), Code4 = [FunLbl | @@ -156,7 +157,6 @@ trans_mfa_code(M,F,A, FunBeamCode, ClosureInfo) -> false -> Code3; true -> [mk_redtest()|Code3] end], - IsClosure = get_closure_info(MFA, ClosureInfo) =/= not_a_closure, Code5 = hipe_icode:mk_icode(MFA, FunArgs, IsClosure, IsLeaf, remove_dead_code(Code4), hipe_gensym:var_range(icode), @@ -173,12 +173,12 @@ trans_mfa_code(M,F,A, FunBeamCode, ClosureInfo) -> mk_redtest() -> hipe_icode:mk_primop([], redtest, []). -leafness(Is) -> % -> true, selfrec, or false - leafness(Is, true). +leafness(Is, IsClosure) -> % -> true, selfrec, closure, or false + leafness(Is, IsClosure, true). -leafness([], Leafness) -> +leafness([], _IsClosure, Leafness) -> Leafness; -leafness([I|Is], Leafness) -> +leafness([I|Is], IsClosure, Leafness) -> case I of #icode_comment{} -> %% BEAM self-tailcalls become gotos, but they leave @@ -191,7 +191,7 @@ leafness([I|Is], Leafness) -> 'self_tail_recursive' -> selfrec; % call_only to selfrec _ -> Leafness end, - leafness(Is, NewLeafness); + leafness(Is, IsClosure, NewLeafness); #icode_call{} -> case hipe_icode:call_type(I) of 'primop' -> @@ -199,12 +199,12 @@ leafness([I|Is], Leafness) -> call_fun -> false; % Calls closure enter_fun -> false; % Calls closure #apply_N{} -> false; - _ -> leafness(Is, Leafness) % Other primop calls are ok + _ -> leafness(Is, IsClosure, Leafness) % Other primop calls are ok end; T when T =:= 'local' orelse T =:= 'remote' -> {M,F,A} = hipe_icode:call_fun(I), case erlang:is_builtin(M, F, A) of - true -> leafness(Is, Leafness); + true -> leafness(Is, IsClosure, Leafness); false -> false end end; @@ -223,11 +223,12 @@ leafness([I|Is], Leafness) -> T when T =:= 'local' orelse T =:= 'remote' -> {M,F,A} = hipe_icode:enter_fun(I), case erlang:is_builtin(M, F, A) of - true -> leafness(Is, Leafness); + true -> leafness(Is, IsClosure, Leafness); + _ when IsClosure -> leafness(Is, IsClosure, closure); _ -> false end end; - _ -> leafness(Is, Leafness) + _ -> leafness(Is, IsClosure, Leafness) end. %% XXX: this old stuff is passed around but essentially unused @@ -235,12 +236,20 @@ is_leaf_code(Leafness) -> case Leafness of true -> true; selfrec -> true; + closure -> false; false -> false end. needs_redtest(Leafness) -> case Leafness of true -> false; + %% A "leaf" closure may contain tailcalls to non-closures in addition to + %% what other leaves may contain. Omitting the redtest is useful to generate + %% shorter code for closures generated by (fun F/A), and is safe since + %% control flow cannot return to a "leaf" closure again without a reduction + %% being consumed. This is true since no function that can call a closure + %% will ever have its redtest omitted. + closure -> false; selfrec -> true; false -> true end. 
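Illustration only (not part of the patch): the new 'closure' leafness covers closures whose only non-builtin escape is a tail call to an ordinary function, e.g. closures built with fun F/A as the comment above describes. A hypothetical example:

    %% mk_inc/0 returns a closure that only tail-calls the local,
    %% non-builtin function inc/1, so leafness/2 classifies the closure
    %% code as 'closure': is_leaf_code/1 treats it as a non-leaf, but
    %% needs_redtest/1 still allows its reduction test to be omitted.
    mk_inc() -> fun inc/1.
    inc(X) -> X + 1.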
@@ -504,6 +513,19 @@ trans_fun([{test,test_arity,{f,Lbl},[Reg,N]}|Instructions], Env) -> I = hipe_icode:mk_type([trans_arg(Reg)],{tuple,N}, hipe_icode:label_name(True),map_label(Lbl)), [I,True | trans_fun(Instructions,Env)]; +%%--- test_is_tagged_tuple --- +trans_fun([{test,is_tagged_tuple,{f,Lbl},[Reg,N,Atom]}|Instructions], Env) -> + TrueArity = mk_label(new), + IArity = hipe_icode:mk_type([trans_arg(Reg)],{tuple,N}, + hipe_icode:label_name(TrueArity),map_label(Lbl)), + Var = hipe_icode:mk_new_var(), + IGet = hipe_icode:mk_primop([Var], + #unsafe_element{index=1}, + [trans_arg(Reg)]), + TrueAtom = mk_label(new), + IEQ = hipe_icode:mk_type([Var], Atom, hipe_icode:label_name(TrueAtom), + map_label(Lbl)), + [IArity,TrueArity,IGet,IEQ,TrueAtom | trans_fun(Instructions,Env)]; %%--- is_map --- trans_fun([{test,is_map,{f,Lbl},[Arg]}|Instructions], Env) -> {Code,Env1} = trans_type_test(map,Lbl,Arg,Env), diff --git a/lib/hipe/icode/hipe_icode_range.erl b/lib/hipe/icode/hipe_icode_range.erl index b884132327..287b1c80fe 100644 --- a/lib/hipe/icode/hipe_icode_range.erl +++ b/lib/hipe/icode/hipe_icode_range.erl @@ -392,14 +392,17 @@ widen(#range{range=Old}, #range{range=New}, T = #range{range=Wide}) -> -spec analyse_call(#icode_call{}, call_fun()) -> #icode_call{}. analyse_call(Call, LookupFun) -> + Args = hipe_icode:args(Call), + Fun = hipe_icode:call_fun(Call), + Type = hipe_icode:call_type(Call), + %% This call has side-effects (it might call LookupFun which sends messages to + %% hipe_icode_coordinator to update the argument ranges of Fun), and must thus + %% not be moved into the case statement. + DstRanges = analyse_call_or_enter_fun(Fun, Args, Type, LookupFun), case hipe_icode:call_dstlist(Call) of [] -> Call; Dsts -> - Args = hipe_icode:args(Call), - Fun = hipe_icode:call_fun(Call), - Type = hipe_icode:call_type(Call), - DstRanges = analyse_call_or_enter_fun(Fun, Args, Type, LookupFun), NewDefs = [update_info(Var, R) || {Var,R} <- lists:zip(Dsts, DstRanges)], hipe_icode:subst_defines(lists:zip(Dsts, NewDefs), Call) end. @@ -1306,16 +1309,15 @@ range_rem(Range1, Range2) -> Min1_geq_zero = inf_geq(Min1, 0), Max1_leq_zero = inf_geq(0, Max1), Max_range2 = inf_max([inf_abs(Min2), inf_abs(Max2)]), - Max_range2_leq_zero = inf_geq(0, Max_range2), New_min = if Min1_geq_zero -> 0; - Max_range2_leq_zero -> Max_range2; - true -> inf_inv(Max_range2) + Max_range2 =:= 0 -> 0; + true -> inf_add(inf_inv(Max_range2), 1) end, New_max = if Max1_leq_zero -> 0; - Max_range2_leq_zero -> inf_inv(Max_range2); - true -> Max_range2 + Max_range2 =:= 0 -> 0; + true -> inf_add(Max_range2, -1) end, range_init({New_min, New_max}, false). 
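Illustration only (not part of the patch): the corrected range_rem/2 matches how 'rem' behaves in Erlang, where the result takes the sign of the dividend and its magnitude stays strictly below the largest possible absolute divisor (Max_range2), hence the inf_add(..., 1) and inf_add(..., -1) adjustments. For instance, with a divisor range of [-3,5], Max_range2 is 5 and the result lies in [-4,4]; it is further clamped to [0,4] when the dividend is known non-negative:

    1> [X rem 5 || X <- [-7, -1, 0, 1, 9]].
    [-2,-1,0,1,4]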
diff --git a/lib/hipe/icode/hipe_icode_type.erl b/lib/hipe/icode/hipe_icode_type.erl index 815d1e57a8..aafaeb5a0a 100644 --- a/lib/hipe/icode/hipe_icode_type.erl +++ b/lib/hipe/icode/hipe_icode_type.erl @@ -1410,9 +1410,10 @@ transform_element2(I) -> NewIndex = case test_type(integer, IndexType) of true -> - case t_number_vals(IndexType) of - unknown -> unknown; - [_|_] = Vals -> {number, Vals} + case {number_min(IndexType), number_max(IndexType)} of + {Lb0, Ub0} when is_integer(Lb0), is_integer(Ub0) -> + {number, Lb0, Ub0}; + {_, _} -> unknown end; _ -> unknown end, @@ -1427,19 +1428,19 @@ transform_element2(I) -> _ -> unknown end, case {NewIndex, MinSize} of - {{number, [_|_] = Ns}, {tuple, A}} when is_integer(A) -> - case lists:all(fun(X) -> 0 < X andalso X =< A end, Ns) of + {{number, Lb, Ub}, {tuple, A}} when is_integer(A) -> + case 0 < Lb andalso Ub =< A of true -> - case Ns of - [Idx] -> + case {Lb, Ub} of + {Idx, Idx} -> [_, Tuple] = hipe_icode:args(I), update_call_or_enter(I, #unsafe_element{index = Idx}, [Tuple]); - [_|_] -> + {_, _} -> NewFun = {element, [MinSize, valid]}, update_call_or_enter(I, NewFun) end; false -> - case lists:all(fun(X) -> hipe_tagscheme:is_fixnum(X) end, Ns) of + case lists:all(fun(X) -> hipe_tagscheme:is_fixnum(X) end, [Lb, Ub]) of true -> NewFun = {element, [MinSize, fixnums]}, update_call_or_enter(I, NewFun); @@ -1454,7 +1455,7 @@ transform_element2(I) -> NewFun = {element, [MinSize, fixnums]}, update_call_or_enter(I, NewFun); false -> - NewFun = {element, [MinSize, NewIndex]}, + NewFun = {element, [MinSize, NewIndex]}, update_call_or_enter(I, NewFun) end end. diff --git a/lib/hipe/llvm/Makefile b/lib/hipe/llvm/Makefile index 88016a7d8b..e8d9a0e8bb 100644 --- a/lib/hipe/llvm/Makefile +++ b/lib/hipe/llvm/Makefile @@ -73,8 +73,7 @@ include ../native.mk ERL_COMPILE_FLAGS += -Werror +inline +warn_export_vars #+warn_missing_spec # if in 32 bit backend define BIT32 symbol -ARCH = $(shell echo $(TARGET) | sed 's/^\(x86_64\)-.*/64bit/') -ifneq ($(ARCH), 64bit) +ifneq ($(BITS64),yes) ERL_COMPILE_FLAGS += -DBIT32 endif diff --git a/lib/hipe/llvm/hipe_llvm_main.erl b/lib/hipe/llvm/hipe_llvm_main.erl index 0957dd4df2..4eec0c752b 100644 --- a/lib/hipe/llvm/hipe_llvm_main.erl +++ b/lib/hipe/llvm/hipe_llvm_main.erl @@ -108,8 +108,10 @@ llvm_llc(Dir, Filename, Ver, Options) -> OptLevel = trans_optlev_flag(llc, Options), VerFlags = llc_ver_flags(Ver), Align = find_stack_alignment(), + Target = llc_target_opt(), LlcFlags = [OptLevel, "-code-model=medium", "-stack-alignment=" ++ Align , "-tailcallopt", "-filetype=asm" %FIXME + , Target | VerFlags], Command = "llc " ++ fix_opts(LlcFlags) ++ " " ++ Source, %% io:format("LLC: ~s~n", [Command]), @@ -123,7 +125,8 @@ llvm_llc(Dir, Filename, Ver, Options) -> compile(Dir, Fun_Name, Compiler) -> Source = Dir ++ Fun_Name ++ ".s", Dest = Dir ++ Fun_Name ++ ".o", - Command = Compiler ++ " -c " ++ Source ++ " -o " ++ Dest, + Target = compiler_target_opt(), + Command = Compiler ++ " " ++ Target ++ " -c " ++ Source ++ " -o " ++ Dest, %% io:format("~s: ~s~n", [Compiler, Command]), case os:cmd(Command) of "" -> ok; @@ -137,6 +140,18 @@ find_stack_alignment() -> _ -> exit({?MODULE, find_stack_alignment, "Unimplemented architecture"}) end. +llc_target_opt() -> + case get(hipe_target_arch) of + x86 -> "-march=x86"; + amd64 -> "-march=x86-64" + end. + +compiler_target_opt() -> + case get(hipe_target_arch) of + x86 -> "-m32"; + amd64 -> "-m64" + end. + %% @doc Join options. fix_opts(Opts) -> string:join(Opts, " "). 
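Illustration only (not part of the patch): llc_target_opt/0 and compiler_target_opt/0 make the target explicit instead of relying on the toolchain's default, which matters when a 32-bit emulator is built on a 64-bit host (or vice versa). On amd64 the resulting commands look roughly like the following, with the file names made up and the other flags exactly as built above:

    llc <existing flags> -march=x86-64 foo.ll
    cc -m64 -c foo.s -o foo.o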
diff --git a/lib/hipe/llvm/hipe_llvm_merge.erl b/lib/hipe/llvm/hipe_llvm_merge.erl index 6e891ac3b0..58d862fbb2 100644 --- a/lib/hipe/llvm/hipe_llvm_merge.erl +++ b/lib/hipe/llvm/hipe_llvm_merge.erl @@ -13,7 +13,7 @@ finalize(CompiledCode, Closures, Exports) -> Code = [{MFA, [], ConstTab} || {MFA, _, _ , ConstTab, _, _} <- CompiledCode1], {ConstAlign, ConstSize, ConstMap, RefsFromConsts} = - hipe_pack_constants:pack_constants(Code, ?ARCH_REGISTERS:alignment()), + hipe_pack_constants:pack_constants(Code), %% Compute total code size separately as a sanity check for alignment CodeSize = compute_code_size(CompiledCode1, 0), %% io:format("Code Size (pre-computed): ~w~n", [CodeSize]), diff --git a/lib/hipe/llvm/hipe_rtl_to_llvm.erl b/lib/hipe/llvm/hipe_rtl_to_llvm.erl index 208d86841f..79e1bfd381 100644 --- a/lib/hipe/llvm/hipe_rtl_to_llvm.erl +++ b/lib/hipe/llvm/hipe_rtl_to_llvm.erl @@ -1364,7 +1364,7 @@ create_function_definition(Fun, Params, Code, LocalVars) -> EntryBlock = lists:flatten([EntryLabel, ExceptionSync, I2, LocalVars, StoredParams, I3]), Final_Code = EntryBlock ++ Code, - FunctionOptions = [nounwind, noredzone, list_to_atom("gc \"erlang\"")], + FunctionOptions = [nounwind, noredzone, 'gc "erlang"'], WordTy = hipe_llvm:mk_int(?BITS_IN_WORD), FunRetTy = hipe_llvm:mk_struct(lists:duplicate(?NR_PINNED_REGS + 1, WordTy)), hipe_llvm:mk_fun_def([], [], "cc 11", [], FunRetTy, FunctionName, Args, diff --git a/lib/hipe/main/hipe.app.src b/lib/hipe/main/hipe.app.src index af2c02006d..de0b255c01 100644 --- a/lib/hipe/main/hipe.app.src +++ b/lib/hipe/main/hipe.app.src @@ -76,6 +76,7 @@ hipe_arm_specific, hipe_arm_subst, hipe_bb, + hipe_bb_weights, hipe_beam_to_icode, hipe_coalescing_regalloc, hipe_consttab, @@ -83,6 +84,7 @@ hipe_digraph, hipe_dominators, hipe_dot, + hipe_dsets, hipe_gen_cfg, hipe_gensym, hipe_graph_coloring_regalloc, @@ -146,9 +148,11 @@ hipe_ppc_specific_fp, hipe_ppc_subst, hipe_profile, + hipe_range_split, hipe_reg_worklists, hipe_regalloc_loop, hipe_regalloc_prepass, + hipe_restore_reuse, hipe_rtl, hipe_rtl_arch, hipe_rtl_arith_32, diff --git a/lib/hipe/main/hipe.erl b/lib/hipe/main/hipe.erl index fff397b060..19b4e8bfe2 100644 --- a/lib/hipe/main/hipe.erl +++ b/lib/hipe/main/hipe.erl @@ -1230,6 +1230,18 @@ option_text(regalloc) -> " optimistic - another variant of a coalescing allocator"; option_text(remove_comments) -> "Strip comments from intermediate code"; +option_text(ra_range_split) -> + "Split live ranges of temporaries live over call instructions\n" + "before performing register allocation.\n" + "Heuristically tries to move stack accesses to the cold path of function.\n" + "This range splitter is more sophisticated than 'ra_restore_reuse', but has\n" + "a significantly larger impact on compile time.\n" + "Should only be used with move coalescing register allocators."; +option_text(ra_restore_reuse) -> + "Split live ranges of temporaries such that straight-line\n" + "code will not need to contain multiple restores from the same stack\n" + "location.\n" + "Should only be used with move coalescing register allocators."; option_text(rtl_ssa) -> "Perform SSA conversion on the RTL level -- default starting at O2"; option_text(rtl_ssa_const_prop) -> @@ -1371,6 +1383,12 @@ opt_keys() -> pp_rtl_linear, ra_partitioned, ra_prespill, + ra_range_split, + ra_restore_reuse, + range_split_min_gain, + range_split_mode1_fudge, + range_split_weight_power, + range_split_weights, regalloc, remove_comments, rtl_ssa, @@ -1409,7 +1427,8 @@ o1_opts(TargetArch) -> icode_ssa_const_prop, 
icode_ssa_copy_prop, icode_inline_bifs, rtl_ssa, rtl_ssa_const_prop, rtl_ssapre, spillmin_color, use_indexing, remove_comments, - binary_opt, {regalloc,coalescing} | o0_opts(TargetArch)], + binary_opt, {regalloc,coalescing}, ra_restore_reuse + | o0_opts(TargetArch)], case TargetArch of ultrasparc -> Common; @@ -1429,7 +1448,8 @@ o1_opts(TargetArch) -> o2_opts(TargetArch) -> Common = [icode_type, icode_call_elim, % icode_ssa_struct_reuse, - rtl_lcm | (o1_opts(TargetArch) -- [rtl_ssapre])], + ra_range_split, range_split_weights, % XXX: Having defaults here is ugly + rtl_lcm | (o1_opts(TargetArch) -- [rtl_ssapre, ra_restore_reuse])], case TargetArch of T when T =:= amd64 orelse T =:= ppc64 -> % 64-bit targets [icode_range | Common]; @@ -1477,6 +1497,9 @@ opt_negations() -> {no_pp_rtl_ssapre, pp_rtl_ssapre}, {no_ra_partitioned, ra_partitioned}, {no_ra_prespill, ra_prespill}, + {no_ra_range_split, ra_range_split}, + {no_ra_restore_reuse, ra_restore_reuse}, + {no_range_split_weights, range_split_weights}, {no_remove_comments, remove_comments}, {no_rtl_ssa, rtl_ssa}, {no_rtl_ssa_const_prop, rtl_ssa_const_prop}, diff --git a/lib/hipe/misc/hipe_consttab.erl b/lib/hipe/misc/hipe_consttab.erl index 64e3d3ccaa..741bdb2094 100644 --- a/lib/hipe/misc/hipe_consttab.erl +++ b/lib/hipe/misc/hipe_consttab.erl @@ -63,9 +63,7 @@ %% A hipe_consttab is a tuple {Data, ReferedLabels, NextConstLabel} %% @type hipe_constlbl(). %% An abstract datatype for referring to data. -%% @type element_type() = byte | word | ctab_array() -%% @type ctab_array() = {ctab_array, Type::element_type(), -%% NoElements::pos_integer()} +%% @type element_type() = byte | word %% @type block() = [integer() | label_ref()] %% @type label_ref() = {label, Label::code_label()} %% @type code_label() = hipe_sparc:label_name() | hipe_x86:label_name() @@ -110,8 +108,7 @@ -type label_ref() :: {'label', code_label()}. -type block() :: [hipe_constlbl() | label_ref()]. --type ctab_array() :: {'ctab_array', 'byte' | 'word', pos_integer()}. --type element_type() :: 'byte' | 'word' | ctab_array(). +-type element_type() :: 'byte' | 'word'. -type sort_order() :: term(). % XXX: FIXME @@ -187,7 +184,7 @@ insert_block({ConstTab, RefToLabels, NextLabel}, ElementType, InitList) -> ReferredLabels = get_labels(InitList, []), NewRefTo = ReferredLabels ++ RefToLabels, {NewTa, Id} = insert_const({ConstTab, NewRefTo, NextLabel}, - block, word_size(), false, + block, size_of(ElementType), false, {ElementType,InitList}), {insert_backrefs(NewTa, Id, ReferredLabels), Id}. @@ -256,13 +253,9 @@ get_labels([], Acc) -> %% @spec size_of(element_type()) -> pos_integer() %% @doc Returns the size in bytes of an element_type. -%% The is_atom/1 guard in the clause handling arrays -%% constraints the argument to 'byte' | 'word' -spec size_of(element_type()) -> pos_integer(). size_of(byte) -> 1; -size_of(word) -> word_size(); -size_of({ctab_array,S,N}) when is_atom(S), is_integer(N), N > 0 -> - N * size_of(S). +size_of(word) -> word_size(). %% @spec decompose({element_type(), block()}) -> [byte()] %% @doc Turns a block into a list of bytes. diff --git a/lib/hipe/misc/hipe_pack_constants.erl b/lib/hipe/misc/hipe_pack_constants.erl index 9dd18bce0f..6736d1f503 100644 --- a/lib/hipe/misc/hipe_pack_constants.erl +++ b/lib/hipe/misc/hipe_pack_constants.erl @@ -13,7 +13,7 @@ %% limitations under the License. -module(hipe_pack_constants). 
--export([pack_constants/2, slim_refs/1, slim_constmap/1, +-export([pack_constants/1, slim_refs/1, slim_constmap/1, find_const/2, mk_data_relocs/2, slim_sorted_exportmap/3]). -include("hipe_consttab.hrl"). @@ -37,8 +37,8 @@ -record(pcm_entry, {mfa :: mfa(), label :: hipe_constlbl(), - const_num :: const_num(), - start :: addr(), + const_num :: const_num(), + start :: addr(), type :: 0 | 1 | 2, raw_data :: raw_data()}). -type pcm_entry() :: #pcm_entry{}. @@ -53,11 +53,11 @@ %%----------------------------------------------------------------------------- --spec pack_constants([{mfa(),[_],hipe_consttab()}], ct_alignment()) -> +-spec pack_constants([{mfa(),[_],hipe_consttab()}]) -> {ct_alignment(), non_neg_integer(), packed_const_map(), mfa_refs_map()}. -pack_constants(Data, Align) -> - pack_constants(Data, 0, Align, 0, [], []). +pack_constants(Data) -> + pack_constants(Data, 0, 1, 0, [], []). % 1 = byte alignment pack_constants([{MFA,_,ConstTab}|Rest], Size, Align, ConstNo, Acc, Refs) -> Labels = hipe_consttab:labels(ConstTab), diff --git a/lib/hipe/opt/Makefile b/lib/hipe/opt/Makefile index 684d6f45b4..5a729d04ae 100644 --- a/lib/hipe/opt/Makefile +++ b/lib/hipe/opt/Makefile @@ -43,7 +43,8 @@ RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) # ---------------------------------------------------- # Target Specs # ---------------------------------------------------- -MODULES = hipe_spillmin hipe_spillmin_color hipe_spillmin_scan +MODULES = hipe_spillmin hipe_spillmin_color hipe_spillmin_scan \ + hipe_bb_weights HRL_FILES= ERL_FILES= $(MODULES:%=%.erl) diff --git a/lib/hipe/opt/hipe_bb_weights.erl b/lib/hipe/opt/hipe_bb_weights.erl new file mode 100644 index 0000000000..8ef113b94c --- /dev/null +++ b/lib/hipe/opt/hipe_bb_weights.erl @@ -0,0 +1,449 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% BASIC BLOCK WEIGHTING +%% +%% Computes basic block weights by using branch probabilities as weights in a +%% linear equation system, that is then solved using Gauss-Jordan Elimination. +%% +%% The equation system representation is intentionally sparse, since most blocks +%% have at most two successors. +-module(hipe_bb_weights). +-export([compute/3, compute_fast/3, weight/2, call_exn_pred/0]). +-export_type([bb_weights/0]). + +-compile(inline). + +%%-define(DO_ASSERT,1). +%%-define(DEBUG,1). +-include("../main/hipe.hrl"). + +%% If the equation system is large, it might take too long to solve it exactly. +%% Thus, if there are more than ?HEUR_MAX_SOLVE labels, we use the iterative +%% approximation. +-define(HEUR_MAX_SOLVE, 10000). + +-opaque bb_weights() :: #{label() => float()}. + +-type cfg() :: any(). +-type target_module() :: module(). +-type target_context() :: any(). +-type target() :: {target_module(), target_context()}. + +-type label() :: integer(). +-type var() :: label(). +-type assignment() :: {var(), float()}. +-type eq_assoc() :: [{var(), key()}]. 
+-type solution() :: [assignment()]. + +%% Constant. Predicted probability of a call resulting in an exception. +-spec call_exn_pred() -> float(). +call_exn_pred() -> 0.01. + +-spec compute(cfg(), target_module(), target_context()) -> bb_weights(). +compute(CFG, TgtMod, TgtCtx) -> + Target = {TgtMod, TgtCtx}, + Labels = labels(CFG, Target), + if length(Labels) > ?HEUR_MAX_SOLVE -> + ?debug_msg("~w: Too many labels (~w), approximating.~n", + [?MODULE, length(Labels)]), + compute_fast(CFG, TgtMod, TgtCtx); + true -> + {EqSys, EqAssoc} = build_eq_system(CFG, Labels, Target), + case solve(EqSys, EqAssoc) of + {ok, Solution} -> + maps:from_list(Solution) + end + end. + +-spec build_eq_system(cfg(), [label()], target()) -> {eq_system(), eq_assoc()}. +build_eq_system(CFG, Labels, Target) -> + StartLb = hipe_gen_cfg:start_label(CFG), + EQS0 = eqs_new(), + {EQS1, Assoc} = build_eq_system(Labels, CFG, Target, [], EQS0), + {StartLb, StartKey} = lists:keyfind(StartLb, 1, Assoc), + StartRow0 = eqs_get(StartKey, EQS1), + StartRow = row_set_const(-1.0, StartRow0), % -1.0 since StartLb coef is -1.0 + EQS = eqs_put(StartKey, StartRow, EQS1), + {EQS, Assoc}. + +build_eq_system([], _CFG, _Target, Map, EQS) -> {EQS, lists:reverse(Map)}; +build_eq_system([L|Ls], CFG, Target, Map, EQS0) -> + PredProb = pred_prob(L, CFG, Target), + {Key, EQS} = eqs_insert(row_new([{L, -1.0}|PredProb], 0.0), EQS0), + build_eq_system(Ls, CFG, Target, [{L, Key}|Map], EQS). + +pred_prob(L, CFG, Target) -> + [begin + BB = bb(CFG, Pred, Target), + Ps = branch_preds(hipe_bb:last(BB), Target), + ?ASSERT(length(lists:ukeysort(1, Ps)) + =:= length(hipe_gen_cfg:succ(CFG, Pred))), + case lists:keyfind(L, 1, Ps) of + {L, Prob} when is_float(Prob) -> {Pred, Prob} + end + end || Pred <- hipe_gen_cfg:pred(CFG, L)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-spec triangelise(eq_system(), eq_assoc()) -> {eq_system(), eq_assoc()}. +triangelise(EQS, VKs) -> + triangelise_1(mk_triix(EQS, VKs), []). + +triangelise_1(TIX0, Acc) -> + case triix_is_empty(TIX0) of + true -> {triix_eqs(TIX0), lists:reverse(Acc)}; + false -> + {V,Key,TIX1} = triix_pop_smallest(TIX0), + Row0 = triix_get(Key, TIX1), + case row_get(V, Row0) of + Coef when Coef > -0.0001, Coef < 0.0001 -> + throw(error); + _ -> + Row = row_normalise(V, Row0), + TIX2 = triix_put(Key, Row, TIX1), + TIX = eliminate_triix(V, Key, Row, TIX2), + triangelise_1(TIX, [{V,Key}|Acc]) + end + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Triangelisation maintains its own index, outside of eqs. This index is +%% essentially a BST (used as a heap) of all equations by size, with {Key,Var} +%% as the values and only containing a subset of all the keys in the whole +%% equation system. The key operation is triix_pop_smallest/1, which pops a +%% {Key,Var} from the heap corresponding to one of the smallest equations. This +%% is critical in order to prevent the equations from growing during +%% triangelisation, which would make the algorithm O(n^2) in the common case. +-type tri_eq_system() :: {eq_system(), + gb_trees:tree(non_neg_integer(), + gb_trees:tree(key(), var()))}. + +triix_eqs({EQS, _}) -> EQS. +triix_get(Key, {EQS, _}) -> eqs_get(Key, EQS). +triix_is_empty({_, Tree}) -> gb_trees:is_empty(Tree). +triix_lookup(V, {EQS, _}) -> eqs_lookup(V, EQS). + +mk_triix(EQS, VKs) -> + {EQS, + lists:foldl(fun({V,Key}, Tree) -> + Size = row_size(eqs_get(Key, EQS)), + sitree_insert(Size, Key, V, Tree) + end, gb_trees:empty(), VKs)}. 
+ +sitree_insert(Size, Key, V, SiTree) -> + SubTree1 = + case gb_trees:lookup(Size, SiTree) of + none -> gb_trees:empty(); + {value, SubTree0} -> SubTree0 + end, + SubTree = gb_trees:insert(Key, V, SubTree1), + gb_trees:enter(Size, SubTree, SiTree). + +sitree_update_subtree(Size, SubTree, SiTree) -> + case gb_trees:is_empty(SubTree) of + true -> gb_trees:delete(Size, SiTree); + false -> gb_trees:update(Size, SubTree, SiTree) + end. + +triix_put(Key, Row, {EQS, Tree0}) -> + OldSize = row_size(eqs_get(Key, EQS)), + case row_size(Row) of + OldSize -> {eqs_put(Key, Row, EQS), Tree0}; + Size -> + Tree = + case gb_trees:lookup(OldSize, Tree0) of + none -> Tree0; + {value, SubTree0} -> + case gb_trees:lookup(Key, SubTree0) of + none -> Tree0; + {value, V} -> + SubTree = gb_trees:delete(Key, SubTree0), + Tree1 = sitree_update_subtree(OldSize, SubTree, Tree0), + sitree_insert(Size, Key, V, Tree1) + end + end, + {eqs_put(Key, Row, EQS), Tree} + end. + +triix_pop_smallest({EQS, Tree}) -> + {Size, SubTree0} = gb_trees:smallest(Tree), + {Key, V, SubTree} = gb_trees:take_smallest(SubTree0), + {V, Key, {EQS, sitree_update_subtree(Size, SubTree, Tree)}}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +row_normalise(Var, Row) -> + %% Normalise v's coef to 1.0 + %% row_set_coef ensures the coef is exactly 1.0 (no rounding errors) + row_set_coef(Var, 1.0, row_scale(Row, 1.0/row_get(Var, Row))). + +%% Precondition: Row must be normalised; i.e. Vars coef must be 1.0 (mod +%% rounding errors) +-spec eliminate(var(), key(), row(), eq_system()) -> eq_system(). +eliminate(Var, Key, Row, TIX0) -> + eliminate_abstr(Var, Key, Row, TIX0, + fun eqs_get/2, fun eqs_lookup/2, fun eqs_put/3). + +-spec eliminate_triix(var(), key(), row(), tri_eq_system()) -> tri_eq_system(). +eliminate_triix(Var, Key, Row, TIX0) -> + eliminate_abstr(Var, Key, Row, TIX0, + fun triix_get/2, fun triix_lookup/2, fun triix_put/3). + +%% The same function implemented for two data types, eqs and triix. +-compile({inline, eliminate_abstr/7}). +-spec eliminate_abstr(var(), key(), row(), ADT, fun((key(), ADT) -> row()), + fun((var(), ADT) -> [key()]), + fun((key(), row(), ADT) -> ADT)) -> ADT. +eliminate_abstr(Var, Key, Row, ADT0, GetFun, LookupFun, PutFun) -> + ?ASSERT(1.0 =:= row_get(Var, Row)), + ADT = + lists:foldl(fun(RK, ADT1) when RK =:= Key -> ADT1; + (RK, ADT1) -> + R = GetFun(RK, ADT1), + PutFun(RK, row_addmul(R, Row, -row_get(Var, R)), ADT1) + end, ADT0, LookupFun(Var, ADT0)), + [Key] = LookupFun(Var, ADT), + ADT. + +-spec solve(eq_system(), eq_assoc()) -> error | {ok, solution()}. +solve(EQS0, EqAssoc0) -> + try triangelise(EQS0, EqAssoc0) + of {EQS1, EqAssoc} -> + {ok, solve_1(EqAssoc, maps:from_list(EqAssoc), EQS1, [])} + catch error -> error + end. + +solve_1([], _VarEqs, _EQS, Acc) -> Acc; +solve_1([{V,K}|Ps], VarEqs, EQS0, Acc0) -> + Row0 = eqs_get(K, EQS0), + VarsToKill = [Var || {Var, _} <- row_coefs(Row0), Var =/= V], + Row1 = kill_vars(VarsToKill, VarEqs, EQS0, Row0), + [{V,_}] = row_coefs(Row1), % assertion + Row = row_normalise(V, Row1), + [{V,1.0}] = row_coefs(Row), % assertion + EQS = eliminate(V, K, Row, EQS0), + [K] = eqs_lookup(V, EQS), + solve_1(Ps, VarEqs, eqs_remove(K, EQS), [{V, row_const(Row)}|Acc0]). 
+ +kill_vars([], _VarEqs, _EQS, Row) -> Row; +kill_vars([V|Vs], VarEqs, EQS, Row0) -> + VRow0 = eqs_get(maps:get(V, VarEqs), EQS), + VRow = row_normalise(V, VRow0), + ?ASSERT(1.0 =:= row_get(V, VRow)), + Row = row_addmul(Row0, VRow, -row_get(V, Row0)), + ?ASSERT(0.0 =:= row_get(V, Row)), % V has been killed + kill_vars(Vs, VarEqs, EQS, Row). + +-spec weight(label(), bb_weights()) -> float(). +weight(Lbl, Weights) -> + maps:get(Lbl, Weights). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Row datatype +%% Invariant: No 0.0 coefficiets! +-spec row_empty() -> row(). +row_empty() -> {orddict:new(), 0.0}. + +-spec row_new([{var(), float()}], float()) -> row(). +row_new(Coefs, Const) when is_float(Const) -> + row_ensure_invar({row_squash_multiples(lists:keysort(1, Coefs)), Const}). + +row_squash_multiples([{K, C1},{K, C2}|Ps]) -> + row_squash_multiples([{K,C1+C2}|Ps]); +row_squash_multiples([P|Ps]) -> [P|row_squash_multiples(Ps)]; +row_squash_multiples([]) -> []. + +row_ensure_invar({Coef, Const}) -> + {orddict:filter(fun(_, 0.0) -> false; (_, F) when is_float(F) -> true end, + Coef), Const}. + +row_const({_, Const}) -> Const. +row_coefs({Coefs, _}) -> orddict:to_list(Coefs). +row_size({Coefs, _}) -> orddict:size(Coefs). + +row_get(Var, {Coefs, _}) -> + case lists:keyfind(Var, 1, Coefs) of + false -> 0.0; + {_, Coef} -> Coef + end. + +row_set_coef(Var, 0.0, {Coefs, Const}) -> + {orddict:erase(Var, Coefs), Const}; +row_set_coef(Var, Coef, {Coefs, Const}) -> + {orddict:store(Var, Coef, Coefs), Const}. + +row_set_const(Const, {Coefs, _}) -> {Coefs, Const}. + +%% Lhs + Rhs*Factor +-spec row_addmul(row(), row(), float()) -> row(). +row_addmul({LhsCoefs, LhsConst}, {RhsCoefs, RhsConst}, Factor) + when is_float(Factor) -> + Coefs = row_addmul_coefs(LhsCoefs, RhsCoefs, Factor), + Const = LhsConst + RhsConst * Factor, + {Coefs, Const}. + +row_addmul_coefs(Ls, [], Factor) when is_float(Factor) -> Ls; +row_addmul_coefs([], Rs, Factor) when is_float(Factor) -> + row_scale_coefs(Rs, Factor); +row_addmul_coefs([L={LV, _}|Ls], Rs=[{RV,_}|_], Factor) + when LV < RV, is_float(Factor) -> + [L|row_addmul_coefs(Ls, Rs, Factor)]; +row_addmul_coefs(Ls=[{LV, _}|_], [{RV, RC}|Rs], Factor) + when LV > RV, is_float(RC), is_float(Factor) -> + [{RV, RC*Factor}|row_addmul_coefs(Ls, Rs, Factor)]; +row_addmul_coefs([{V, LC}|Ls], [{V, RC}|Rs], Factor) + when is_float(LC), is_float(RC), is_float(Factor) -> + case LC + RC * Factor of + 0.0 -> row_addmul_coefs(Ls, Rs, Factor); + C -> [{V,C}|row_addmul_coefs(Ls, Rs, Factor)] + end. + +row_scale(_, 0.0) -> row_empty(); +row_scale({RowCoefs, RowConst}, Factor) when is_float(Factor) -> + {row_scale_coefs(RowCoefs, Factor), RowConst * Factor}. + +row_scale_coefs([{V,C}|Cs], Factor) when is_float(Factor), is_float(C) -> + [{V,C*Factor}|row_scale_coefs(Cs, Factor)]; +row_scale_coefs([], Factor) when is_float(Factor) -> + []. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Equation system ADT +%% +%% Stores a linear equation system, allowing for efficient updates and efficient +%% queries for all equations mentioning a variable. +%% +%% It is sort of like a "database" table of {Primary, Terms, Const} indexed both +%% on Primary as well as the vars (map keys) in Terms. +-type row() :: {Terms :: orddict:orddict(var(), float()), + Const :: float()}. +-type key() :: non_neg_integer(). +-type rev_index() :: #{var() => ordsets:ordset(key())}. 
+-record(eq_system, { + rows = #{} :: #{key() => row()}, + revidx = revidx_empty() :: rev_index(), + next_key = 0 :: key() + }). +-type eq_system() :: #eq_system{}. + +eqs_new() -> #eq_system{}. + +-spec eqs_insert(row(), eq_system()) -> {key(), eq_system()}. +eqs_insert(Row, EQS=#eq_system{next_key=NextKey0}) -> + Key = NextKey0, + NextKey = NextKey0 + 1, + {Key, eqs_insert(Key, Row, EQS#eq_system{next_key=NextKey})}. + +eqs_insert(Key, Row, EQS=#eq_system{rows=Rows, revidx=RevIdx0}) -> + RevIdx = revidx_add(Key, Row, RevIdx0), + EQS#eq_system{rows=Rows#{Key => Row}, revidx=RevIdx}. + +eqs_put(Key, Row, EQS0) -> + eqs_insert(Key, Row, eqs_remove(Key, EQS0)). + +eqs_remove(Key, EQS=#eq_system{rows=Rows, revidx=RevIdx0}) -> + OldRow = maps:get(Key, Rows), + RevIdx = revidx_remove(Key, OldRow, RevIdx0), + EQS#eq_system{rows = maps:remove(Key, Rows), revidx=RevIdx}. + +-spec eqs_get(key(), eq_system()) -> row(). +eqs_get(Key, #eq_system{rows=Rows}) -> maps:get(Key, Rows). + +%% Keys of all equations containing a nonzero coefficient for Var +-spec eqs_lookup(var(), eq_system()) -> ordsets:ordset(key()). +eqs_lookup(Var, #eq_system{revidx=RevIdx}) -> maps:get(Var, RevIdx). + +%% eqs_rows(#eq_system{rows=Rows}) -> maps:to_list(Rows). + +%% eqs_print(EQS) -> +%% lists:foreach(fun({_, Row}) -> +%% row_print(Row) +%% end, lists:sort(eqs_rows(EQS))). + +%% row_print(Row) -> +%% CoefStrs = [io_lib:format("~wl~w", [Coef, Var]) +%% || {Var, Coef} <- row_coefs(Row)], +%% CoefStr = lists:join(" + ", CoefStrs), +%% io:format("~w = ~s~n", [row_const(Row), CoefStr]). + +revidx_empty() -> #{}. + +-spec revidx_add(key(), row(), rev_index()) -> rev_index(). +revidx_add(Key, Row, RevIdx0) -> + orddict:fold(fun(Var, _Coef, RevIdx1) -> + ?ASSERT(_Coef /= 0.0), + RevIdx1#{Var => ordsets:add_element( + Key, maps:get(Var, RevIdx1, ordsets:new()))} + end, RevIdx0, row_coefs(Row)). + +-spec revidx_remove(key(), row(), rev_index()) -> rev_index(). +revidx_remove(Key, {Coefs, _}, RevIdx0) -> + orddict:fold(fun(Var, _Coef, RevIdx1) -> + case RevIdx1 of + #{Var := Keys0} -> + case ordsets:del_element(Key, Keys0) of + [] -> maps:remove(Var, RevIdx1); + Keys -> RevIdx1#{Var := Keys} + end + end + end, RevIdx0, Coefs). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-define(FAST_ITERATIONS, 5). + +%% @doc Computes a rough approximation of BB weights. The approximation is +%% particularly poor (converges slowly) for recursive functions and loops. +-spec compute_fast(cfg(), target_module(), target_context()) -> bb_weights(). +compute_fast(CFG, TgtMod, TgtCtx) -> + Target = {TgtMod, TgtCtx}, + StartLb = hipe_gen_cfg:start_label(CFG), + RPO = reverse_postorder(CFG, Target), + PredProbs = [{L, pred_prob(L, CFG, Target)} || L <- RPO, L =/= StartLb], + Probs0 = (maps:from_list([{L, 0.0} || L <- RPO]))#{StartLb := 1.0}, + fast_iterate(?FAST_ITERATIONS, PredProbs, Probs0). + +fast_iterate(0, _Pred, Probs) -> Probs; +fast_iterate(Iters, Pred, Probs0) -> + fast_iterate(Iters-1, Pred, + fast_one(Pred, Probs0)). + +fast_one([{L, Pred}|Ls], Probs0) -> + Weight = fast_sum(Pred, Probs0, 0.0), + Probs = Probs0#{L => Weight}, + fast_one(Ls, Probs); +fast_one([], Probs) -> + Probs. + +fast_sum([{P,EWt}|Pred], Probs, Acc) when is_float(EWt), is_float(Acc) -> + case Probs of + #{P := PWt} when is_float(PWt) -> + fast_sum(Pred, Probs, Acc + PWt * EWt) + end; +fast_sum([], _Probs, Acc) when is_float(Acc) -> + Acc. 
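%% Illustration only (not part of the patch): for a diamond-shaped CFG
%%   start --0.6--> a --> c,   start --0.4--> b --> c
%% build_eq_system/3 emits one equation per label,
%%   -1.0*w(start)                    = -1.0   (start row)
%%   -1.0*w(a) + 0.6*w(start)         =  0.0
%%   -1.0*w(b) + 0.4*w(start)         =  0.0
%%   -1.0*w(c) + 1.0*w(a) + 1.0*w(b)  =  0.0
%% whose solution w(start) = 1.0, w(a) = 0.6, w(b) = 0.4, w(c) = 1.0 is
%% what weight/2 later returns. Because this CFG is acyclic,
%% compute_fast/3 reaches the same fixpoint after a single fast_one/2
%% pass; loops and recursion are where the ?FAST_ITERATIONS-bounded
%% approximation loses precision compared to the exact solver.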
+ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Target module interface functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-define(TGT_IFACE_0(N), N( {M,C}) -> M:N( C)). +-define(TGT_IFACE_1(N), N(A1, {M,C}) -> M:N(A1, C)). +-define(TGT_IFACE_2(N), N(A1,A2, {M,C}) -> M:N(A1,A2, C)). +-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)). + +?TGT_IFACE_2(bb). +?TGT_IFACE_1(branch_preds). +?TGT_IFACE_1(labels). +?TGT_IFACE_1(reverse_postorder). diff --git a/lib/hipe/opt/hipe_spillmin_color.erl b/lib/hipe/opt/hipe_spillmin_color.erl index 41f1972df7..f87d9a5b61 100644 --- a/lib/hipe/opt/hipe_spillmin_color.erl +++ b/lib/hipe/opt/hipe_spillmin_color.erl @@ -166,9 +166,13 @@ remap_temp_map0(Cols, [_Y|Ys], SpillIndex) -> %% build_ig(CFG, Live, Target, TempMap) -> - try build_ig0(CFG, Live, Target, TempMap) - catch error:Rsn -> exit({regalloc, build_ig, Rsn}) - end. + TempMapping = map_spilled_temporaries(TempMap), + TempMappingTable = setup_ets(TempMapping), + NumSpilled = length(TempMapping), + IG = build_ig_bbs(labels(CFG, Target), CFG, Live, empty_ig(NumSpilled), + Target, TempMap, TempMappingTable), + ets:delete(TempMappingTable), + {normalize_ig(IG), NumSpilled}. %% Creates an ETS table consisting of the keys given in List, with the values %% being an integer which is the position of the key in List. @@ -183,15 +187,6 @@ setup_ets0([X|Xs], Table, N) -> ets:insert(Table, {X, N}), setup_ets0(Xs, Table, N+1). -build_ig0(CFG, Live, Target, TempMap) -> - TempMapping = map_spilled_temporaries(TempMap), - TempMappingTable = setup_ets(TempMapping), - NumSpilled = length(TempMapping), - IG = build_ig_bbs(labels(CFG, Target), CFG, Live, empty_ig(NumSpilled), - Target, TempMap, TempMappingTable), - ets:delete(TempMappingTable), - {normalize_ig(IG), NumSpilled}. - build_ig_bbs([], _CFG, _Live, IG, _Target, _TempMap, _TempMapping) -> IG; build_ig_bbs([L|Ls], CFG, Live, IG, Target, TempMap, TempMapping) -> @@ -212,16 +207,26 @@ build_ig_bb([X|Xs], LiveOut, IG, Target, TempMap, TempMapping) -> build_ig_bb(Xs, LiveOut, IG, Target, TempMap, TempMapping), build_ig_instr(X, Live, NewIG, Target, TempMap, TempMapping). -build_ig_instr(X, Live, IG, Target, TempMap, TempMapping) -> +build_ig_instr(X, Live0, IG0, Target, TempMap, TempMapping) -> {Def, Use} = def_use(X, Target, TempMap), - ?report3("Live ~w\n~w : Def: ~w Use ~w\n",[Live, X, Def,Use]), + ?report3("Live ~w\n~w : Def: ~w Use ~w\n",[Live0, X, Def,Use]), DefListMapped = list_map(Def, TempMapping, []), UseListMapped = list_map(Use, TempMapping, []), DefSetMapped = ordsets:from_list(DefListMapped), UseSetMapped = ordsets:from_list(UseListMapped), - NewIG = interference_arcs(DefListMapped, ordsets:to_list(Live), IG), - NewLive = ordsets:union(UseSetMapped, ordsets:subtract(Live, DefSetMapped)), - {NewLive, NewIG}. + {Live1, IG1} = + analyze_move(X, Live0, IG0, Target, DefSetMapped, UseSetMapped), + IG = interference_arcs(DefListMapped, ordsets:to_list(Live1), IG1), + Live = ordsets:union(UseSetMapped, ordsets:subtract(Live1, DefSetMapped)), + {Live, IG}. + +analyze_move(X, Live0, IG0, Target, DefSetMapped, UseSetMapped) -> + case {is_spill_move(X, Target), DefSetMapped, UseSetMapped} of + {true, [Dst], [Src]} -> + {ordsets:del_element(Src, Live0), add_move(Src, Dst, IG0)}; + {_, _, _} -> + {Live0, IG0} + end. %% Given a list of Keys and an ets-table returns a list of the elements %% in Mapping corresponding to the Keys and appends Acc to this list. 
@@ -271,15 +276,6 @@ i_arcs(X, [Y|Ys], IG) -> %% throw an exception (the caller should retry with more stack slots) color(IG, StackSlots, NumNodes, Target) -> - try - color_0(IG, StackSlots, NumNodes, Target) - catch - error:Rsn -> - ?error_msg("Coloring failed with ~p~n", [Rsn]), - ?EXIT(Rsn) - end. - -color_0(IG, StackSlots, NumNodes, Target) -> ?report("simplification of IG~n", []), K = ordsets:size(StackSlots), Nodes = list_ig(IG), @@ -382,7 +378,8 @@ select_colors([{X,colorable}|Xs], IG, Cols, PhysRegs) -> select_color(X, IG, Cols, PhysRegs) -> UsedColors = get_colors(neighbors(X, IG), Cols), - Reg = select_unused_color(UsedColors, PhysRegs), + Preferences = get_colors(move_connected(X, IG), Cols), + Reg = select_unused_color(UsedColors, Preferences, PhysRegs), {Reg, set_color(X, Reg, Cols)}. %%%%%%%%%%%%%%%%%%%% @@ -396,10 +393,14 @@ get_colors([X|Xs], Cols) -> [R|get_colors(Xs, Cols)] end. -select_unused_color(UsedColors, PhysRegs) -> +select_unused_color(UsedColors, Preferences, PhysRegs) -> Summary = ordsets:from_list(UsedColors), - AvailRegs = ordsets:to_list(ordsets:subtract(PhysRegs, Summary)), - hd(AvailRegs). + case ordsets:subtract(ordsets:from_list(Preferences), Summary) of + [PreferredColor|_] -> PreferredColor; + _ -> + AvailRegs = ordsets:to_list(ordsets:subtract(PhysRegs, Summary)), + hd(AvailRegs) + end. push_colored(X, Stk) -> [{X, colorable} | Stk]. @@ -456,7 +457,11 @@ init_stackslots(NumSlots, Acc) -> %% %% Note: later on, we may wish to add 'move-related' support. --record(ig_info, {neighbors = [] :: [_], degree = 0 :: non_neg_integer()}). +-record(ig_info, { + neighbors = [] :: [_], + degree = 0 :: non_neg_integer(), + move_connected = [] :: [_] + }). empty_ig(NumNodes) -> hipe_vectors:new(NumNodes, #ig_info{}). @@ -467,16 +472,29 @@ degree(Info) -> neighbors(Info) -> Info#ig_info.neighbors. +move_connected(Info) -> + Info#ig_info.move_connected. + add_edge(X, X, IG) -> IG; add_edge(X, Y, IG) -> add_arc(X, Y, add_arc(Y, X, IG)). +add_move(X, X, IG) -> IG; +add_move(X, Y, IG) -> + add_move_arc(X, Y, add_move_arc(Y, X, IG)). + add_arc(X, Y, IG) -> Info = hipe_vectors:get(IG, X), Old = neighbors(Info), New = Info#ig_info{neighbors = [Y|Old]}, hipe_vectors:set(IG,X,New). +add_move_arc(X, Y, IG) -> + Info = hipe_vectors:get(IG, X), + Old = move_connected(Info), + New = Info#ig_info{move_connected = [Y|Old]}, + hipe_vectors:set(IG,X,New). + normalize_ig(IG) -> Size = hipe_vectors:size(IG), normalize_ig(Size-1, IG). @@ -486,7 +504,8 @@ normalize_ig(-1, IG) -> normalize_ig(I, IG) -> Info = hipe_vectors:get(IG, I), N = ordsets:from_list(neighbors(Info)), - NewInfo = Info#ig_info{neighbors = N, degree = length(N)}, + M = ordsets:subtract(ordsets:from_list(move_connected(Info)), N), + NewInfo = Info#ig_info{neighbors = N, degree = length(N), move_connected = M}, NewIG = hipe_vectors:set(IG, I, NewInfo), normalize_ig(I-1, NewIG). @@ -494,6 +513,10 @@ neighbors(X, IG) -> Info = hipe_vectors:get(IG, X), Info#ig_info.neighbors. +move_connected(X, IG) -> + Info = hipe_vectors:get(IG, X), + Info#ig_info.move_connected. + decrement_degree(X, IG) -> Info = hipe_vectors:get(IG, X), Degree = degree(Info), @@ -555,3 +578,6 @@ def_use(X, Target={TgtMod,TgtCtx}, TempMap) -> reg_names(Regs, {TgtMod,TgtCtx}) -> [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. + +is_spill_move(Instr, {TgtMod,TgtCtx}) -> + TgtMod:is_spill_move(Instr, TgtCtx). 
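The move-related additions above exist so that when a spilled temp is copied to another spilled temp, the spill-slot coloring can try to give both the same slot, turning the copy into a move-to-self that can be dropped. A small stand-alone sketch of the selection rule now used by select_unused_color/3 (illustrative module and data, not HiPE's API):

-module(prefer_color_sketch).
-export([demo/0]).

%% Prefer a slot already used by a move partner, provided no interfering
%% neighbour uses it; otherwise fall back to the first free slot.
select_color(NeighbourColors, MovePartnerColors, AllColors) ->
  Used = ordsets:from_list(NeighbourColors),
  Prefs = ordsets:from_list(MovePartnerColors),
  case ordsets:subtract(Prefs, Used) of
    [Preferred|_] -> Preferred;
    [] -> hd(ordsets:subtract(ordsets:from_list(AllColors), Used))
  end.

demo() ->
  %% Neighbours hold slots 0 and 1; the move partner sits in slot 2, so slot 2
  %% is picked and the spill-to-spill copy becomes removable.
  2 = select_color([0, 1], [2], [0, 1, 2, 3]),
  %% If the partner's slot interferes, the first free slot wins instead.
  1 = select_color([0, 2], [2], [0, 1, 2, 3]),
  ok.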
diff --git a/lib/hipe/ppc/hipe_ppc.erl b/lib/hipe/ppc/hipe_ppc.erl index df9f193fa3..63ecd0a0b8 100644 --- a/lib/hipe/ppc/hipe_ppc.erl +++ b/lib/hipe/ppc/hipe_ppc.erl @@ -98,6 +98,9 @@ pseudo_move_dst/1, pseudo_move_src/1, + mk_pseudo_spill_move/3, + is_pseudo_spill_move/1, + mk_pseudo_tailcall/4, pseudo_tailcall_func/1, pseudo_tailcall_stkargs/1, @@ -131,6 +134,9 @@ pseudo_fmove_dst/1, pseudo_fmove_src/1, + mk_pseudo_spill_fmove/3, + is_pseudo_spill_fmove/1, + mk_defun/8, defun_mfa/1, defun_formals/1, @@ -412,6 +418,10 @@ is_pseudo_move(I) -> case I of #pseudo_move{} -> true; _ -> false end. pseudo_move_dst(#pseudo_move{dst=Dst}) -> Dst. pseudo_move_src(#pseudo_move{src=Src}) -> Src. +mk_pseudo_spill_move(Dst, Temp, Src) -> + #pseudo_spill_move{dst=Dst, temp=Temp, src=Src}. +is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move). + mk_pseudo_tailcall(FunC, Arity, StkArgs, Linkage) -> #pseudo_tailcall{func=FunC, arity=Arity, stkargs=StkArgs, linkage=Linkage}. pseudo_tailcall_func(#pseudo_tailcall{func=FunC}) -> FunC. @@ -495,6 +505,10 @@ is_pseudo_fmove(I) -> case I of #pseudo_fmove{} -> true; _ -> false end. pseudo_fmove_dst(#pseudo_fmove{dst=Dst}) -> Dst. pseudo_fmove_src(#pseudo_fmove{src=Src}) -> Src. +mk_pseudo_spill_fmove(Dst, Temp, Src) -> + #pseudo_spill_fmove{dst=Dst, temp=Temp, src=Src}. +is_pseudo_spill_fmove(I) -> is_record(I, pseudo_spill_fmove). + mk_defun(MFA, Formals, IsClosure, IsLeaf, Code, Data, VarRange, LabelRange) -> #defun{mfa=MFA, formals=Formals, code=Code, data=Data, isclosure=IsClosure, isleaf=IsLeaf, diff --git a/lib/hipe/ppc/hipe_ppc.hrl b/lib/hipe/ppc/hipe_ppc.hrl index a96692c52e..3eef8be487 100644 --- a/lib/hipe/ppc/hipe_ppc.hrl +++ b/lib/hipe/ppc/hipe_ppc.hrl @@ -87,6 +87,7 @@ -record(pseudo_call_prepare, {nrstkargs}). -record(pseudo_li, {dst, imm}). -record(pseudo_move, {dst, src}). +-record(pseudo_spill_move, {dst, temp, src}). -record(pseudo_tailcall, {func, arity, stkargs, linkage}). -record(pseudo_tailcall_prepare, {}). -record(store, {stop, src, disp, base}). % non-indexed, non-update form @@ -99,6 +100,7 @@ -record(fp_binary, {fp_binop, dst, src1, src2}). -record(fp_unary, {fp_unop, dst, src}). -record(pseudo_fmove, {dst, src}). +-record(pseudo_spill_fmove, {dst, temp, src}). %%% Function definitions. diff --git a/lib/hipe/ppc/hipe_ppc_assemble.erl b/lib/hipe/ppc/hipe_ppc_assemble.erl index 66817837df..b0f57e5582 100644 --- a/lib/hipe/ppc/hipe_ppc_assemble.erl +++ b/lib/hipe/ppc/hipe_ppc_assemble.erl @@ -32,7 +32,7 @@ assemble(CompiledCode, Closures, Exports, Options) -> || {MFA, Defun} <- CompiledCode], %% {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = - hipe_pack_constants:pack_constants(Code, hipe_rtl_arch:word_size()), + hipe_pack_constants:pack_constants(Code), %% {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = encode(translate(Code, ConstMap), Options), diff --git a/lib/hipe/ppc/hipe_ppc_cfg.erl b/lib/hipe/ppc/hipe_ppc_cfg.erl index f17c0ac503..d44d38f38d 100644 --- a/lib/hipe/ppc/hipe_ppc_cfg.erl +++ b/lib/hipe/ppc/hipe_ppc_cfg.erl @@ -21,8 +21,8 @@ bb/2, bb_add/3]). -export([postorder/1]). -export([linearise/1, params/1, reverse_postorder/1]). --export([arity/1]). -%%%-export([redirect_jmp/3, arity/1]). +-export([redirect_jmp/3, arity/1]). +-export([branch_preds/1]). %%% these tell cfg.inc what to define (ugly as hell) -define(BREADTH_ORDER,true). @@ -75,11 +75,30 @@ branch_successors(Branch) -> #pseudo_tailcall{} -> [] end. 
+branch_preds(Branch) -> + case Branch of + #bctr{labels=Labels} -> + Prob = 1.0/length(Labels), + [{L, Prob} || L <- Labels]; + #pseudo_bc{true_label=TrueLab,false_label=FalseLab,pred=Pred} -> + [{FalseLab, 1.0-Pred}, {TrueLab, Pred}]; + #pseudo_call{contlab=ContLab, sdesc=#ppc_sdesc{exnlab=[]}} -> + %% A function can still cause an exception, even if we won't catch it + [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}]; + #pseudo_call{contlab=ContLab, sdesc=#ppc_sdesc{exnlab=ExnLab}} -> + CallExnPred = hipe_bb_weights:call_exn_pred(), + [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}]; + _ -> + case branch_successors(Branch) of + [] -> []; + [Single] -> [{Single, 1.0}] + end + end. + -ifdef(REMOVE_TRIVIAL_BBS_NEEDED). fails_to(_Instr) -> []. -endif. --ifdef(notdef). redirect_jmp(I, Old, New) -> case I of #b_label{label=Label} -> @@ -93,10 +112,16 @@ redirect_jmp(I, Old, New) -> if Old =:= FalseLab -> I1#pseudo_bc{false_label=New}; true -> I1 end; - %% handle pseudo_call too? - _ -> I + #pseudo_call{sdesc=SDesc0, contlab=ContLab0} -> + SDesc = case SDesc0 of + #ppc_sdesc{exnlab=Old} -> SDesc0#ppc_sdesc{exnlab=New}; + #ppc_sdesc{exnlab=_} -> SDesc0 + end, + ContLab = if Old =:= ContLab0 -> New; + true -> ContLab0 + end, + I#pseudo_call{sdesc=SDesc, contlab=ContLab} end. --endif. mk_goto(Label) -> hipe_ppc:mk_b_label(Label). diff --git a/lib/hipe/ppc/hipe_ppc_defuse.erl b/lib/hipe/ppc/hipe_ppc_defuse.erl index 9a99611493..d8a864f7d5 100644 --- a/lib/hipe/ppc/hipe_ppc_defuse.erl +++ b/lib/hipe/ppc/hipe_ppc_defuse.erl @@ -41,6 +41,7 @@ insn_def_gpr(I) -> #pseudo_call{} -> call_clobbered_gpr(); #pseudo_li{dst=Dst} -> [Dst]; #pseudo_move{dst=Dst} -> [Dst]; + #pseudo_spill_move{dst=Dst,temp=Temp} -> [Dst, Temp]; #pseudo_tailcall_prepare{} -> tailcall_clobbered_gpr(); #unary{dst=Dst} -> [Dst]; _ -> [] @@ -71,6 +72,7 @@ insn_use_gpr(I) -> #mtspr{src=Src} -> [Src]; #pseudo_call{sdesc=#ppc_sdesc{arity=Arity}} -> arity_use_gpr(Arity); #pseudo_move{src=Src} -> [Src]; + #pseudo_spill_move{src=Src} -> [Src]; #pseudo_tailcall{arity=Arity,stkargs=StkArgs} -> addsrcs(StkArgs, addtemps(tailcall_clobbered_gpr(), arity_use_gpr(Arity))); #store{src=Src,base=Base} -> addtemp(Src, [Base]); @@ -110,6 +112,7 @@ insn_def_fpr(I) -> #fp_binary{dst=Dst} -> [Dst]; #fp_unary{dst=Dst} -> [Dst]; #pseudo_fmove{dst=Dst} -> [Dst]; + #pseudo_spill_fmove{dst=Dst,temp=Temp} -> [Dst, Temp]; _ -> [] end. @@ -126,6 +129,7 @@ insn_use_fpr(I) -> #fp_binary{src1=Src1,src2=Src2} -> addtemp(Src1, [Src2]); #fp_unary{src=Src} -> [Src]; #pseudo_fmove{src=Src} -> [Src]; + #pseudo_spill_fmove{src=Src} -> [Src]; _ -> [] end. diff --git a/lib/hipe/ppc/hipe_ppc_frame.erl b/lib/hipe/ppc/hipe_ppc_frame.erl index a91cb18cc2..b88b75a5bd 100644 --- a/lib/hipe/ppc/hipe_ppc_frame.erl +++ b/lib/hipe/ppc/hipe_ppc_frame.erl @@ -66,10 +66,14 @@ do_insn(I, LiveOut, Context, FPoff) -> do_pseudo_call_prepare(I, FPoff); #pseudo_move{} -> {do_pseudo_move(I, Context, FPoff), FPoff}; + #pseudo_spill_move{} -> + {do_pseudo_spill_move(I, Context, FPoff), FPoff}; #pseudo_tailcall{} -> {do_pseudo_tailcall(I, Context), context_framesize(Context)}; #pseudo_fmove{} -> {do_pseudo_fmove(I, Context, FPoff), FPoff}; + #pseudo_spill_fmove{} -> + {do_pseudo_spill_fmove(I, Context, FPoff), FPoff}; _ -> {[I], FPoff} end. @@ -98,6 +102,22 @@ do_pseudo_move(I, Context, FPoff) -> end end. 
+do_pseudo_spill_move(I, Context, FPoff) -> + #pseudo_spill_move{dst=Dst,temp=Temp,src=Src} = I, + case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of + false -> % Register allocator changed its mind, turn back to move + do_pseudo_move(hipe_ppc:mk_pseudo_move(Dst, Src), Context, FPoff); + true -> + SrcOffset = pseudo_offset(Src, FPoff, Context), + DstOffset = pseudo_offset(Dst, FPoff, Context), + case SrcOffset =:= DstOffset of + true -> []; % omit move-to-self + false -> + mk_load(hipe_ppc:ldop_word(), Temp, SrcOffset, mk_sp(), + mk_store(hipe_ppc:stop_word(), Temp, DstOffset, mk_sp(), [])) + end + end. + do_pseudo_fmove(I, Context, FPoff) -> Dst = hipe_ppc:pseudo_fmove_dst(I), Src = hipe_ppc:pseudo_fmove_src(I), @@ -115,6 +135,22 @@ do_pseudo_fmove(I, Context, FPoff) -> end end. +do_pseudo_spill_fmove(I, Context, FPoff) -> + #pseudo_spill_fmove{dst=Dst,temp=Temp,src=Src} = I, + case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of + false -> % Register allocator changed its mind, turn back to move + do_pseudo_fmove(hipe_ppc:mk_pseudo_fmove(Dst, Src), Context, FPoff); + true -> + SrcOffset = pseudo_offset(Src, FPoff, Context), + DstOffset = pseudo_offset(Dst, FPoff, Context), + case SrcOffset =:= DstOffset of + true -> []; % omit move-to-self + false -> + hipe_ppc:mk_fload(Temp, SrcOffset, mk_sp(), 0) + ++ hipe_ppc:mk_fstore(Temp, DstOffset, mk_sp(), 0) + end + end. + pseudo_offset(Temp, FPoff, Context) -> FPoff + context_offset(Context, Temp). diff --git a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl index 74ef7475eb..bca504d754 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl @@ -41,6 +41,7 @@ ra_insn(I, Map, FPMap) -> #mtspr{} -> ra_mtspr(I, Map); #pseudo_li{} -> ra_pseudo_li(I, Map); #pseudo_move{} -> ra_pseudo_move(I, Map); + #pseudo_spill_move{} -> ra_pseudo_spill_move(I, Map); #pseudo_tailcall{} -> ra_pseudo_tailcall(I, Map); #store{} -> ra_store(I, Map); #storex{} -> ra_storex(I, Map); @@ -52,6 +53,7 @@ ra_insn(I, Map, FPMap) -> #fp_binary{} -> ra_fp_binary(I, FPMap); #fp_unary{} -> ra_fp_unary(I, FPMap); #pseudo_fmove{} -> ra_pseudo_fmove(I, FPMap); + #pseudo_spill_fmove{} -> ra_pseudo_spill_fmove(I, FPMap); _ -> I end. @@ -98,6 +100,12 @@ ra_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, Map) -> NewSrc = ra_temp(Src, Map), I#pseudo_move{dst=NewDst,src=NewSrc}. +ra_pseudo_spill_move(I=#pseudo_spill_move{dst=Dst,temp=Temp,src=Src}, Map) -> + NewDst = ra_temp(Dst, Map), + NewTemp = ra_temp(Temp, Map), + NewSrc = ra_temp(Src, Map), + I#pseudo_spill_move{dst=NewDst,temp=NewTemp,src=NewSrc}. + ra_pseudo_tailcall(I=#pseudo_tailcall{stkargs=StkArgs}, Map) -> NewStkArgs = ra_args(StkArgs, Map), I#pseudo_tailcall{stkargs=NewStkArgs}. @@ -156,6 +164,13 @@ ra_pseudo_fmove(I=#pseudo_fmove{dst=Dst,src=Src}, FPMap) -> NewSrc = ra_temp_fp(Src, FPMap), I#pseudo_fmove{dst=NewDst,src=NewSrc}. +ra_pseudo_spill_fmove(I=#pseudo_spill_fmove{dst=Dst,temp=Temp,src=Src}, + FPMap) -> + NewDst = ra_temp_fp(Dst, FPMap), + NewTemp = ra_temp_fp(Temp, FPMap), + NewSrc = ra_temp_fp(Src, FPMap), + I#pseudo_spill_fmove{dst=NewDst,temp=NewTemp,src=NewSrc}. 
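The do_pseudo_spill_move/3 and do_pseudo_spill_fmove/3 cases added to the frame pass above only emit code when both operands really ended up on the stack: a load of the reserved scratch temp from the source slot followed by a store to the destination slot, or nothing at all when the two names were coalesced onto the same slot. A rough stand-alone sketch of that decision with invented offsets and tuple pseudo-instructions (not the real PowerPC instruction records):

-module(spill_move_sketch).
-export([demo/0]).

lower_spill_move(SrcOffset, DstOffset, _Scratch) when SrcOffset =:= DstOffset ->
  [];                                    % same slot: omit the move entirely
lower_spill_move(SrcOffset, DstOffset, Scratch) ->
  [{load,  Scratch, {sp, SrcOffset}},    % load scratch from source slot
   {store, Scratch, {sp, DstOffset}}].   % store scratch to destination slot

demo() ->
  [] = lower_spill_move(16, 16, r11),
  [{load, r11, {sp, 16}}, {store, r11, {sp, 24}}] = lower_spill_move(16, 24, r11),
  ok.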
+ ra_args([Arg|Args], Map) -> [ra_temp_or_imm(Arg, Map) | ra_args(Args, Map)]; ra_args([], _) -> diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl index 95aa294fe5..0a97129666 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl @@ -57,6 +57,7 @@ do_insn(I, TempMap, Strategy) -> #mtspr{} -> do_mtspr(I, TempMap, Strategy); #pseudo_li{} -> do_pseudo_li(I, TempMap, Strategy); #pseudo_move{} -> do_pseudo_move(I, TempMap, Strategy); + #pseudo_spill_move{} -> do_pseudo_spill_move(I, TempMap, Strategy); #store{} -> do_store(I, TempMap, Strategy); #storex{} -> do_storex(I, TempMap, Strategy); #unary{} -> do_unary(I, TempMap, Strategy); @@ -117,18 +118,25 @@ do_pseudo_li(I=#pseudo_li{dst=Dst}, TempMap, Strategy) -> do_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, TempMap, Strategy) -> %% Either Dst or Src (but not both) may be a pseudo temp. - %% pseudo_move and pseudo_tailcall are special cases: in - %% all other instructions, all temps must be non-pseudos - %% after register allocation. - case temp_is_spilled(Dst, TempMap) of - true -> % Src must not be a pseudo - {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy), - NewI = I#pseudo_move{src=NewSrc}, - {FixSrc ++ [NewI], DidSpill}; + %% pseudo_move, pseudo_spill_move, and pseudo_tailcall are + %% special cases: in all other instructions, all temps + %% must be non-pseudos after register allocation. + case temp_is_spilled(Src, TempMap) + andalso temp_is_spilled(Dst, TempMap) + of + true -> % Turn into pseudo_spill_move + Temp = clone(Src, temp1(Strategy)), + NewI = #pseudo_spill_move{dst=Dst,temp=Temp,src=Src}, + {[NewI], true}; _ -> {[I], false} end. +do_pseudo_spill_move(I=#pseudo_spill_move{temp=Temp}, TempMap, _Strategy) -> + %% Temp is above the low water mark and must not have been spilled + false = temp_is_spilled(Temp, TempMap), + {[I], false}. + do_store(I=#store{src=Src,base=Base}, TempMap, Strategy) -> {FixSrc,NewSrc,DidSpill1} = fix_src1(Src, TempMap, Strategy), {FixBase,NewBase,DidSpill2} = fix_src2(Base, TempMap, Strategy), diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl index 5ec5f29577..7342053620 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl @@ -42,6 +42,7 @@ do_insn(I, TempMap) -> #fp_binary{} -> do_fp_binary(I, TempMap); #fp_unary{} -> do_fp_unary(I, TempMap); #pseudo_fmove{} -> do_pseudo_fmove(I, TempMap); + #pseudo_spill_fmove{} -> do_pseudo_spill_fmove(I, TempMap); _ -> {[I], false} end. @@ -81,15 +82,22 @@ do_fp_unary(I=#fp_unary{dst=Dst,src=Src}, TempMap) -> {FixSrc ++ [NewI | FixDst], DidSpill1 or DidSpill2}. do_pseudo_fmove(I=#pseudo_fmove{dst=Dst,src=Src}, TempMap) -> - case temp_is_spilled(Dst, TempMap) of - true -> - {FixSrc,NewSrc,DidSpill} = fix_src(Src, TempMap), - NewI = I#pseudo_fmove{src=NewSrc}, - {FixSrc ++ [NewI], DidSpill}; + case temp_is_spilled(Src, TempMap) + andalso temp_is_spilled(Dst, TempMap) + of + true -> % Turn into pseudo_spill_fmove + Temp = clone(Src), + NewI = #pseudo_spill_fmove{dst=Dst,temp=Temp,src=Src}, + {[NewI], true}; _ -> {[I], false} end. +do_pseudo_spill_fmove(I=#pseudo_spill_fmove{temp=Temp}, TempMap) -> + %% Temp is above the low water mark and must not have been spilled + false = temp_is_spilled(Temp, TempMap), + {[I], false}. + %%% Fix Dst and Src operands. 
fix_src(Src, TempMap) -> diff --git a/lib/hipe/ppc/hipe_ppc_subst.erl b/lib/hipe/ppc/hipe_ppc_subst.erl index 1cd18b5c01..e282b22774 100644 --- a/lib/hipe/ppc/hipe_ppc_subst.erl +++ b/lib/hipe/ppc/hipe_ppc_subst.erl @@ -48,6 +48,8 @@ insn_temps(T, I) -> #pseudo_call_prepare{} -> I; #pseudo_li{dst=D} -> I#pseudo_li{dst=T(D)}; #pseudo_move{dst=D,src=S} -> I#pseudo_move{dst=T(D),src=T(S)}; + #pseudo_spill_move{dst=D,temp=U,src=S} -> + I#pseudo_spill_move{dst=T(D),temp=T(U),src=T(S)}; #pseudo_tailcall{func=F,stkargs=Stk} when not is_record(F, ppc_temp) -> I#pseudo_tailcall{stkargs=lists:map(A,Stk)}; #pseudo_tailcall_prepare{} -> I; @@ -62,7 +64,9 @@ insn_temps(T, I) -> #fp_binary{dst=D,src1=L,src2=R} -> I#fp_binary{dst=T(D),src1=T(L),src2=T(R)}; #fp_unary{dst=D,src=S} -> I#fp_unary{dst=T(D),src=T(S)}; - #pseudo_fmove{dst=D,src=S} -> I#pseudo_fmove{dst=T(D),src=T(S)} + #pseudo_fmove{dst=D,src=S} -> I#pseudo_fmove{dst=T(D),src=T(S)}; + #pseudo_spill_fmove{dst=D,temp=U,src=S} -> + I#pseudo_spill_fmove{dst=T(D),temp=T(U),src=T(S)} end. -spec oper_temps(subst_fun(), oper()) -> oper(). diff --git a/lib/hipe/regalloc/Makefile b/lib/hipe/regalloc/Makefile index 209f230a9b..81a92e5d35 100644 --- a/lib/hipe/regalloc/Makefile +++ b/lib/hipe/regalloc/Makefile @@ -50,8 +50,10 @@ MODULES = hipe_ig hipe_ig_moves hipe_moves \ hipe_optimistic_regalloc \ hipe_coalescing_regalloc \ hipe_graph_coloring_regalloc \ + hipe_range_split \ hipe_regalloc_loop \ hipe_regalloc_prepass \ + hipe_restore_reuse \ hipe_ls_regalloc \ hipe_ppc_specific hipe_ppc_specific_fp \ hipe_sparc_specific hipe_sparc_specific_fp \ diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl index 9c94539bc6..d592ba391c 100644 --- a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl +++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl @@ -30,6 +30,7 @@ def_use/2, is_arg/2, %% used by hipe_ls_regalloc is_move/2, + is_spill_move/2, is_fixed/2, %% used by hipe_graph_coloring_regalloc is_global/2, is_precoloured/2, @@ -50,12 +51,19 @@ -export([check_and_rewrite/3, check_and_rewrite/4]). -%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights +-export([branch_preds/2]). + %%---------------------------------------------------------------------------- -include("../flow/cfg.hrl"). @@ -126,8 +134,8 @@ temp0(_) -> all_precoloured(Ctx) -> allocatable(Ctx). -is_precoloured(Reg, Ctx) -> - lists:member(Reg,all_precoloured(Ctx)). +is_precoloured(Reg, _) -> + hipe_amd64_registers:is_precoloured_sse2(Reg). physical_name(Reg, _) -> Reg. @@ -152,6 +160,9 @@ bb(CFG, L, _) -> update_bb(CFG,L,BB,_) -> hipe_x86_cfg:bb_add(CFG,L,BB). +branch_preds(Instr,_) -> + hipe_x86_cfg:branch_preds(Instr). + %% AMD64 stuff def_use(Instruction, _) -> @@ -184,10 +195,34 @@ is_move(Instruction, _) -> andalso hipe_x86:is_temp(Dst) andalso hipe_x86:temp_is_allocatable(Dst); false -> false end. + +is_spill_move(Instruction,_) -> + hipe_x86:is_pseudo_spill_fmove(Instruction). reg_nr(Reg, _) -> hipe_x86:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_x86:mk_fmove(Src, Dst). + +mk_goto(Label, _) -> + hipe_x86:mk_jmp_label(Label). 
+ +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + Ref = make_ref(), + put(Ref, false), + I = hipe_x86_subst:insn_lbls( + fun(Tgt) -> + if Tgt =:= ToOld -> put(Ref, true), ToNew; + is_integer(Tgt) -> Tgt + end + end, Jmp), + true = erase(Ref), % Assert that something was rewritten + I. + +new_label(_) -> + hipe_gensym:get_next_label(x86). + new_reg_nr(_) -> hipe_gensym:get_next_var(x86). diff --git a/lib/hipe/regalloc/hipe_arm_specific.erl b/lib/hipe/regalloc/hipe_arm_specific.erl index cef22e5af9..7ebc6aa336 100644 --- a/lib/hipe/regalloc/hipe_arm_specific.erl +++ b/lib/hipe/regalloc/hipe_arm_specific.erl @@ -24,6 +24,7 @@ ,reg_nr/2 ,def_use/2 ,is_move/2 + ,is_spill_move/2 ,is_precoloured/2 ,var_range/2 ,allocatable/1 @@ -46,12 +47,19 @@ %% callbacks for hipe_regalloc_loop -export([check_and_rewrite/3]). -%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights, hipe_range_split +-export([branch_preds/2]). + check_and_rewrite(CFG, Coloring, no_context) -> hipe_arm_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). @@ -115,6 +123,9 @@ bb(CFG,L,_) -> update_bb(CFG,L,BB,_) -> hipe_arm_cfg:bb_add(CFG,L,BB). +branch_preds(Branch,_) -> + hipe_arm_cfg:branch_preds(Branch). + %% ARM stuff def_use(Instruction, Ctx) -> @@ -144,9 +155,33 @@ is_move(Instruction, _) -> false -> false end. +is_spill_move(Instruction, _) -> + hipe_arm:is_pseudo_spill_move(Instruction). + reg_nr(Reg, _) -> hipe_arm:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_arm:mk_pseudo_move(Dst, Src). + +mk_goto(Label, _) -> + hipe_arm:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + Ref = make_ref(), + put(Ref, false), + I = hipe_arm_subst:insn_lbls( + fun(Tgt) -> + if Tgt =:= ToOld -> put(Ref, true), ToNew; + is_integer(Tgt) -> Tgt + end + end, Jmp), + true = erase(Ref), % Assert that something was rewritten + I. + +new_label(_) -> + hipe_gensym:get_next_label(arm). + new_reg_nr(_) -> hipe_gensym:get_next_var(arm). diff --git a/lib/hipe/regalloc/hipe_coalescing_regalloc.erl b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl index e8ccbec9f1..b8f0a1974c 100644 --- a/lib/hipe/regalloc/hipe_coalescing_regalloc.erl +++ b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl @@ -914,7 +914,7 @@ findCheapest([Node|Nodes], IG, Cost, Cheapest, SpillLimit) -> %% limit are extremely expensive. getCost(Node, IG, SpillLimit) -> - case Node > SpillLimit of + case Node >= SpillLimit of true -> inf; false -> hipe_ig:node_spill_cost(Node, IG) end. diff --git a/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl index 07aa812f4a..f82d3a2cbc 100644 --- a/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl +++ b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl @@ -209,8 +209,8 @@ color(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, %% Any nodes above the spillimit must be colored first... 
MustNotSpill = - if NumNodes > SpillLimit+1 -> - sort_on_degree(lists:seq(SpillLimit+1,NumNodes-1) -- Low,IG); + if NumNodes > SpillLimit -> + sort_on_degree(lists:seq(SpillLimit,NumNodes-1) -- Low,IG); true -> [] end, @@ -401,7 +401,7 @@ spill_costs([{N,Info}|Ns], IG, Vis, Spill, SpillLimit, Target) -> true -> spill_costs(Ns, IG, Vis, Spill, SpillLimit, Target); false -> - if N > SpillLimit -> + if N >= SpillLimit -> spill_costs(Ns, IG, Vis, Spill, SpillLimit, Target); true -> [{spill_cost_of(N,Spill)/Deg,N} | diff --git a/lib/hipe/regalloc/hipe_optimistic_regalloc.erl b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl index b96920cbcf..a019c46b90 100644 --- a/lib/hipe/regalloc/hipe_optimistic_regalloc.erl +++ b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl @@ -1933,7 +1933,7 @@ findCheapest([Node|Nodes], IG, Cost, Cheapest, SpillLimit) -> %% limit are extremely expensive. getCost(Node, IG, SpillLimit) -> - case Node > SpillLimit of + case Node >= SpillLimit of true -> inf; false -> SpillCost = hipe_ig:node_spill_cost(Node, IG), diff --git a/lib/hipe/regalloc/hipe_ppc_specific.erl b/lib/hipe/regalloc/hipe_ppc_specific.erl index a6450b4d96..81bb551bd2 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific.erl @@ -24,6 +24,7 @@ ,reg_nr/2 ,def_use/2 ,is_move/2 + ,is_spill_move/2 ,is_precoloured/2 ,var_range/2 ,allocatable/1 @@ -46,12 +47,19 @@ %% callbacks for hipe_regalloc_loop -export([check_and_rewrite/3]). -%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights +-export([branch_preds/2]). + check_and_rewrite(CFG, Coloring, _) -> hipe_ppc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). @@ -115,6 +123,9 @@ bb(CFG,L,_) -> update_bb(CFG,L,BB,_) -> hipe_ppc_cfg:bb_add(CFG,L,BB). +branch_preds(Instr,_) -> + hipe_ppc_cfg:branch_preds(Instr). + %% PowerPC stuff def_use(Instruction, Ctx) -> @@ -144,9 +155,24 @@ is_move(Instruction, _) -> false -> false end. +is_spill_move(Instruction, _) -> + hipe_ppc:is_pseudo_spill_move(Instruction). + reg_nr(Reg, _) -> hipe_ppc:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_ppc:mk_pseudo_move(Dst, Src). + +mk_goto(Label, _) -> + hipe_ppc:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + hipe_ppc_cfg:redirect_jmp(Jmp, ToOld, ToNew). + +new_label(_) -> + hipe_gensym:get_next_label(ppc). + new_reg_nr(_) -> hipe_gensym:get_next_var(ppc). diff --git a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl index 23cb6c0318..dcfdf6592c 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl @@ -24,6 +24,7 @@ ,reg_nr/2 ,def_use/2 ,is_move/2 + ,is_spill_move/2 ,is_precoloured/2 ,var_range/2 ,allocatable/1 @@ -46,12 +47,19 @@ %% callbacks for hipe_regalloc_loop -export([check_and_rewrite/3]). -%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights +-export([branch_preds/2]). + check_and_rewrite(CFG, Coloring, _) -> hipe_ppc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring). 
@@ -108,6 +116,9 @@ bb(CFG, L, _) ->
 update_bb(CFG,L,BB,_) ->
   hipe_ppc_cfg:bb_add(CFG,L,BB).
 
+branch_preds(Instr,_) ->
+  hipe_ppc_cfg:branch_preds(Instr).
+
 %% PowerPC stuff
 
 def_use(I, Ctx) ->
@@ -125,9 +136,24 @@ defines_all_alloc(I, _) ->
 is_move(I, _) ->
   hipe_ppc:is_pseudo_fmove(I).
 
+is_spill_move(I, _) ->
+  hipe_ppc:is_pseudo_spill_fmove(I).
+
 reg_nr(Reg, _) ->
   hipe_ppc:temp_reg(Reg).
 
+mk_move(Src, Dst, _) ->
+  hipe_ppc:mk_pseudo_fmove(Dst, Src).
+
+mk_goto(Label, _) ->
+  hipe_ppc:mk_b_label(Label).
+
+redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) ->
+  hipe_ppc_cfg:redirect_jmp(Jmp, ToOld, ToNew).
+
+new_label(_) ->
+  hipe_gensym:get_next_label(ppc).
+
 new_reg_nr(_) ->
   hipe_gensym:get_next_var(ppc).
 
diff --git a/lib/hipe/regalloc/hipe_range_split.erl b/lib/hipe/regalloc/hipe_range_split.erl
new file mode 100644
index 0000000000..39b086d9f7
--- /dev/null
+++ b/lib/hipe/regalloc/hipe_range_split.erl
@@ -0,0 +1,1187 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%@doc
+%%   TEMPORARY LIVE RANGE SPLITTING PASS
+%%
+%% Live range splitting is useful to allow a register allocator to allocate a
+%% temporary to a register for part of its lifetime, even if it cannot be for
+%% the entirety. This improves register allocation quality, at the cost of
+%% making the allocation problem more time and memory intensive to solve.
+%%
+%% Optimal allocation can be achieved if all temporaries are split at every
+%% program point (between all instructions), but this makes register allocation
+%% infeasibly slow in practice. Instead, this module uses heuristics to choose
+%% which temporaries should have their live ranges split, and at which points.
+%%
+%% The range splitter only considers temps which are live during a call
+%% instruction, since they're known to be spilled. The control-flow graph is
+%% partitioned at call instructions and splitting decisions are made separately
+%% for each partition. The register copy of a temp (if any) gets a separate name
+%% in each partition.
+%%
+%% There are three different ways the range splitter may choose to split a
+%% temporary in a program partition:
+%%
+%%   * Mode1: Spill the temp before calls, and restore it after them
+%%   * Mode2: Spill the temp after definitions, restore it after calls
+%%   * Mode3: Spill the temp after definitions, restore it before uses
+%%
+%% To pick which of these should be used for each temp×partition pair, the
+%% range splitter uses a cost function. The cost is simply the sum of the cost
+%% of all expected stack accesses, and the cost for an individual stack access
+%% is based on the probability weight of the basic block that it resides in.
+%% This biases the range splitter so that it attempts to move stack accesses
+%% from a function's hot path to its cold path.
+%% +%% The heuristic has a couple of tuning knobs, adjusting its preference for +%% different spilling modes, aggressiveness, and how much influence the basic +%% block probability weights have. +%% +%% Edge case not handled: Call instructions directly defining a pseudo. In that +%% case, if that pseudo has been selected for mode2 spills, no spill is inserted +%% after the call. +-module(hipe_range_split). + +-export([split/5]). + +-compile(inline). + +%% -define(DO_ASSERT, 1). +%% -define(DEBUG, 1). +-include("../main/hipe.hrl"). + +%% Heuristic tuning constants +-define(DEFAULT_MIN_GAIN, 1.1). % option: range_split_min_gain +-define(DEFAULT_MODE1_FUDGE, 1.1). % option: range_split_mode1_fudge +-define(DEFAULT_WEIGHT_POWER, 2). % option: range_split_weight_power +-define(WEIGHT_CONST_FUN(Power), math:log(Power)/math:log(100)). +-define(WEIGHT_FUN(Wt, Const), math:pow(Wt, Const)). +-define(HEUR_MAX_TEMPS, 20000). + +-type target_cfg() :: any(). +-type target_instr() :: any(). +-type target_temp() :: any(). +-type liveness() :: any(). +-type target_module() :: module(). +-type target_context() :: any(). +-type target() :: {target_module(), target_context()}. +-type liveset() :: ordsets:ordset(temp()). +-type temp() :: non_neg_integer(). +-type label() :: non_neg_integer(). + +-spec split(target_cfg(), liveness(), target_module(), target_context(), + comp_options()) + -> target_cfg(). +split(TCFG0, Liveness, TargetMod, TargetContext, Options) -> + Target = {TargetMod, TargetContext}, + NoTemps = number_of_temporaries(TCFG0, Target), + if NoTemps > ?HEUR_MAX_TEMPS -> + ?debug_msg("~w: Too many temps (~w), falling back on restore_reuse.~n", + [?MODULE, NoTemps]), + hipe_restore_reuse:split(TCFG0, Liveness, TargetMod, TargetContext); + true -> + Wts = compute_weights(TCFG0, TargetMod, TargetContext, Options), + {CFG0, Temps} = convert(TCFG0, Target), + Avail = avail_analyse(TCFG0, Liveness, Target), + Defs = def_analyse(CFG0, TCFG0), + RDefs = rdef_analyse(CFG0), + PLive = plive_analyse(CFG0), + {CFG, DUCounts, Costs, DSets0} = + scan(CFG0, Liveness, PLive, Wts, Defs, RDefs, Avail, Target), + {DSets, _} = hipe_dsets:to_map(DSets0), + Renames = decide(DUCounts, Costs, Target, Options), + rewrite(CFG, TCFG0, Target, Liveness, PLive, Defs, Avail, DSets, Renames, + Temps) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Internal program representation +%% +%% Second pass: Convert cfg to internal representation + +-record(cfg, { + rpo_labels :: [label()], + bbs :: #{label() => bb()} + }). +-type cfg() :: #cfg{}. + +cfg_bb(L, #cfg{bbs=BBS}) -> maps:get(L, BBS). + +cfg_postorder(#cfg{rpo_labels=RPO}) -> lists:reverse(RPO). + +-record(bb, { + code :: [code_elem()], + %% If the last instruction of code defines all allocatable registers + has_call :: boolean(), + succ :: [label()] + }). +-type bb() :: #bb{}. +-type code_elem() :: instr() | mode2_spills() | mode3_restores(). + +bb_code(#bb{code=Code}) -> Code. +bb_has_call(#bb{has_call=HasCall}) -> HasCall. +bb_succ(#bb{succ=Succ}) -> Succ. + +bb_butlast(#bb{code=Code}) -> + bb_butlast_1(Code). + +bb_butlast_1([_Last]) -> []; +bb_butlast_1([I|Is]) -> [I|bb_butlast_1(Is)]. + +bb_last(#bb{code=Code}) -> lists:last(Code). + +-record(instr, { + i :: target_instr(), + def :: ordsets:ordset(temp()), + use :: ordsets:ordset(temp()) + }). +-type instr() :: #instr{}. + +-record(mode2_spills, { + temps :: ordsets:ordset(temp()) + }). +-type mode2_spills() :: #mode2_spills{}. 
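The range_split_weight_power knob above decides how much the basic-block weights are allowed to skew the cost function: ?WEIGHT_FUN raises each weight to log(Power)/log(100), so with the default power of 2 a weight of 1 stays 1, a weight of 100 contributes a factor of about 2, and a weight of 10000 about 4. A one-function sketch of that scaling (illustrative module name, same formula as the macros above):

-module(weight_power_sketch).
-export([weight_factor/2]).

%% weight_factor(1.0, 2) ~= 1.0, weight_factor(100.0, 2) ~= 2.0,
%% weight_factor(10000.0, 2) ~= 4.0; a larger Power spreads the factors out.
weight_factor(Wt, Power) ->
  math:pow(Wt, math:log(Power) / math:log(100)).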
+ +-record(mode3_restores, { + temps :: ordsets:ordset(temp()) + }). +-type mode3_restores() :: #mode3_restores{}. + +-spec convert(target_cfg(), target()) -> {cfg(), temps()}. +convert(CFG, Target) -> + RPO = reverse_postorder(CFG, Target), + {BBsList, Temps} = convert_bbs(RPO, CFG, Target, #{}, []), + {#cfg{rpo_labels = RPO, + bbs = maps:from_list(BBsList)}, + Temps}. + +convert_bbs([], _CFG, _Target, Temps, Acc) -> {Acc, Temps}; +convert_bbs([L|Ls], CFG, Target, Temps0, Acc) -> + Succs = hipe_gen_cfg:succ(CFG, L), + TBB = bb(CFG, L, Target), + TCode = hipe_bb:code(TBB), + {Code, Last, Temps} = convert_code(TCode, Target, Temps0, []), + HasCall = defines_all_alloc(Last#instr.i, Target), + BB = #bb{code = Code, + has_call = HasCall, + succ = Succs}, + convert_bbs(Ls, CFG, Target, Temps, [{L,BB}|Acc]). + +convert_code([], _Target, Temps, [Last|_]=Acc) -> + {lists:reverse(Acc), Last, Temps}; +convert_code([TI|TIs], Target, Temps0, Acc) -> + {TDef, TUse} = def_use(TI, Target), + I = #instr{i = TI, + def = ordsets:from_list(reg_names(TDef, Target)), + use = ordsets:from_list(reg_names(TUse, Target))}, + Temps = add_temps(TUse, Target, add_temps(TDef, Target, Temps0)), + convert_code(TIs, Target, Temps, [I|Acc]). + +-type temps() :: #{temp() => target_temp()}. +add_temps([], _Target, Temps) -> Temps; +add_temps([T|Ts], Target, Temps) -> + add_temps(Ts, Target, Temps#{reg_nr(T, Target) => T}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fourth pass: P({DEF}) lattice fwd dataflow (for eliding stores at SPILL +%% splits) +-type defsi() :: #{label() => defseti() | {call, defseti(), defseti()}}. +-type defs() :: #{label() => defsetf()}. + +-spec def_analyse(cfg(), target_cfg()) -> defs(). +def_analyse(CFG = #cfg{rpo_labels = RPO}, TCFG) -> + Defs0 = def_init(CFG), + def_dataf(RPO, TCFG, Defs0). + +-spec def_init(cfg()) -> defsi(). +def_init(#cfg{bbs = BBs}) -> + maps:from_list( + [begin + {L, case HasCall of + false -> def_init_scan(bb_code(BB), defseti_new()); + true -> + {call, def_init_scan(bb_butlast(BB), defseti_new()), + defseti_from_ordset((bb_last(BB))#instr.def)} + end} + end || {L, BB = #bb{has_call=HasCall}} <- maps:to_list(BBs)]). + +def_init_scan([], Defset) -> Defset; +def_init_scan([#instr{def=Def}|Is], Defset0) -> + Defset = defseti_add_ordset(Def, Defset0), + def_init_scan(Is, Defset). + +-spec def_dataf([label()], target_cfg(), defsi()) -> defs(). +def_dataf(Labels, TCFG, Defs0) -> + case def_dataf_once(Labels, TCFG, Defs0, 0) of + {Defs, 0} -> + def_finalise(Defs); + {Defs, _Changed} -> + def_dataf(Labels, TCFG, Defs) + end. + +-spec def_finalise(defsi()) -> defs(). +def_finalise(Defs) -> + maps:from_list([{K, defseti_finalise(BL)} + || {K, {call, BL, _}} <- maps:to_list(Defs)]). + +-spec def_dataf_once([label()], target_cfg(), defsi(), non_neg_integer()) + -> {defsi(), non_neg_integer()}. +def_dataf_once([], _TCFG, Defs, Changed) -> {Defs, Changed}; +def_dataf_once([L|Ls], TCFG, Defs0, Changed0) -> + AddPreds = + fun(Defset1) -> + lists:foldl(fun(P, Defset2) -> + defseti_union(defout(P, Defs0), Defset2) + end, Defset1, hipe_gen_cfg:pred(TCFG, L)) + end, + Defset = + case Defset0 = maps:get(L, Defs0) of + {call, Butlast, Defout} -> {call, AddPreds(Butlast), Defout}; + _ -> AddPreds(Defset0) + end, + Changed = case Defset =:= Defset0 of + true -> Changed0; + false -> Changed0+1 + end, + def_dataf_once(Ls, TCFG, Defs0#{L := Defset}, Changed). + +-spec defout(label(), defsi()) -> defseti(). 
+defout(L, Defs) -> + case maps:get(L, Defs) of + {call, _DefButLast, Defout} -> Defout; + Defout -> Defout + end. + +-spec defbutlast(label(), defs()) -> defsetf(). +defbutlast(L, Defs) -> maps:get(L, Defs). + +-spec defseti_new() -> defseti(). +-spec defseti_union(defseti(), defseti()) -> defseti(). +-spec defseti_add_ordset(ordset:ordset(temp()), defseti()) -> defseti(). +-spec defseti_from_ordset(ordset:ordset(temp())) -> defseti(). +-spec defseti_finalise(defseti()) -> defsetf(). +-spec defsetf_member(temp(), defsetf()) -> boolean(). +-spec defsetf_intersect_ordset(ordsets:ordset(temp()), defsetf()) + -> ordsets:ordset(temp()). + +-type defseti() :: bitord(). +defseti_new() -> bitord_new(). +defseti_union(A, B) -> bitord_union(A, B). +defseti_add_ordset(OS, D) -> defseti_union(defseti_from_ordset(OS), D). +defseti_from_ordset(OS) -> bitord_from_ordset(OS). +defseti_finalise(D) -> bitarr_from_bitord(D). + +-type defsetf() :: bitarr(). +defsetf_member(E, D) -> bitarr_get(E, D). + +defsetf_intersect_ordset([], _D) -> []; +defsetf_intersect_ordset([E|Es], D) -> + case bitarr_get(E, D) of + true -> [E|defsetf_intersect_ordset(Es,D)]; + false -> defsetf_intersect_ordset(Es,D) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fifth pass: P({DEF}) lattice reverse dataflow (for eliding stores at defines +%% in mode2) +-type rdefsi() :: #{label() => + {call, rdefseti(), [label()]} + | {nocall, rdefseti(), rdefseti(), [label()]}}. +-type rdefs() :: #{label() => {final, rdefsetf(), [label()]}}. + +-spec rdef_analyse(cfg()) -> rdefs(). +rdef_analyse(CFG = #cfg{rpo_labels=RPO}) -> + Defs0 = rdef_init(CFG), + PO = rdef_postorder(RPO, CFG, []), + rdef_dataf(PO, Defs0). + +%% Filter out 'call' labels, since they don't change +-spec rdef_postorder([label()], cfg(), [label()]) -> [label()]. +rdef_postorder([], _CFG, Acc) -> Acc; +rdef_postorder([L|Ls], CFG, Acc) -> + case bb_has_call(cfg_bb(L, CFG)) of + true -> rdef_postorder(Ls, CFG, Acc); + false -> rdef_postorder(Ls, CFG, [L|Acc]) + end. + +-spec rdef_init(cfg()) -> rdefsi(). +rdef_init(#cfg{bbs = BBs}) -> + maps:from_list( + [{L, case HasCall of + true -> + Defin = rdef_init_scan(bb_butlast(BB), rdefseti_empty()), + {call, Defin, Succs}; + false -> + Gen = rdef_init_scan(bb_code(BB), rdefseti_empty()), + {nocall, Gen, rdefseti_top(), Succs} + end} + || {L, BB = #bb{has_call=HasCall, succ=Succs}} <- maps:to_list(BBs)]). + +-spec rdef_init_scan([instr()], rdefseti()) -> rdefseti(). +rdef_init_scan([], Defset) -> Defset; +rdef_init_scan([#instr{def=Def}|Is], Defset0) -> + Defset = rdefseti_add_ordset(Def, Defset0), + rdef_init_scan(Is, Defset). + +-spec rdef_dataf([label()], rdefsi()) -> rdefs(). +rdef_dataf(Labels, Defs0) -> + case rdef_dataf_once(Labels, Defs0, 0) of + {Defs, 0} -> + rdef_finalise(Defs); + {Defs, _Changed} -> + rdef_dataf(Labels, Defs) + end. + +-spec rdef_finalise(rdefsi()) -> rdefs(). +rdef_finalise(Defs) -> + maps:map(fun(L, V) -> + Succs = rsuccs_val(V), + Defout0 = rdefout_intersect(L, Defs, rdefseti_top()), + {final, rdefset_finalise(Defout0), Succs} + end, Defs). + +-spec rdef_dataf_once([label()], rdefsi(), non_neg_integer()) + -> {rdefsi(), non_neg_integer()}. 
+rdef_dataf_once([], Defs, Changed) -> {Defs, Changed}; +rdef_dataf_once([L|Ls], Defs0, Changed0) -> + #{L := {nocall, Gen, Defin0, Succs}} = Defs0, + Defin = rdefseti_union(Gen, rdefout_intersect(L, Defs0, Defin0)), + Defset = {nocall, Gen, Defin, Succs}, + Changed = case Defin =:= Defin0 of + true -> Changed0; + false -> Changed0+1 + end, + rdef_dataf_once(Ls, Defs0#{L := Defset}, Changed). + +-spec rdefin(label(), rdefsi()) -> rdefseti(). +rdefin(L, Defs) -> rdefin_val(maps:get(L, Defs)). +rdefin_val({nocall, _Gen, Defin, _Succs}) -> Defin; +rdefin_val({call, Defin, _Succs}) -> Defin. + +-spec rsuccs(label(), rdefsi()) -> [label()]. +rsuccs(L, Defs) -> rsuccs_val(maps:get(L, Defs)). +rsuccs_val({nocall, _Gen, _Defin, Succs}) -> Succs; +rsuccs_val({call, _Defin, Succs}) -> Succs. + +-spec rdefout(label(), rdefs()) -> rdefsetf(). +rdefout(L, Defs) -> + #{L := {final, Defout, _Succs}} = Defs, + Defout. + +-spec rdefout_intersect(label(), rdefsi(), rdefseti()) -> rdefseti(). +rdefout_intersect(L, Defs, Init) -> + lists:foldl(fun(S, Acc) -> + rdefseti_intersect(rdefin(S, Defs), Acc) + end, Init, rsuccs(L, Defs)). + +-type rdefseti() :: bitord() | top. +rdefseti_top() -> top. +rdefseti_empty() -> bitord_new(). +-spec rdefseti_from_ordset(ordsets:ordset(temp())) -> rdefseti(). +rdefseti_from_ordset(OS) -> bitord_from_ordset(OS). + +-spec rdefseti_add_ordset(ordsets:ordset(temp()), rdefseti()) -> rdefseti(). +rdefseti_add_ordset(_, top) -> top; % Should never happen in rdef_dataf +rdefseti_add_ordset(OS, D) -> rdefseti_union(rdefseti_from_ordset(OS), D). + +-spec rdefseti_union(rdefseti(), rdefseti()) -> rdefseti(). +rdefseti_union(top, _) -> top; +rdefseti_union(_, top) -> top; +rdefseti_union(A, B) -> bitord_union(A, B). + +-spec rdefseti_intersect(rdefseti(), rdefseti()) -> rdefseti(). +rdefseti_intersect(top, D) -> D; +rdefseti_intersect(D, top) -> D; +rdefseti_intersect(A, B) -> bitord_intersect(A, B). + +-type rdefsetf() :: {arr, bitarr()} | top. +-spec rdefset_finalise(rdefseti()) -> rdefsetf(). +rdefset_finalise(top) -> top; +rdefset_finalise(Ord) -> {arr, bitarr_from_bitord(Ord)}. + +%% rdefsetf_top() -> top. +rdefsetf_empty() -> {arr, bitarr_new()}. + +-spec rdefsetf_add_ordset(ordset:ordset(temp()), rdefsetf()) -> rdefsetf(). +rdefsetf_add_ordset(_, top) -> top; +rdefsetf_add_ordset(OS, {arr, Arr}) -> + {arr, lists:foldl(fun bitarr_set/2, Arr, OS)}. + +-spec rdef_step(instr(), rdefsetf()) -> rdefsetf(). +rdef_step(#instr{def=Def}, Defset) -> + %% ?ASSERT(not defines_all_alloc(I, Target)), + rdefsetf_add_ordset(Def, Defset). + +-spec ordset_subtract_rdefsetf(ordsets:ordset(temp()), rdefsetf()) + -> ordsets:ordset(temp()). +ordset_subtract_rdefsetf(_, top) -> []; +ordset_subtract_rdefsetf(OS, {arr, Arr}) -> + %% Lazy implementation; could do better if OS can grow + lists:filter(fun(E) -> not bitarr_get(E, Arr) end, OS). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Integer sets represented as bit sets +%% +%% Two representations; bitord() and bitarr() +-define(LIMB_IX_BITS, 11). +-define(LIMB_BITS, (1 bsl ?LIMB_IX_BITS)). +-define(LIMB_IX(Index), (Index bsr ?LIMB_IX_BITS)). +-define(BIT_IX(Index), (Index band (?LIMB_BITS - 1))). +-define(BIT_MASK(Index), (1 bsl ?BIT_IX(Index))). + +%% bitord(): fast at union/2 and can be compared for equality with '=:=' +-type bitord() :: orddict:orddict(non_neg_integer(), 0..((1 bsl ?LIMB_BITS)-1)). + +-spec bitord_new() -> bitord(). +bitord_new() -> []. + +-spec bitord_union(bitord(), bitord()) -> bitord(). 
+bitord_union(Lhs, Rhs) -> + orddict:merge(fun(_, L, R) -> L bor R end, Lhs, Rhs). + +-spec bitord_intersect(bitord(), bitord()) -> bitord(). +bitord_intersect([], _) -> []; +bitord_intersect(_, []) -> []; +bitord_intersect([{K, L}|Ls], [{K, R}|Rs]) -> + [{K, L band R} | bitord_intersect(Ls, Rs)]; +bitord_intersect([{LK, _}|Ls], [{RK, _}|_]=Rs) when LK < RK -> + bitord_intersect(Ls, Rs); +bitord_intersect([{LK, _}|_]=Ls, [{RK, _}|Rs]) when LK > RK -> + bitord_intersect(Ls, Rs). + +-spec bitord_from_ordset(ordsets:ordset(non_neg_integer())) -> bitord(). +bitord_from_ordset([]) -> []; +bitord_from_ordset([B|Bs]) -> + bitord_from_ordset_1(Bs, ?LIMB_IX(B), ?BIT_MASK(B)). + +bitord_from_ordset_1([B|Bs], Key, Val) when Key =:= ?LIMB_IX(B) -> + bitord_from_ordset_1(Bs, Key, Val bor ?BIT_MASK(B)); +bitord_from_ordset_1([B|Bs], Key, Val) -> + [{Key,Val} | bitord_from_ordset_1(Bs, ?LIMB_IX(B), ?BIT_MASK(B))]; +bitord_from_ordset_1([], Key, Val) -> [{Key, Val}]. + +%% bitarr(): fast (enough) at get/2 +-type bitarr() :: array:array(0..((1 bsl ?LIMB_BITS)-1)). + +-spec bitarr_new() -> bitarr(). +bitarr_new() -> array:new({default, 0}). + +-spec bitarr_get(non_neg_integer(), bitarr()) -> boolean(). +bitarr_get(Index, Array) -> + Limb = array:get(?LIMB_IX(Index), Array), + 0 =/= (Limb band ?BIT_MASK(Index)). + +-spec bitarr_set(non_neg_integer(), bitarr()) -> bitarr(). +bitarr_set(Index, Array) -> + Limb0 = array:get(?LIMB_IX(Index), Array), + Limb = Limb0 bor ?BIT_MASK(Index), + array:set(?LIMB_IX(Index), Limb, Array). + +-spec bitarr_from_bitord(bitord()) -> bitarr(). +bitarr_from_bitord(Ord) -> + array:from_orddict(Ord, 0). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Sixth pass: Partition-local liveness analysis +%% +%% As temps are not spilled when exiting a partition in mode2, only +%% partition-local uses need to be considered when deciding which temps need +%% restoring at partition entry. + +-type plive() :: #{label() => + {call, liveset(), [label()]} + | {nocall, {liveset(), liveset()}, liveset(), [label()]}}. + +-spec plive_analyse(cfg()) -> plive(). +plive_analyse(CFG) -> + Defs0 = plive_init(CFG), + PO = cfg_postorder(CFG), + plive_dataf(PO, Defs0). + +-spec plive_init(cfg()) -> plive(). +plive_init(#cfg{bbs = BBs}) -> + maps:from_list( + [begin + {L, case HasCall of + true -> + {Gen, _} = plive_init_scan(bb_code(BB)), + {call, Gen, Succs}; + false -> + GenKill = plive_init_scan(bb_code(BB)), + {nocall, GenKill, liveset_empty(), Succs} + end} + end || {L, BB = #bb{has_call=HasCall, succ=Succs}} <- maps:to_list(BBs)]). + +-spec plive_init_scan([instr()]) -> {liveset(), liveset()}. +plive_init_scan([]) -> {liveset_empty(), liveset_empty()}; +plive_init_scan([#instr{def=InstrKill, use=InstrGen}|Is]) -> + {Gen0, Kill0} = plive_init_scan(Is), + Gen1 = liveset_subtract(Gen0, InstrKill), + Gen = liveset_union(Gen1, InstrGen), + Kill1 = liveset_union(Kill0, InstrKill), + Kill = liveset_subtract(Kill1, InstrGen), + {Gen, Kill}. + +-spec plive_dataf([label()], plive()) -> plive(). +plive_dataf(Labels, PLive0) -> + case plive_dataf_once(Labels, PLive0, 0) of + {PLive, 0} -> PLive; + {PLive, _Changed} -> + plive_dataf(Labels, PLive) + end. + +-spec plive_dataf_once([label()], plive(), non_neg_integer()) -> + {plive(), non_neg_integer()}. 
+plive_dataf_once([], PLive, Changed) -> {PLive, Changed};
+plive_dataf_once([L|Ls], PLive0, Changed0) ->
+  Liveset =
+    case Liveset0 = maps:get(L, PLive0) of
+      {call, Livein, Succs} ->
+        {call, Livein, Succs};
+      {nocall, {Gen, Kill} = GenKill, _OldLivein, Succs} ->
+        Liveout = pliveout(L, PLive0),
+        Livein = liveset_union(Gen, liveset_subtract(Liveout, Kill)),
+        {nocall, GenKill, Livein, Succs}
+    end,
+  Changed = case Liveset =:= Liveset0 of
+              true -> Changed0;
+              false -> Changed0+1
+            end,
+  plive_dataf_once(Ls, PLive0#{L := Liveset}, Changed).
+
+-spec pliveout(label(), plive()) -> liveset().
+pliveout(L, PLive) ->
+  liveset_union([plivein(S, PLive) || S <- psuccs(L, PLive)]).
+
+-spec psuccs(label(), plive()) -> [label()].
+psuccs(L, PLive) -> psuccs_val(maps:get(L, PLive)).
+psuccs_val({call, _Livein, Succs}) -> Succs;
+psuccs_val({nocall, _GenKill, _Livein, Succs}) -> Succs.
+
+-spec plivein(label(), plive()) -> liveset().
+plivein(L, PLive) -> plivein_val(maps:get(L, PLive)).
+plivein_val({call, Livein, _Succs}) -> Livein;
+plivein_val({nocall, _GenKill, Livein, _Succs}) -> Livein.
+
+liveset_empty() -> ordsets:new().
+liveset_subtract(A, B) -> ordsets:subtract(A, B).
+liveset_union(A, B) -> ordsets:union(A, B).
+liveset_union(LivesetList) -> ordsets:union(LivesetList).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Third pass: Compute dataflow analyses required for placing mode3
+%% spills/restores.
+%% Reuses the analysis implementation in hipe_restore_reuse.
+%% XXX: hipe_restore_reuse has its own "rdef"; we would like to reuse that one
+%% too.
+-type avail() :: hipe_restore_reuse:avail().
+
+-spec avail_analyse(target_cfg(), liveness(), target()) -> avail().
+avail_analyse(CFG, Liveness, Target) ->
+  hipe_restore_reuse:analyse(CFG, Liveness, Target).
+
+-spec mode3_split_in_block(label(), avail()) -> ordsets:ordset(temp()).
+mode3_split_in_block(L, Avail) ->
+  hipe_restore_reuse:split_in_block(L, Avail).
+
+-spec mode3_block_renameset(label(), avail()) -> ordsets:ordset(temp()).
+mode3_block_renameset(L, Avail) ->
+  hipe_restore_reuse:renamed_in_block(L, Avail).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Seventh pass
+%%
+%% Compute program space partitioning, collect information required by the
+%% heuristic.
+-type part_key() :: label().
+-type part_dsets() :: hipe_dsets:dsets(part_key()).
+-type part_dsets_map() :: #{part_key() => part_key()}.
+-type ducounts() :: #{part_key() => ducount()}.
+
+-spec scan(cfg(), liveness(), plive(), weights(), defs(), rdefs(), avail(),
+           target()) -> {cfg(), ducounts(), costs(), part_dsets()}.
+scan(CFG0, Liveness, PLive, Weights, Defs, RDefs, Avail, Target) ->
+  #cfg{rpo_labels = Labels, bbs = BBs0} = CFG0,
+  CFG = CFG0#cfg{bbs=#{}}, % kill reference
+  DSets0 = hipe_dsets:new(Labels),
+  Costs0 = costs_new(),
+  {BBs, DUCounts0, Costs1, DSets1} =
+    scan_bbs(maps:to_list(BBs0), Liveness, PLive, Weights, Defs, RDefs, Avail,
+             Target, #{}, Costs0, DSets0, []),
+  {RLList, DSets2} = hipe_dsets:to_rllist(DSets1),
+  {Costs, DSets} = costs_map_roots(DSets2, Costs1),
+  DUCounts = collect_ducounts(RLList, DUCounts0, #{}),
+  {CFG#cfg{bbs=maps:from_list(BBs)}, DUCounts, Costs, DSets}.
+
+-spec collect_ducounts([{label(), [label()]}], ducounts(), ducounts())
+                       -> ducounts().
+collect_ducounts([], _, Acc) -> Acc; +collect_ducounts([{R,Ls}|RLs], DUCounts, Acc) -> + DUCount = lists:foldl( + fun(Key, FAcc) -> + ducount_merge(maps:get(Key, DUCounts, ducount_new()), FAcc) + end, ducount_new(), Ls), + collect_ducounts(RLs, DUCounts, Acc#{R => DUCount}). + +-spec scan_bbs([{label(), bb()}], liveness(), plive(), weights(), defs(), + rdefs(), avail(), target(), ducounts(), costs(), part_dsets(), + [{label(), bb()}]) + -> {[{label(), bb()}], ducounts(), costs(), part_dsets()}. +scan_bbs([], _Liveness, _PLive, _Weights, _Defs, _RDefs, _Avail, _Target, + DUCounts, Costs, DSets, Acc) -> + {Acc, DUCounts, Costs, DSets}; +scan_bbs([{L,BB}|BBs], Liveness, PLive, Weights, Defs, RDefs, Avail, Target, + DUCounts0, Costs0, DSets0, Acc) -> + Wt = weight(L, Weights), + {DSets, Costs5, EntryCode, ExitCode, RDefout, Liveout} = + case bb_has_call(BB) of + false -> + DSets1 = lists:foldl(fun(S, DS) -> hipe_dsets:union(L, S, DS) end, + DSets0, bb_succ(BB)), + {DSets1, Costs0, bb_code(BB), [], rdefout(L, RDefs), + liveout(Liveness, L, Target)}; + true -> + LastI = #instr{def=LastDef} = bb_last(BB), + LiveBefore = ordsets:subtract(liveout(Liveness, L, Target), LastDef), + %% We can omit the spill of a temp that has not been defined since the + %% last time it was spilled + SpillSet = defsetf_intersect_ordset(LiveBefore, defbutlast(L, Defs)), + Costs1 = costs_insert(exit, L, Wt, SpillSet, Costs0), + Costs4 = lists:foldl(fun({S, BranchWt}, Costs2) -> + SLivein = livein(Liveness, S, Target), + SPLivein = plivein(S, PLive), + SWt = weight_scaled(L, BranchWt, Weights), + Costs3 = costs_insert(entry1, S, SWt, SLivein, Costs2), + costs_insert(entry2, S, SWt, SPLivein, Costs3) + end, Costs1, branch_preds(LastI#instr.i, Target)), + {DSets0, Costs4, bb_butlast(BB), [LastI], rdefsetf_empty(), LiveBefore} + end, + Mode3Splits = mode3_split_in_block(L, Avail), + {RevEntryCode, Restored} = scan_bb_fwd(EntryCode, Mode3Splits, [], []), + {Code, DUCount, Mode2Spills} = + scan_bb(RevEntryCode, Wt, RDefout, Liveout, ducount_new(), [], ExitCode), + DUCounts = DUCounts0#{L => DUCount}, + M2SpillSet = ordsets:from_list(Mode2Spills), + Costs6 = costs_insert(spill, L, Wt, M2SpillSet, Costs5), + Mode3Renames = mode3_block_renameset(L, Avail), + Costs7 = costs_insert(restore, L, Wt, ordsets:intersection(M2SpillSet, Mode3Renames), Costs6), + Costs8 = costs_insert(restore, L, Wt, ordsets:from_list(Restored), Costs7), + Costs = add_unsplit_mode3_costs(DUCount, Mode3Renames, L, Costs8), + scan_bbs(BBs, Liveness, PLive, Weights, Defs, RDefs, Avail, Target, DUCounts, + Costs, DSets, [{L,BB#bb{code=Code}}|Acc]). + +-spec add_unsplit_mode3_costs(ducount(), ordsets:ordset(temp()), label(), costs()) + -> costs(). +add_unsplit_mode3_costs(DUCount, Mode3Renames, L, Costs) -> + Unsplit = orddict_without_ordset(Mode3Renames, + orddict:from_list(ducount_to_list(DUCount))), + add_unsplit_mode3_costs_1(Unsplit, L, Costs). + +-spec add_unsplit_mode3_costs_1([{temp(),float()}], label(), costs()) + -> costs(). +add_unsplit_mode3_costs_1([], _L, Costs) -> Costs; +add_unsplit_mode3_costs_1([{T,C}|Cs], L, Costs) -> + add_unsplit_mode3_costs_1(Cs, L, costs_insert(restore, L, C, [T], Costs)). + +%% @doc Returns a new orddict without keys in Set and their associated values. +-spec orddict_without_ordset(ordsets:ordset(K), orddict:orddict(K, V)) + -> orddict:orddict(K, V). 
+orddict_without_ordset([S|Ss], [{K,_}|_]=Dict) when S < K -> + orddict_without_ordset(Ss, Dict); +orddict_without_ordset([S|_]=Set, [D={K,_}|Ds]) when S > K -> + [D|orddict_without_ordset(Set, Ds)]; +orddict_without_ordset([_S|Ss], [{_K,_}|Ds]) -> % _S == _K + orddict_without_ordset(Ss, Ds); +orddict_without_ordset(_, []) -> []; +orddict_without_ordset([], Dict) -> Dict. + +%% Scans the code forward, collecting and inserting mode3 restores +-spec scan_bb_fwd([instr()], ordsets:ordset(temp()), ordsets:ordset(temp()), + [code_elem()]) + -> {[code_elem()], ordsets:ordset(temp())}. +scan_bb_fwd([], [], Restored, Acc) -> {Acc, Restored}; +scan_bb_fwd([I|Is], SplitHere0, Restored0, Acc0) -> + #instr{def=Def, use=Use} = I, + {ToRestore, SplitHere1} = + lists:partition(fun(R) -> lists:member(R, Use) end, SplitHere0), + SplitHere = lists:filter(fun(R) -> not lists:member(R, Def) end, SplitHere1), + Acc = + case ToRestore of + [] -> [I | Acc0]; + _ -> [I, #mode3_restores{temps=ToRestore} | Acc0] + end, + scan_bb_fwd(Is, SplitHere, ToRestore ++ Restored0, Acc). + +%% Scans the code backwards, collecting def/use counts and mode2 spills +-spec scan_bb([code_elem()], float(), rdefsetf(), liveset(), ducount(), + [temp()], [code_elem()]) + -> {[code_elem()], ducount(), [temp()]}. +scan_bb([], _Wt, _RDefout, _Liveout, DUCount, Spills, Acc) -> + {Acc, DUCount, Spills}; +scan_bb([I=#mode3_restores{}|Is], Wt, RDefout, Liveout, DUCount, Spills, Acc) -> + scan_bb(Is, Wt, RDefout, Liveout, DUCount, Spills, [I|Acc]); +scan_bb([I|Is], Wt, RDefout, Liveout, DUCount0, Spills0, Acc0) -> + #instr{def=Def,use=Use} = I, + DUCount = ducount_add(Use, Wt, ducount_add(Def, Wt, DUCount0)), + Livein = liveness_step(I, Liveout), + RDefin = rdef_step(I, RDefout), + %% The temps that would be spilled after I in mode 2 + NewSpills = ordset_subtract_rdefsetf( + ordsets:intersection(Def, Liveout), + RDefout), + ?ASSERT(NewSpills =:= (NewSpills -- Spills0)), + Spills = NewSpills ++ Spills0, + Acc1 = case NewSpills of + [] -> Acc0; + _ -> [#mode2_spills{temps=NewSpills}|Acc0] + end, + scan_bb(Is, Wt, RDefin, Livein, DUCount, Spills, [I|Acc1]). + +-spec liveness_step(instr(), liveset()) -> liveset(). +liveness_step(#instr{def=Def, use=Use}, Liveout) -> + ordsets:union(Use, ordsets:subtract(Liveout, Def)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% First pass: compute basic-block weighting + +-type weights() :: no_bb_weights + | {hipe_bb_weights:bb_weights(), float()}. + +-spec weight(label(), weights()) -> float(). +weight(L, Weights) -> weight_scaled(L, 1.0, Weights). + +-spec compute_weights(target_cfg(), target_module(), target_context(), + comp_options()) -> weights(). +compute_weights(CFG, TargetMod, TargetContext, Options) -> + case proplists:get_bool(range_split_weights, Options) of + false -> no_bb_weights; + true -> + {hipe_bb_weights:compute(CFG, TargetMod, TargetContext), + ?WEIGHT_CONST_FUN(proplists:get_value(range_split_weight_power, + Options, ?DEFAULT_WEIGHT_POWER))} + end. + +-spec weight_scaled(label(), float(), weights()) -> float(). +weight_scaled(_L, _Scale, no_bb_weights) -> 1.0; +weight_scaled(L, Scale, {Weights, Const}) -> + Wt0 = hipe_bb_weights:weight(L, Weights) * Scale, + Wt = erlang:min(erlang:max(Wt0, 0.0000000000000000001), 10000.0), + ?WEIGHT_FUN(Wt, Const). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Heuristic splitting decision. +%% +%% Decide which temps to split, in which parts, and pick new names for them. 
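The section below turns the collected costs into splitting decisions. As a worked example with invented numbers: if a temp's def/use gain in a partition is 12.0 and its estimated costs are 4.0 for mode1, 7.0 for mode2 and 9.0 for mode3, the default min_gain of 1.1 lets it be split (1.1 * 4.0 < 12.0), and the default mode1_fudge of 1.1 keeps mode1 the winner (1.1 * 4.0 is below both 7.0 and 9.0). A compact stand-alone sketch of the same rule applied by decide_temps/6 below (the real code additionally skips precoloured temps and temps whose mode1 cost is 0.0, i.e. partition-local ones):

-module(range_split_heur_sketch).
-export([demo/0]).

choose(Gain, Cost1, Cost2, Cost3, MinGain, Mode1Fudge) ->
  Split = (MinGain*Cost1 < Gain) orelse (MinGain*Cost2 < Gain)
    orelse (MinGain*Cost3 < Gain),
  if not Split -> no_split;
     Mode1Fudge*Cost1 < Cost2, Mode1Fudge*Cost1 < Cost3 -> mode1;
     Cost2 < Cost3 -> mode2;
     true -> mode3
  end.

demo() ->
  mode1    = choose(12.0, 4.0, 7.0, 9.0, 1.1, 1.1),
  no_split = choose( 4.0, 4.0, 7.0, 9.0, 1.1, 1.1),
  ok.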
+-type spill_mode() :: mode1 % Spill temps at partition exits + | mode2 % Spill temps at definitions + | mode3.% Spill temps at definitions, restore temps at uses +-type ren() :: #{temp() => {spill_mode(), temp()}}. +-type renames() :: #{label() => ren()}. + +-record(heur_par, { + mode1_fudge :: float(), + min_gain :: float() + }). +-type heur_par() :: #heur_par{}. + +-spec decide(ducounts(), costs(), target(), comp_options()) -> renames(). +decide(DUCounts, Costs, Target, Options) -> + Par = #heur_par{ + mode1_fudge = proplists:get_value(range_split_mode1_fudge, Options, + ?DEFAULT_MODE1_FUDGE), + min_gain = proplists:get_value(range_split_min_gain, Options, + ?DEFAULT_MIN_GAIN)}, + decide_parts(maps:to_list(DUCounts), Costs, Target, Par, #{}). + +-spec decide_parts([{part_key(), ducount()}], costs(), target(), + heur_par(), renames()) + -> renames(). +decide_parts([], _Costs, _Target, _Par, Acc) -> Acc; +decide_parts([{Part,DUCount}|Ps], Costs, Target, Par, Acc) -> + Spills = decide_temps(ducount_to_list(DUCount), Part, Costs, Target, Par, + #{}), + decide_parts(Ps, Costs, Target, Par, Acc#{Part => Spills}). + +-spec decide_temps([{temp(), float()}], part_key(), costs(), target(), + heur_par(), ren()) + -> ren(). +decide_temps([], _Part, _Costs, _Target, _Par, Acc) -> Acc; +decide_temps([{Temp, SpillGain}|Ts], Part, Costs, Target, Par, Acc0) -> + SpillCost1 = costs_query(Temp, entry1, Part, Costs) + + costs_query(Temp, exit, Part, Costs), + SpillCost2 = costs_query(Temp, entry2, Part, Costs) + + costs_query(Temp, spill, Part, Costs), + SpillCost3 = costs_query(Temp, restore, Part, Costs), + Acc = + %% SpillCost1 =:= 0.0 usually means the temp is local to the partition; + %% hence no need to split it + case (SpillCost1 =/= 0.0) %% maps:is_key(Temp, S) + andalso (not is_precoloured(Temp, Target)) + andalso ((Par#heur_par.min_gain*SpillCost1 < SpillGain) + orelse (Par#heur_par.min_gain*SpillCost2 < SpillGain) + orelse (Par#heur_par.min_gain*SpillCost3 < SpillGain)) + of + false -> Acc0; + true -> + Mode = + if Par#heur_par.mode1_fudge*SpillCost1 < SpillCost2, + Par#heur_par.mode1_fudge*SpillCost1 < SpillCost3 -> + mode1; + SpillCost2 < SpillCost3 -> + mode2; + true -> + mode3 + end, + Acc0#{Temp => {Mode, new_reg_nr(Target)}} + end, + decide_temps(Ts, Part, Costs, Target, Par, Acc). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Eighth pass: Rewrite program performing range splitting. + +-spec rewrite(cfg(), target_cfg(), target(), liveness(), plive(), defs(), + avail(), part_dsets_map(), renames(), temps()) + -> target_cfg(). +rewrite(#cfg{bbs=BBs}, TCFG, Target, Liveness, PLive, Defs, Avail, DSets, + Renames, Temps) -> + rewrite_bbs(maps:to_list(BBs), Target, Liveness, PLive, Defs, Avail, DSets, + Renames, Temps, TCFG). + +-spec rewrite_bbs([{label(), bb()}], target(), liveness(), plive(), defs(), + avail(), part_dsets_map(), renames(), temps(), target_cfg()) + -> target_cfg(). 
+rewrite_bbs([], _Target, _Liveness, _PLive, _Defs, _Avail, _DSets, _Renames, + _Temps, TCFG) -> + TCFG; +rewrite_bbs([{L,BB}|BBs], Target, Liveness, PLive, Defs, Avail, DSets, Renames, + Temps, TCFG0) -> + Code0Rev = lists:reverse(bb_code(BB)), + EntryRen = maps:get(maps:get(L,DSets), Renames), + M3Ren = mode3_block_renameset(L, Avail), + SubstFun = rewrite_subst_fun(Target, EntryRen, M3Ren), + Fun = fun(I) -> subst_temps(SubstFun, I, Target) end, + {Code, TCFG} = + case bb_has_call(BB) of + false -> + Code1 = rewrite_instrs(Code0Rev, Fun, EntryRen, M3Ren, Temps, Target, + []), + {Code1, TCFG0}; + true -> + CallI0 = hd(Code0Rev), + Succ = bb_succ(BB), + {CallTI, TCFG1} = inject_restores(Succ, Target, Liveness, PLive, DSets, + Renames, Temps, CallI0#instr.i, TCFG0), + Liveout1 = liveness_step(CallI0, liveout(Liveness, L, Target)), + Defout = defbutlast(L, Defs), + SpillMap = mk_spillmap(EntryRen, Liveout1, Defout, Temps, Target), + Code1 = rewrite_instrs(tl(Code0Rev), Fun, EntryRen, M3Ren, Temps, + Target, []), + Code2 = lift_spills(lists:reverse(Code1), Target, SpillMap, [CallTI]), + {Code2, TCFG1} + end, + TBB = hipe_bb:code_update(bb(TCFG, L, Target), Code), + rewrite_bbs(BBs, Target, Liveness, PLive, Defs, Avail, DSets, Renames, Temps, + update_bb(TCFG, L, TBB, Target)). + +-spec rewrite_instrs([code_elem()], rewrite_fun(), ren(), + ordsets:ordset(temp()), temps(), target(), + [target_instr()]) + -> [target_instr()]. +rewrite_instrs([], _Fun, _Ren, _M3Ren, _Temps, _Target, Acc) -> Acc; +rewrite_instrs([I|Is], Fun, Ren, M3Ren, Temps, Target, Acc0) -> + Acc = + case I of + #instr{i=TI} -> [Fun(TI)|Acc0]; + #mode2_spills{temps=Mode2Spills} -> + add_mode2_spills(Mode2Spills, Target, Ren, M3Ren, Temps, Acc0); + #mode3_restores{temps=Mode3Restores} -> + add_mode3_restores(Mode3Restores, Target, Ren, Temps, Acc0) + end, + rewrite_instrs(Is, Fun, Ren, M3Ren, Temps, Target, Acc). + +-spec add_mode2_spills(ordsets:ordset(temp()), target(), ren(), + ordsets:ordset(temp()), temps(), [target_instr()]) + -> [target_instr()]. +add_mode2_spills([], _Target, _Ren, _M3Ren, _Temps, Acc) -> Acc; +add_mode2_spills([R|Rs], Target, Ren, M3Ren, Temps, Acc0) -> + Acc = + case Ren of + #{R := {Mode, NewName}} when Mode =:= mode2; Mode =:= mode3 -> + case Mode =/= mode3 orelse lists:member(R, M3Ren) of + false -> Acc0; + true -> + #{R := T} = Temps, + SpillInstr = mk_move(update_reg_nr(NewName, T, Target), T, Target), + [SpillInstr|Acc0] + end; + #{} -> + Acc0 + end, + add_mode2_spills(Rs, Target, Ren, M3Ren, Temps, Acc). + +-spec add_mode3_restores(ordsets:ordset(temp()), target(), ren(), temps(), + [target_instr()]) + -> [target_instr()]. +add_mode3_restores([], _Target, _Ren, _Temps, Acc) -> Acc; +add_mode3_restores([R|Rs], Target, Ren, Temps, Acc) -> + case Ren of + #{R := {mode3, NewName}} -> + #{R := T} = Temps, + RestoreInstr = mk_move(T, update_reg_nr(NewName, T, Target), Target), + add_mode3_restores(Rs, Target, Ren, Temps, [RestoreInstr|Acc]); + #{} -> + add_mode3_restores(Rs, Target, Ren, Temps, Acc) + end. + +-type rewrite_fun() :: fun((target_instr()) -> target_instr()). +-type subst_fun() :: fun((target_temp()) -> target_temp()). +-spec rewrite_subst_fun(target(), ren(), ordsets:ordset(temp())) -> subst_fun(). +rewrite_subst_fun(Target, Ren, M3Ren) -> + fun(Temp) -> + Reg = reg_nr(Temp, Target), + case Ren of + #{Reg := {Mode, NewName}} -> + case Mode =/= mode3 orelse lists:member(Reg, M3Ren) of + false -> Temp; + true -> update_reg_nr(NewName, Temp, Target) + end; + #{} -> Temp + end + end. 
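As a concrete (made-up) illustration of the mode3 machinery just defined: a temp that decide_temps/6 assigns {mode3, NewName} keeps its original, spilled name except in blocks where it is actually split; there the first use is preceded by a restore copy emitted from the #mode3_restores{} marker and the uses themselves are renamed by rewrite_subst_fun/3. A hypothetical before/after (temp numbers invented for illustration):

%% Illustration only, not part of the patch; t17 is renamed to t42 (mode3):
%%
%%   before rewrite:          after rewrite:
%%     ... t17 spilled ...      ... t17 spilled ...
%%     use(t17)                 t42 <- t17     % restore from add_mode3_restores/5
%%     use(t17)                 use(t42)       % uses renamed by the subst fun
%%                              use(t42)       % only one restore per block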
+ +-type spillmap() :: [{temp(), target_instr()}]. +-spec mk_spillmap(ren(), liveset(), defsetf(), temps(), target()) + -> spillmap(). +mk_spillmap(Ren, Livein, Defout, Temps, Target) -> + [begin + Temp = maps:get(Reg, Temps), + {NewName, mk_move(update_reg_nr(NewName, Temp, Target), Temp, Target)} + end || {Reg, {mode1, NewName}} <- maps:to_list(Ren), + lists:member(Reg, Livein), defsetf_member(Reg, Defout)]. + +-spec mk_restores(ren(), liveset(), liveset(), temps(), target()) + -> [target_instr()]. +mk_restores(Ren, Livein, PLivein, Temps, Target) -> + [begin + Temp = maps:get(Reg, Temps), + mk_move(Temp, update_reg_nr(NewName, Temp, Target), Target) + end || {Reg, {Mode, NewName}} <- maps:to_list(Ren), + ( (Mode =:= mode1 andalso lists:member(Reg, Livein )) + orelse (Mode =:= mode2 andalso lists:member(Reg, PLivein)))]. + +-spec inject_restores([label()], target(), liveness(), plive(), + part_dsets_map(), renames(), temps(), target_instr(), + target_cfg()) + -> {target_instr(), target_cfg()}. +inject_restores([], _Target, _Liveness, _PLive, _DSets, _Renames, _Temps, CFTI, + TCFG) -> + {CFTI, TCFG}; +inject_restores([L|Ls], Target, Liveness, PLive, DSets, Renames, Temps, CFTI0, + TCFG0) -> + Ren = maps:get(maps:get(L,DSets), Renames), + Livein = livein(Liveness, L, Target), + PLivein = plivein(L, PLive), + {CFTI, TCFG} = + case mk_restores(Ren, Livein, PLivein, Temps, Target) of + [] -> {CFTI0, TCFG0}; % optimisation + Restores -> + RestBBLbl = new_label(Target), + Code = Restores ++ [mk_goto(L, Target)], + CFTI1 = redirect_jmp(CFTI0, L, RestBBLbl, Target), + TCFG1 = update_bb(TCFG0, RestBBLbl, hipe_bb:mk_bb(Code), Target), + {CFTI1, TCFG1} + end, + inject_restores(Ls, Target, Liveness, PLive, DSets, Renames, Temps, CFTI, + TCFG). + +%% Heuristic. Move spills up until we meet the edge of the BB or a definition of +%% that temp. +-spec lift_spills([target_instr()], target(), spillmap(), [target_instr()]) + -> [target_instr()]. +lift_spills([], _Target, SpillMap, Acc) -> + [SpillI || {_, SpillI} <- SpillMap] ++ Acc; +lift_spills([I|Is], Target, SpillMap0, Acc) -> + Def = reg_defines(I, Target), + {Spills0, SpillMap} = + lists:partition(fun({Reg,_}) -> lists:member(Reg, Def) end, SpillMap0), + Spills = [SpillI || {_, SpillI} <- Spills0], + lift_spills(Is, Target, SpillMap, [I|Spills ++ Acc]). + +reg_defines(I, Target) -> + reg_names(defines(I,Target), Target). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Costs ADT +%% +%% Keeps track of cumulative cost of spilling temps in particular partitions +%% using particular spill modes. +-type cost_map() :: #{[part_key()|temp()] => float()}. +-type cost_key() :: entry1 | entry2 | exit | spill | restore. +-record(costs, {entry1 = #{} :: cost_map() + ,entry2 = #{} :: cost_map() + ,exit = #{} :: cost_map() + ,spill = #{} :: cost_map() + ,restore = #{} :: cost_map() + }). +-type costs() :: #costs{}. + +-spec costs_new() -> costs(). +costs_new() -> #costs{}. + +-spec costs_insert(cost_key(), part_key(), float(), liveset(), costs()) + -> costs(). 
+costs_insert(entry1, A, Weight, Liveset, Costs=#costs{entry1=Entry1}) -> + Costs#costs{entry1=costs_insert_1(A, Weight, Liveset, Entry1)}; +costs_insert(entry2, A, Weight, Liveset, Costs=#costs{entry2=Entry2}) -> + Costs#costs{entry2=costs_insert_1(A, Weight, Liveset, Entry2)}; +costs_insert(exit, A, Weight, Liveset, Costs=#costs{exit=Exit}) -> + Costs#costs{exit=costs_insert_1(A, Weight, Liveset, Exit)}; +costs_insert(spill, A, Weight, Liveset, Costs=#costs{spill=Spill}) -> + Costs#costs{spill=costs_insert_1(A, Weight, Liveset, Spill)}; +costs_insert(restore, A, Weight, Liveset, Costs=#costs{restore=Restore}) -> + Costs#costs{restore=costs_insert_1(A, Weight, Liveset, Restore)}. + +costs_insert_1(A, Weight, Liveset, CostMap0) when is_float(Weight) -> + lists:foldl(fun(Live, CostMap1) -> + map_update_counter([A|Live], Weight, CostMap1) + end, CostMap0, Liveset). + +-spec costs_map_roots(part_dsets(), costs()) -> {costs(), part_dsets()}. +costs_map_roots(DSets0, Costs) -> + {Entry1, DSets1} = costs_map_roots_1(DSets0, Costs#costs.entry1), + {Entry2, DSets2} = costs_map_roots_1(DSets1, Costs#costs.entry2), + {Exit, DSets3} = costs_map_roots_1(DSets2, Costs#costs.exit), + {Spill, DSets4} = costs_map_roots_1(DSets3, Costs#costs.spill), + {Restore, DSets} = costs_map_roots_1(DSets4, Costs#costs.restore), + {#costs{entry1=Entry1,entry2=Entry2,exit=Exit,spill=Spill,restore=Restore}, + DSets}. + +costs_map_roots_1(DSets0, CostMap) -> + {NewEs, DSets} = lists:mapfoldl(fun({[A|T], Wt}, DSets1) -> + {AR, DSets2} = hipe_dsets:find(A, DSets1), + {{[AR|T], Wt}, DSets2} + end, DSets0, maps:to_list(CostMap)), + {maps_from_list_merge(NewEs, fun erlang:'+'/2, #{}), DSets}. + +maps_from_list_merge([], _MF, Acc) -> Acc; +maps_from_list_merge([{K,V}|Ps], MF, Acc) -> + maps_from_list_merge(Ps, MF, case Acc of + #{K := OV} -> Acc#{K := MF(V, OV)}; + #{} -> Acc#{K => V} + end). + +-spec costs_query(temp(), cost_key(), part_key(), costs()) -> float(). +costs_query(Temp, entry1, Part, #costs{entry1=Entry1}) -> + costs_query_1(Temp, Part, Entry1); +costs_query(Temp, entry2, Part, #costs{entry2=Entry2}) -> + costs_query_1(Temp, Part, Entry2); +costs_query(Temp, exit, Part, #costs{exit=Exit}) -> + costs_query_1(Temp, Part, Exit); +costs_query(Temp, spill, Part, #costs{spill=Spill}) -> + costs_query_1(Temp, Part, Spill); +costs_query(Temp, restore, Part, #costs{restore=Restore}) -> + costs_query_1(Temp, Part, Restore). + +costs_query_1(Temp, Part, CostMap) -> + Key = [Part|Temp], + case CostMap of + #{Key := Wt} -> Wt; + #{} -> 0.0 + end. + +-spec map_update_counter(Key, number(), #{Key => number(), OK => OV}) + -> #{Key := number(), OK => OV}. +map_update_counter(Key, Incr, Map) -> + case Map of + #{Key := Orig} -> Map#{Key := Orig + Incr}; + #{} -> Map#{Key => Incr} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Def and use counting ADT +-type ducount() :: #{temp() => float()}. + +-spec ducount_new() -> ducount(). +ducount_new() -> #{}. + +-spec ducount_add([temp()], float(), ducount()) -> ducount(). +ducount_add([], _Weight, DUCount) -> DUCount; +ducount_add([T|Ts], Weight, DUCount0) -> + DUCount = + case DUCount0 of + #{T := Count} -> DUCount0#{T := Count + Weight}; + #{} -> DUCount0#{T => Weight} + end, + ducount_add(Ts, Weight, DUCount). + +ducount_to_list(DUCount) -> maps:to_list(DUCount). + +-spec ducount_merge(ducount(), ducount()) -> ducount(). 
+ducount_merge(DCA, DCB) when map_size(DCA) < map_size(DCB) -> + ducount_merge_1(ducount_to_list(DCA), DCB); +ducount_merge(DCA, DCB) when map_size(DCA) >= map_size(DCB) -> + ducount_merge_1(ducount_to_list(DCB), DCA). + +ducount_merge_1([], DUCount) -> DUCount; +ducount_merge_1([{T,AC}|Ts], DUCount0) -> + DUCount = + case DUCount0 of + #{T := BC} -> DUCount0#{T := AC + BC}; + #{} -> DUCount0#{T => AC} + end, + ducount_merge_1(Ts, DUCount). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Target module interface functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-define(TGT_IFACE_0(N), N( {M,C}) -> M:N( C)). +-define(TGT_IFACE_1(N), N(A1, {M,C}) -> M:N(A1, C)). +-define(TGT_IFACE_2(N), N(A1,A2, {M,C}) -> M:N(A1,A2, C)). +-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)). + +?TGT_IFACE_2(bb). +?TGT_IFACE_1(def_use). +?TGT_IFACE_1(defines). +?TGT_IFACE_1(defines_all_alloc). +?TGT_IFACE_1(is_precoloured). +?TGT_IFACE_1(mk_goto). +?TGT_IFACE_2(mk_move). +?TGT_IFACE_0(new_label). +?TGT_IFACE_0(new_reg_nr). +?TGT_IFACE_1(number_of_temporaries). +?TGT_IFACE_3(redirect_jmp). +?TGT_IFACE_1(reg_nr). +?TGT_IFACE_1(reverse_postorder). +?TGT_IFACE_2(subst_temps). +?TGT_IFACE_3(update_bb). +?TGT_IFACE_2(update_reg_nr). + +branch_preds(Instr, {TgtMod,TgtCtx}) -> + merge_sorted_preds(lists:keysort(1, TgtMod:branch_preds(Instr, TgtCtx))). + +livein(Liveness, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:livein(Liveness, L, TgtCtx), Target)). + +liveout(Liveness, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:liveout(Liveness, L, TgtCtx), Target)). + +merge_sorted_preds([]) -> []; +merge_sorted_preds([{L, P1}, {L, P2}|LPs]) -> + merge_sorted_preds([{L, P1+P2}|LPs]); +merge_sorted_preds([LP|LPs]) -> [LP|merge_sorted_preds(LPs)]. + +reg_names(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. diff --git a/lib/hipe/regalloc/hipe_regalloc_loop.erl b/lib/hipe/regalloc/hipe_regalloc_loop.erl index 5bbb0ba7c1..29ef3adcc2 100644 --- a/lib/hipe/regalloc/hipe_regalloc_loop.erl +++ b/lib/hipe/regalloc/hipe_regalloc_loop.erl @@ -32,9 +32,11 @@ ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod, TargetCtx) -> ra_common(CFG0, Liveness0, SpillIndex, Options, RegAllocMod, TargetMod, TargetCtx) -> ?inc_counter(ra_calls_counter, 1), - SpillLimit0 = TargetMod:number_of_temporaries(CFG0, TargetCtx), + {CFG1, Liveness1} = + do_range_split(CFG0, Liveness0, TargetMod, TargetCtx, Options), + SpillLimit0 = TargetMod:number_of_temporaries(CFG1, TargetCtx), {Coloring, _, CFG, Liveness} = - call_allocator_initial(CFG0, Liveness0, SpillLimit0, SpillIndex, Options, + call_allocator_initial(CFG1, Liveness1, SpillLimit0, SpillIndex, Options, RegAllocMod, TargetMod, TargetCtx), %% The first iteration, the hipe_regalloc_prepass may create new temps, these %% should not end up above SpillLimit. @@ -96,3 +98,20 @@ call_allocator(CFG, Liveness, SpillLimit, SpillIndex, Options, RegAllocMod, RegAllocMod:regalloc(CFG, Liveness, SpillIndex, SpillLimit, TargetMod, TargetCtx, Options) end. 
+ +do_range_split(CFG0, Liveness0, TgtMod, TgtCtx, Options) -> + {CFG2, Liveness1} = + case proplists:get_bool(ra_restore_reuse, Options) of + true -> + CFG1 = hipe_restore_reuse:split(CFG0, Liveness0, TgtMod, TgtCtx), + {CFG1, TgtMod:analyze(CFG1, TgtCtx)}; + false -> + {CFG0, Liveness0} + end, + case proplists:get_bool(ra_range_split, Options) of + true -> + CFG3 = hipe_range_split:split(CFG2, Liveness1, TgtMod, TgtCtx, Options), + {CFG3, TgtMod:analyze(CFG3, TgtCtx)}; + false -> + {CFG2, Liveness1} + end. diff --git a/lib/hipe/regalloc/hipe_regalloc_prepass.erl b/lib/hipe/regalloc/hipe_regalloc_prepass.erl index e212420ad2..5024840237 100644 --- a/lib/hipe/regalloc/hipe_regalloc_prepass.erl +++ b/lib/hipe/regalloc/hipe_regalloc_prepass.erl @@ -483,8 +483,8 @@ merge_pointless_splits_1([], _ScanBBs, DSets, Acc) -> {Acc, DSets}; merge_pointless_splits_1([P={_,{single,_}}|Ps], ScanBBs, DSets, Acc) -> merge_pointless_splits_1(Ps, ScanBBs, DSets, [P|Acc]); merge_pointless_splits_1([P0={L,{split,_,_}}|Ps], ScanBBs, DSets0, Acc) -> - {EntryRoot, DSets1} = dsets_find({entry,L}, DSets0), - {ExitRoot, DSets} = dsets_find({exit,L}, DSets1), + {EntryRoot, DSets1} = hipe_dsets:find({entry,L}, DSets0), + {ExitRoot, DSets} = hipe_dsets:find({exit,L}, DSets1), case EntryRoot =:= ExitRoot of false -> merge_pointless_splits_1(Ps, ScanBBs, DSets, [P0|Acc]); true -> @@ -501,7 +501,7 @@ merge_pointless_splits_1([P0={L,{split,_,_}}|Ps], ScanBBs, DSets0, Acc) -> -spec merge_small_parts(bb_dsets()) -> {bb_dsets_rllist(), bb_dsets()}. merge_small_parts(DSets0) -> - {RLList, DSets1} = dsets_to_rllist(DSets0), + {RLList, DSets1} = hipe_dsets:to_rllist(DSets0), RLLList = [{R, length(Elems), Elems} || {R, Elems} <- RLList], merge_small_parts_1(RLLList, DSets1, []). @@ -518,8 +518,8 @@ merge_small_parts_1([Fst,{R, L, Es}|Ps], DSets, Acc) merge_small_parts_1([Fst|Ps], DSets, [{R,Es}|Acc]); merge_small_parts_1([{R1,L1,Es1},{R2,L2,Es2}|Ps], DSets0, Acc) -> ?ASSERT(L1 < ?TUNE_TOO_FEW_BBS andalso L2 < ?TUNE_TOO_FEW_BBS), - DSets1 = dsets_union(R1, R2, DSets0), - {R, DSets} = dsets_find(R1, DSets1), + DSets1 = hipe_dsets:union(R1, R2, DSets0), + {R, DSets} = hipe_dsets:find(R1, DSets1), merge_small_parts_1([{R,L2+L1,Es2++Es1}|Ps], DSets, Acc). %% @doc Partition an ordering over BBs into subsequences for the dsets that @@ -531,8 +531,8 @@ part_order(Lbs, DSets) -> part_order(Lbs, DSets, #{}). part_order([], DSets, Acc) -> {Acc, DSets}; part_order([L|Ls], DSets0, Acc0) -> - {EntryRoot, DSets1} = dsets_find({entry,L}, DSets0), - {ExitRoot, DSets2} = dsets_find({exit,L}, DSets1), + {EntryRoot, DSets1} = hipe_dsets:find({entry,L}, DSets0), + {ExitRoot, DSets2} = hipe_dsets:find({exit,L}, DSets1), Acc1 = map_append(EntryRoot, L, Acc0), %% Only include the label once if both entry and exit is in same partition Acc2 = case EntryRoot =:= ExitRoot of @@ -558,73 +558,26 @@ map_append(Key, Elem, Map) -> %% split point, and one from the end to the last split point. -type bb_dset_key() :: {entry | exit, label()}. --type bb_dsets() :: dsets(bb_dset_key()). +-type bb_dsets() :: hipe_dsets:dsets(bb_dset_key()). -type bb_dsets_rllist() :: [{bb_dset_key(), [bb_dset_key()]}]. -spec initial_dsets(target_cfg(), module(), target_context()) -> bb_dsets(). 
initial_dsets(CFG, TgtMod, TgtCtx) -> Labels = TgtMod:labels(CFG, TgtCtx), - DSets0 = dsets_new(lists:append([[{entry,L},{exit,L}] || L <- Labels])), + DSets0 = hipe_dsets:new(lists:append([[{entry,L},{exit,L}] || L <- Labels])), Edges = lists:append([[{L, S} || S <- hipe_gen_cfg:succ(CFG, L)] || L <- Labels]), - lists:foldl(fun({X, Y}, DS) -> dsets_union({exit,X}, {entry,Y}, DS) end, + lists:foldl(fun({X, Y}, DS) -> hipe_dsets:union({exit,X}, {entry,Y}, DS) end, DSets0, Edges). -spec join_whole_blocks(part_bb_list(), bb_dsets()) -> bb_dsets(). join_whole_blocks(PartBBList, DSets0) -> - lists:foldl(fun({L, {single, _}}, DS) -> dsets_union({entry,L}, {exit,L}, DS); + lists:foldl(fun({L, {single, _}}, DS) -> + hipe_dsets:union({entry,L}, {exit,L}, DS); ({_, {split, _, _}}, DS) -> DS end, DSets0, PartBBList). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% The disjoint set forests data structure, for elements of arbitrary types. -%% Note that the find operation mutates the set. -%% -%% We could do this more efficiently if we restricted the elements to integers, -%% and used the (mutable) hipe arrays. For arbitrary terms ETS could be used, -%% for a persistent interface (which isn't that nice when even accessors return -%% modified copies), the array module could be used. --type dsets(X) :: #{X => {node, X} | {root, non_neg_integer()}}. - --spec dsets_new([E]) -> dsets(E). -dsets_new(Elems) -> maps:from_list([{E,{root,0}} || E <- Elems]). - --spec dsets_find(E, dsets(E)) -> {E, dsets(E)}. -dsets_find(E, DS0) -> - case DS0 of - #{E := {root,_}} -> {E, DS0}; - #{E := {node,N}} -> - case dsets_find(N, DS0) of - {N, _}=T -> T; - {R, DS1} -> {R, DS1#{E := {node,R}}} - end - ;_ -> error(badarg, [E, DS0]) - end. - --spec dsets_union(E, E, dsets(E)) -> dsets(E). -dsets_union(X, Y, DS0) -> - {XRoot, DS1} = dsets_find(X, DS0), - case dsets_find(Y, DS1) of - {XRoot, DS2} -> DS2; - {YRoot, DS2} -> - #{XRoot := {root,XRR}, YRoot := {root,YRR}} = DS2, - if XRR < YRR -> DS2#{XRoot := {node,YRoot}}; - XRR > YRR -> DS2#{YRoot := {node,XRoot}}; - true -> DS2#{YRoot := {node,XRoot}, XRoot := {root,XRR+1}} - end - end. - --spec dsets_to_rllist(dsets(E)) -> {[{Root::E, Elems::[E]}], dsets(E)}. -dsets_to_rllist(DS0) -> - {Lists, DS} = dsets_to_rllist(maps:keys(DS0), #{}, DS0), - {maps:to_list(Lists), DS}. - -dsets_to_rllist([], Acc, DS) -> {Acc, DS}; -dsets_to_rllist([E|Es], Acc, DS0) -> - {ERoot, DS} = dsets_find(E, DS0), - dsets_to_rllist(Es, map_append(ERoot, E, Acc), DS). - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Third pass %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Collect all referenced temps in each partition. diff --git a/lib/hipe/regalloc/hipe_restore_reuse.erl b/lib/hipe/regalloc/hipe_restore_reuse.erl new file mode 100644 index 0000000000..2158bd185e --- /dev/null +++ b/lib/hipe/regalloc/hipe_restore_reuse.erl @@ -0,0 +1,516 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% RESTORE REUSE LIVE RANGE SPLITTING PASS +%% +%% This is a simple live range splitter that tries to avoid sequences where a +%% temporary is accessed on stack multiple times by keeping a copy of that temp +%% around in a register. +%% +%% At any point where a temporary that is expected to be spilled (see uses of +%% spills_add_list/2) is defined or used, this pass considers that temporary +%% "available". +%% +%% Limitations: +%% * If a live range part starts with several different restores, this module +%% will introduce a new temp number for each of them, and later be forced to +%% generate phi blocks. It would be more efficient to introduce just a +%% single temp number. That would also remove the need for the phi blocks. +%% * If a live range part ends in a definition, that definition should just +%% define the base temp rather than the substitution, since some CISC +%% targets might be able to inline the memory access in the instruction. +-module(hipe_restore_reuse). + +-export([split/4]). + +%% Exports for hipe_range_split, which uses restore_reuse as one possible spill +%% "mode" +-export([analyse/3 + ,renamed_in_block/2 + ,split_in_block/2 + ]). +-export_type([avail/0]). + +-compile(inline). + +%% -define(DO_ASSERT, 1). +-include("../main/hipe.hrl"). + +-type target_cfg() :: any(). +-type liveness() :: any(). +-type target_module() :: module(). +-type target_context() :: any(). +-type target() :: {target_module(), target_context()}. +-type label() :: non_neg_integer(). +-type reg() :: non_neg_integer(). +-type instr() :: any(). +-type temp() :: any(). + +-spec split(target_cfg(), liveness(), target_module(), target_context()) + -> target_cfg(). +split(CFG, Liveness, TargetMod, TargetContext) -> + Target = {TargetMod, TargetContext}, + Avail = analyse(CFG, Liveness, Target), + rewrite(CFG, Target, Avail). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-opaque avail() :: #{label() => avail_bb()}. + +-record(avail_bb, { + %% Blocks where HasCall is true are considered to have too high + %% register pressure to support a register copy of a temp + has_call :: boolean(), + %% AvailOut: Temps that can be split (are available) + out :: availset(), + %% Gen: AvailOut generated locally + gen :: availset(), + %% WantIn: Temps that are split + want :: regset(), + %% Self: Temps with avail-want pairs locally + self :: regset(), + %% DefIn: Temps shadowed by later def in same live range part + defin :: regset(), + pred :: [label()], + succ :: [label()] + }). +-type avail_bb() :: #avail_bb{}. + +avail_get(L, Avail) -> maps:get(L, Avail). +avail_set(L, Val, Avail) -> maps:put(L, Val, Avail). +avail_has_call(L, Avail) -> (avail_get(L, Avail))#avail_bb.has_call. +avail_out(L, Avail) -> (avail_get(L, Avail))#avail_bb.out. +avail_self(L, Avail) -> (avail_get(L, Avail))#avail_bb.self. +avail_pred(L, Avail) -> (avail_get(L, Avail))#avail_bb.pred. +avail_succ(L, Avail) -> (avail_get(L, Avail))#avail_bb.succ. + +avail_in(L, Avail) -> + case avail_pred(L, Avail) of + [] -> availset_empty(); % entry + Pred -> + lists:foldl(fun(P, ASet) -> + availset_intersect(avail_out(P, Avail), ASet) + end, availset_top(), Pred) + end. + +want_in(L, Avail) -> (avail_get(L, Avail))#avail_bb.want. 
+want_out(L, Avail) -> + lists:foldl(fun(S, Set) -> + ordsets:union(want_in(S, Avail), Set) + end, ordsets:new(), avail_succ(L, Avail)). + +def_in(L, Avail) -> (avail_get(L, Avail))#avail_bb.defin. +def_out(L, Avail) -> + case avail_succ(L, Avail) of + [] -> ordsets:new(); % entry + Succ -> + ordsets:intersection([def_in(S, Avail) || S <- Succ]) + end. + +-type regset() :: ordsets:ordset(reg()). +-type availset() :: top | regset(). +availset_empty() -> []. +availset_top() -> top. +availset_intersect(top, B) -> B; +availset_intersect(A, top) -> A; +availset_intersect(A, B) -> ordsets:intersection(A, B). +availset_union(top, _) -> top; +availset_union(_, top) -> top; +availset_union(A, B) -> ordsets:union(A, B). +ordset_intersect_availset(OS, top) -> OS; +ordset_intersect_availset(OS, AS) -> ordsets:intersection(OS, AS). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Analysis pass +%% +%% The analysis pass collects the set of temps we're interested in splitting +%% (Spills), and computes three dataflow analyses for this subset of temps. +%% +%% Avail, which is the set of temps which are available in register from a +%% previous (potential) spill or restore without going through a HasCall +%% block. +%% Want, which is a liveness analysis for the subset of temps used by an +%% instruction that are also in Avail at that point. In other words, Want is +%% the set of temps that are split (has a register copy) at a particular +%% point. +%% Def, which are the temps that are already going to be spilled later, and so +%% need not be spilled when they're defined. +%% +%% Lastly, it computes the set Self for each block, which is the temps that have +%% avail-want pairs in the same block, and so should be split in that block even +%% if they're not in WantIn for the block. + +-spec analyse(target_cfg(), liveness(), target()) -> avail(). +analyse(CFG, Liveness, Target) -> + Avail0 = analyse_init(CFG, Liveness, Target), + RPO = reverse_postorder(CFG, Target), + AvailLs = [L || L <- RPO, not avail_has_call(L, Avail0)], + Avail1 = avail_dataf(AvailLs, Avail0), + Avail2 = analyse_filter_want(maps:keys(Avail1), Avail1), + PO = lists:reverse(RPO), + want_dataf(PO, Avail2). + +-spec analyse_init(target_cfg(), liveness(), target()) -> avail(). +analyse_init(CFG, Liveness, Target) -> + analyse_init(labels(CFG, Target), CFG, Liveness, Target, #{}, []). + +-spec analyse_init([label()], target_cfg(), liveness(), target(), spillset(), + [{label(), avail_bb()}]) + -> avail(). +analyse_init([], _CFG, _Liveness, Target, Spills0, Acc) -> + %% Precoloured temps can't be spilled + Spills = spills_filter(fun(R) -> not is_precoloured(R, Target) end, Spills0), + analyse_init_1(Acc, Spills, []); +analyse_init([L|Ls], CFG, Liveness, Target, Spills0, Acc) -> + {DefIn, Gen, Self, Want, HasCall0} = + analyse_scan(hipe_bb:code(bb(CFG, L, Target)), Target, + ordsets:new(), ordsets:new(), ordsets:new(), + ordsets:new()), + {Spills, Out, HasCall} = + case HasCall0 of + false -> {Spills0, availset_top(), false}; + {true, CallDefs} -> + Spill = ordsets:subtract(liveout(Liveness, L, Target), CallDefs), + {spills_add_list(Spill, Spills0), Gen, true} + end, + Pred = hipe_gen_cfg:pred(CFG, L), + Succ = hipe_gen_cfg:succ(CFG, L), + Val = #avail_bb{gen=Gen, want=Want, self=Self, out=Out, has_call=HasCall, + pred=Pred, succ=Succ, defin=DefIn}, + analyse_init(Ls, CFG, Liveness, Target, Spills, [{L, Val} | Acc]). 
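As a hypothetical illustration of what this analysis ultimately enables: a temp that analyse_init puts in Spills because it is live across a call, and that is then used twice, gets a single restore into a fresh register rather than two stack accesses (see make_splits/6 and insert_spills/5 further down). In pseudo-RTL, with v11 standing in for a register obtained from new_reg_nr/1:

%% Illustration only, not part of the patch:
%%
%%   before split:            after split:
%%     call f()                 call f()
%%     v12 <- op(v10)           v11 <- v10      % one restore of the spilled v10
%%     v13 <- op(v10)           v12 <- op(v11)
%%                              v13 <- op(v11)  % second use reuses the register
%%
%% A later definition of v10 is likewise renamed to v11 and followed by a
%% write-back  v10 <- v11,  unless the 'def' dataflow shows v10 will be defined
%% again before it can need spilling (insert_spills/5 checks DefOut for this).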
+ +-spec analyse_init_1([{label(), avail_bb()}], spillset(), + [{label(), avail_bb()}]) + -> avail(). +analyse_init_1([], _Spills, Acc) -> maps:from_list(Acc); +analyse_init_1([{L, Val0}|Vs], Spills, Acc) -> + #avail_bb{out=Out,gen=Gen,want=Want,self=Self} = Val0, + Val = Val0#avail_bb{ + out = spills_filter_availset(Out, Spills), + gen = spills_filter_availset(Gen, Spills), + want = spills_filter_availset(Want, Spills), + self = spills_filter_availset(Self, Spills)}, + analyse_init_1(Vs, Spills, [{L, Val} | Acc]). + +-type spillset() :: #{reg() => []}. +-spec spills_add_list([reg()], spillset()) -> spillset(). +spills_add_list([], Spills) -> Spills; +spills_add_list([R|Rs], Spills) -> spills_add_list(Rs, Spills#{R => []}). + +-spec spills_filter_availset(availset(), spillset()) -> availset(). +spills_filter_availset([E|Es], Spills) -> + case Spills of + #{E := _} -> [E|spills_filter_availset(Es, Spills)]; + #{} -> spills_filter_availset(Es, Spills) + end; +spills_filter_availset([], _) -> []; +spills_filter_availset(top, _) -> top. + +spills_filter(Fun, Spills) -> maps:filter(fun(K, _) -> Fun(K) end, Spills). + +-spec analyse_scan([instr()], target(), Defset, Gen, Self, Want) + -> {Defset, Gen, Self, Want, HasCall} when + HasCall :: false | {true, regset()}, + Defset :: regset(), + Gen :: availset(), + Self :: regset(), + Want :: regset(). +analyse_scan([], _Target, Defs, Gen, Self, Want) -> + {Defs, Gen, Self, Want, false}; +analyse_scan([I|Is], Target, Defs0, Gen0, Self0, Want0) -> + {DefL, UseL} = reg_def_use(I, Target), + Use = ordsets:from_list(UseL), + Def = ordsets:from_list(DefL), + Self = ordsets:union(ordsets:intersection(Use, Gen0), Self0), + Want = ordsets:union(ordsets:subtract(Use, Defs0), Want0), + Defs = ordsets:union(Def, Defs0), + case defines_all_alloc(I, Target) of + true -> + [] = Is, %assertion + {Defs, ordsets:new(), Self, Want, {true, Def}}; + false -> + Gen = ordsets:union(ordsets:union(Def, Use), Gen0), + analyse_scan(Is, Target, Defs, Gen, Self, Want) + end. + +-spec avail_dataf([label()], avail()) -> avail(). +avail_dataf(RPO, Avail0) -> + case avail_dataf_once(RPO, Avail0, 0) of + {Avail, 0} -> Avail; + {Avail, _Changed} -> + avail_dataf(RPO, Avail) + end. + +-spec avail_dataf_once([label()], avail(), non_neg_integer()) + -> {avail(), non_neg_integer()}. +avail_dataf_once([], Avail, Changed) -> {Avail, Changed}; +avail_dataf_once([L|Ls], Avail0, Changed0) -> + ABB = #avail_bb{out=OldOut, gen=Gen} = avail_get(L, Avail0), + In = avail_in(L, Avail0), + {Changed, Avail} = + case availset_union(In, Gen) of + OldOut -> {Changed0, Avail0}; + Out -> {Changed0+1, avail_set(L, ABB#avail_bb{out=Out}, Avail0)} + end, + avail_dataf_once(Ls, Avail, Changed). + +-spec analyse_filter_want([label()], avail()) -> avail(). +analyse_filter_want([], Avail) -> Avail; +analyse_filter_want([L|Ls], Avail0) -> + ABB = #avail_bb{want=Want0, defin=DefIn0} = avail_get(L, Avail0), + In = avail_in(L, Avail0), + Want = ordset_intersect_availset(Want0, In), + DefIn = ordset_intersect_availset(DefIn0, In), + Avail = avail_set(L, ABB#avail_bb{want=Want, defin=DefIn}, Avail0), + analyse_filter_want(Ls, Avail). + +-spec want_dataf([label()], avail()) -> avail(). +want_dataf(PO, Avail0) -> + case want_dataf_once(PO, Avail0, 0) of + {Avail, 0} -> Avail; + {Avail, _Changed} -> + want_dataf(PO, Avail) + end. + +-spec want_dataf_once([label()], avail(), non_neg_integer()) + -> {avail(), non_neg_integer()}. 
+want_dataf_once([], Avail, Changed) -> {Avail, Changed}; +want_dataf_once([L|Ls], Avail0, Changed0) -> + ABB0 = #avail_bb{want=OldIn,defin=OldDef} = avail_get(L, Avail0), + AvailIn = avail_in(L, Avail0), + Out = want_out(L, Avail0), + DefOut = def_out(L, Avail0), + {Changed, Avail} = + case {ordsets:union(ordset_intersect_availset(Out, AvailIn), OldIn), + ordsets:union(ordset_intersect_availset(DefOut, AvailIn), OldDef)} + of + {OldIn, OldDef} -> {Changed0, Avail0}; + {In, DefIn} -> + ABB = ABB0#avail_bb{want=In,defin=DefIn}, + {Changed0+1, avail_set(L, ABB, Avail0)} + end, + want_dataf_once(Ls, Avail, Changed). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Rewrite pass +-type subst_dict() :: orddict:orddict(reg(), reg()). +-type input() :: #{label() => subst_dict()}. + +-spec rewrite(target_cfg(), target(), avail()) -> target_cfg(). +rewrite(CFG, Target, Avail) -> + RPO = reverse_postorder(CFG, Target), + rewrite(RPO, Target, Avail, #{}, CFG). + +-spec rewrite([label()], target(), avail(), input(), target_cfg()) + -> target_cfg(). +rewrite([], _Target, _Avail, _Input, CFG) -> CFG; +rewrite([L|Ls], Target, Avail, Input0, CFG0) -> + SplitHere = split_in_block(L, Avail), + {Input1, LInput} = + case Input0 of + #{L := LInput0} -> {Input0, LInput0}; + #{} -> {Input0#{L => []}, []} % entry block + end, + ?ASSERT([] =:= [X || X <- SplitHere, orddict:is_key(X, LInput)]), + ?ASSERT(want_in(L, Avail) =:= orddict:fetch_keys(LInput)), + {CFG1, LOutput} = + case {SplitHere, LInput} of + {[], []} -> % optimisation (rewrite will do nothing, so skip it) + {CFG0, LInput}; + _ -> + Code0 = hipe_bb:code(BB=bb(CFG0, L, Target)), + DefOut = def_out(L, Avail), + {Code, LOutput0, _DefIn} = + rewrite_instrs(Code0, Target, LInput, DefOut, SplitHere), + {update_bb(CFG0, L, hipe_bb:code_update(BB, Code), Target), LOutput0} + end, + {Input, CFG} = rewrite_succs(avail_succ(L, Avail), Target, L, LOutput, Avail, + Input1, CFG1), + rewrite(Ls, Target, Avail, Input, CFG). + +-spec renamed_in_block(label(), avail()) -> ordsets:ordset(reg()). +renamed_in_block(L, Avail) -> + ordsets:union([avail_self(L, Avail), want_in(L, Avail), + want_out(L, Avail)]). + +-spec split_in_block(label(), avail()) -> ordsets:ordset(reg()). +split_in_block(L, Avail) -> + ordsets:subtract(ordsets:union(avail_self(L, Avail), want_out(L, Avail)), + want_in(L, Avail)). + +-spec rewrite_instrs([instr()], target(), subst_dict(), regset(), [reg()]) + -> {[instr()], subst_dict(), regset()}. +rewrite_instrs([], _Target, Output, DefOut, []) -> + {[], Output, DefOut}; +rewrite_instrs([I|Is], Target, Input0, BBDefOut, SplitHere0) -> + {TDef, TUse} = def_use(I, Target), + {Def, Use} = {reg_names(TDef, Target), reg_names(TUse, Target)}, + %% Restores are generated in forward order by picking temps from SplitHere as + %% they're used or defined. After the last instruction, all temps have been + %% picked. + {ISplits, SplitHere} = + lists:partition(fun(R) -> + lists:member(R, Def) orelse lists:member(R, Use) + end, SplitHere0), + {Input, Restores} = + case ISplits of + [] -> {Input0, []}; + _ -> + make_splits(ISplits, Target, TDef, TUse, Input0, []) + end, + %% Here's the recursive call + {Acc0, Output, DefOut} = + rewrite_instrs(Is, Target, Input, BBDefOut, SplitHere), + %% From here we're processing instructions in reverse order, because to avoid + %% redundant spills we need to walk the 'def' dataflow, which is in reverse. 
+ SubstFun = fun(Temp) -> + case orddict:find(reg_nr(Temp, Target), Input) of + {ok, NewTemp} -> NewTemp; + error -> Temp + end + end, + Acc1 = insert_spills(TDef, Target, Input, DefOut, Acc0), + Acc = Restores ++ [subst_temps(SubstFun, I, Target) | Acc1], + DefIn = ordsets:union(DefOut, ordsets:from_list(Def)), + {Acc, Output, DefIn}. + +-spec make_splits([reg()], target(), [temp()], [temp()], subst_dict(), + [instr()]) + -> {subst_dict(), [instr()]}. +make_splits([], _Target, _TDef, _TUse, Input, Acc) -> + {Input, Acc}; +make_splits([S|Ss], Target, TDef, TUse, Input0, Acc0) -> + SubstReg = new_reg_nr(Target), + {Acc, Subst} = + case find_reg_temp(S, TUse, Target) of + error -> + {ok, Temp} = find_reg_temp(S, TDef, Target), + {Acc0, update_reg_nr(SubstReg, Temp, Target)}; + {ok, Temp} -> + Subst0 = update_reg_nr(SubstReg, Temp, Target), + Acc1 = [mk_move(Temp, Subst0, Target) | Acc0], + {Acc1, Subst0} + end, + Input = orddict:store(S, Subst, Input0), + make_splits(Ss, Target, TDef, TUse, Input, Acc). + +-spec find_reg_temp(reg(), [temp()], target()) -> error | {ok, temp()}. +find_reg_temp(_Reg, [], _Target) -> error; +find_reg_temp(Reg, [T|Ts], Target) -> + case reg_nr(T, Target) of + Reg -> {ok, T}; + _ -> find_reg_temp(Reg, Ts, Target) + end. + +-spec insert_spills([temp()], target(), subst_dict(), regset(), [instr()]) + -> [instr()]. +insert_spills([], _Target, _Input, _DefOut, Acc) -> Acc; +insert_spills([T|Ts], Target, Input, DefOut, Acc0) -> + R = reg_nr(T, Target), + Acc = + case orddict:find(R, Input) of + error -> Acc0; + {ok, Subst} -> + case lists:member(R, DefOut) of + true -> Acc0; + false -> [mk_move(Subst, T, Target) | Acc0] + end + end, + insert_spills(Ts, Target, Input, DefOut, Acc). + +-spec rewrite_succs([label()], target(), label(), subst_dict(), avail(), + input(), target_cfg()) -> {input(), target_cfg()}. +rewrite_succs([], _Target, _P, _POutput, _Avail, Input, CFG) -> {Input, CFG}; +rewrite_succs([L|Ls], Target, P, POutput, Avail, Input0, CFG0) -> + NewLInput = orddict_with_ordset(want_in(L, Avail), POutput), + {Input, CFG} = + case Input0 of + #{L := LInput} -> + CFG2 = + case required_phi_moves(LInput, NewLInput) of + [] -> CFG0; + ReqMovs -> + PhiLb = new_label(Target), + Code = [mk_move(S,D,Target) || {S,D} <- ReqMovs] + ++ [mk_goto(L, Target)], + PhiBB = hipe_bb:mk_bb(Code), + CFG1 = update_bb(CFG0, PhiLb, PhiBB, Target), + bb_redirect_jmp(L, PhiLb, P, CFG1, Target) + end, + {Input0, CFG2}; + #{} -> + {Input0#{L => NewLInput}, CFG0} + end, + rewrite_succs(Ls, Target, P, POutput, Avail, Input, CFG). + +-spec bb_redirect_jmp(label(), label(), label(), target_cfg(), target()) + -> target_cfg(). +bb_redirect_jmp(From, To, Lb, CFG, Target) -> + BB0 = bb(CFG, Lb, Target), + Last = redirect_jmp(hipe_bb:last(BB0), From, To, Target), + BB = hipe_bb:code_update(BB0, hipe_bb:butlast(BB0) ++ [Last]), + update_bb(CFG, Lb, BB, Target). + +-spec required_phi_moves(subst_dict(), subst_dict()) -> [{reg(), reg()}]. +required_phi_moves([], []) -> []; +required_phi_moves([P|Is], [P|Os]) -> required_phi_moves(Is, Os); +required_phi_moves([{K, In}|Is], [{K, Out}|Os]) -> + [{Out, In}|required_phi_moves(Is, Os)]. + +%% @doc Returns a new orddict with the keys in Set and their associated values. +-spec orddict_with_ordset(ordsets:ordset(K), orddict:orddict(K, V)) + -> orddict:orddict(K, V). 
+orddict_with_ordset([S|Ss], [{K, _}|_]=Dict) when S < K -> + orddict_with_ordset(Ss, Dict); +orddict_with_ordset([S|_]=Set, [{K, _}|Ds]) when S > K -> + orddict_with_ordset(Set, Ds); +orddict_with_ordset([_S|Ss], [{_K, _}=P|Ds]) -> % _S == _K + [P|orddict_with_ordset(Ss, Ds)]; +orddict_with_ordset([], _) -> []; +orddict_with_ordset(_, []) -> []. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Target module interface functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-define(TGT_IFACE_0(N), N( {M,C}) -> M:N( C)). +-define(TGT_IFACE_1(N), N(A1, {M,C}) -> M:N(A1, C)). +-define(TGT_IFACE_2(N), N(A1,A2, {M,C}) -> M:N(A1,A2, C)). +-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)). + +?TGT_IFACE_2(bb). +?TGT_IFACE_1(def_use). +?TGT_IFACE_1(defines_all_alloc). +?TGT_IFACE_1(is_precoloured). +?TGT_IFACE_1(labels). +?TGT_IFACE_1(mk_goto). +?TGT_IFACE_2(mk_move). +?TGT_IFACE_0(new_label). +?TGT_IFACE_0(new_reg_nr). +?TGT_IFACE_3(redirect_jmp). +?TGT_IFACE_1(reg_nr). +?TGT_IFACE_1(reverse_postorder). +?TGT_IFACE_2(subst_temps). +?TGT_IFACE_3(update_bb). +?TGT_IFACE_2(update_reg_nr). + +liveout(Liveness, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:liveout(Liveness, L, TgtCtx), Target)). + +reg_names(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. + +reg_def_use(I, Target) -> + {TDef, TUse} = def_use(I, Target), + {reg_names(TDef, Target), reg_names(TUse, Target)}. diff --git a/lib/hipe/regalloc/hipe_sparc_specific.erl b/lib/hipe/regalloc/hipe_sparc_specific.erl index 31fca81316..78b6379eba 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific.erl @@ -24,6 +24,7 @@ ,reg_nr/2 ,def_use/2 ,is_move/2 + ,is_spill_move/2 ,is_precoloured/2 ,var_range/2 ,allocatable/1 @@ -46,12 +47,19 @@ %% callbacks for hipe_regalloc_loop -export([check_and_rewrite/3]). -%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights, hipe_range_split +-export([branch_preds/2]). + check_and_rewrite(CFG, Coloring, no_context) -> hipe_sparc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). @@ -115,6 +123,9 @@ bb(CFG,L,_) -> update_bb(CFG,L,BB,_) -> hipe_sparc_cfg:bb_add(CFG,L,BB). +branch_preds(Branch,_) -> + hipe_sparc_cfg:branch_preds(Branch). + %% SPARC stuff def_use(Instruction, Ctx) -> @@ -144,9 +155,24 @@ is_move(Instruction, _) -> false -> false end. +is_spill_move(Instruction, _) -> + hipe_sparc:is_pseudo_spill_move(Instruction). + reg_nr(Reg, _) -> hipe_sparc:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_sparc:mk_pseudo_move(Src, Dst). + +mk_goto(Label, _) -> + hipe_sparc:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + hipe_sparc_cfg:redirect_jmp(Jmp, ToOld, ToNew). + +new_label(_) -> + hipe_gensym:get_next_label(sparc). + new_reg_nr(_) -> hipe_gensym:get_next_var(sparc). 
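The same small set of callbacks (mk_move/3, mk_goto/2, redirect_jmp/4, new_label/1, branch_preds/2, is_spill_move/2) is added to each backend-specific module in the hunks that follow. The new passes only ever reach them through the {TargetMod, TargetContext} pair, via the ?TGT_IFACE_* wrappers shown earlier; an illustrative expansion for the SPARC instance above (the SPARC modules take no_context, as the check_and_rewrite/3 head shows):

%% Sketch only: the wrapper  mk_move(Src, Dst, {M,C}) -> M:mk_move(Src, Dst, C)
%% expands, for the integer SPARC allocator, to
%%   hipe_sparc_specific:mk_move(Src, Dst, no_context)
%% which the hunk above defines as  hipe_sparc:mk_pseudo_move(Src, Dst).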
diff --git a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl index 050d65e1a9..485fdc212a 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl @@ -24,6 +24,7 @@ ,reg_nr/2 ,def_use/2 ,is_move/2 + ,is_spill_move/2 ,is_precoloured/2 ,var_range/2 ,allocatable/1 @@ -46,12 +47,19 @@ %% callbacks for hipe_regalloc_loop -export([check_and_rewrite/3]). -%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights, hipe_range_split +-export([branch_preds/2]). + check_and_rewrite(CFG, Coloring, no_context) -> hipe_sparc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring). @@ -108,6 +116,9 @@ bb(CFG, L, _) -> update_bb(CFG,L,BB,_) -> hipe_sparc_cfg:bb_add(CFG,L,BB). +branch_preds(Branch,_) -> + hipe_sparc_cfg:branch_preds(Branch). + %% SPARC stuff def_use(I, Ctx) -> @@ -125,9 +136,24 @@ defines_all_alloc(I, _) -> is_move(I, _) -> hipe_sparc:is_pseudo_fmove(I). +is_spill_move(I, _) -> + hipe_sparc:is_pseudo_spill_fmove(I). + reg_nr(Reg, _) -> hipe_sparc:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_sparc:mk_pseudo_fmove(Src, Dst). + +mk_goto(Label, _) -> + hipe_sparc:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + hipe_sparc_cfg:redirect_jmp(Jmp, ToOld, ToNew). + +new_label(_) -> + hipe_gensym:get_next_label(sparc). + new_reg_nr(_) -> hipe_gensym:get_next_var(sparc). diff --git a/lib/hipe/regalloc/hipe_x86_specific.erl b/lib/hipe/regalloc/hipe_x86_specific.erl index c1c8dbbcd6..dacfb71b00 100644 --- a/lib/hipe/regalloc/hipe_x86_specific.erl +++ b/lib/hipe/regalloc/hipe_x86_specific.erl @@ -46,6 +46,7 @@ def_use/2, is_arg/2, % used by hipe_ls_regalloc is_move/2, + is_spill_move/2, is_fixed/2, % used by hipe_graph_coloring_regalloc is_global/2, is_precoloured/2, @@ -63,12 +64,19 @@ %% callbacks for hipe_regalloc_loop -export([check_and_rewrite/3]). -%% callbacks for hipe_regalloc_prepass --export([new_reg_nr/1, +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, update_reg_nr/3, update_bb/4, subst_temps/3]). +%% callbacks for hipe_bb_weights +-export([branch_preds/2]). + check_and_rewrite(CFG, Coloring, _) -> ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(CFG, Coloring, 'normal'). @@ -156,6 +164,9 @@ bb(CFG,L,_) -> update_bb(CFG,L,BB,_) -> hipe_x86_cfg:bb_add(CFG,L,BB). +branch_preds(Instr,_) -> + hipe_x86_cfg:branch_preds(Instr). + %% X86 stuff def_use(Instruction,_) -> @@ -200,9 +211,33 @@ is_move(Instruction,_) -> false -> false end. +is_spill_move(Instruction,_) -> + hipe_x86:is_pseudo_spill_move(Instruction). + reg_nr(Reg,_) -> hipe_x86:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_x86:mk_move(Src, Dst). + +mk_goto(Label, _) -> + hipe_x86:mk_jmp_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + Ref = make_ref(), + put(Ref, false), + I = hipe_x86_subst:insn_lbls( + fun(Tgt) -> + if Tgt =:= ToOld -> put(Ref, true), ToNew; + is_integer(Tgt) -> Tgt + end + end, Jmp), + true = erase(Ref), % Assert that something was rewritten + I. + +new_label(_) -> + hipe_gensym:get_next_label(x86). + new_reg_nr(_) -> hipe_gensym:get_next_var(x86). 
diff --git a/lib/hipe/regalloc/hipe_x86_specific_x87.erl b/lib/hipe/regalloc/hipe_x86_specific_x87.erl index 4b4c83f76d..3fe49e1f00 100644 --- a/lib/hipe/regalloc/hipe_x86_specific_x87.erl +++ b/lib/hipe/regalloc/hipe_x86_specific_x87.erl @@ -47,6 +47,7 @@ uses/2, defines/2, defines_all_alloc/2, + is_spill_move/2, is_global/2, reg_nr/2, physical_name/2, @@ -158,6 +159,9 @@ defines(I, _) -> defines_all_alloc(I, _) -> hipe_amd64_defuse:insn_defs_all(I). +is_spill_move(I, _) -> + hipe_x86:is_pseudo_spill_fmove(I). + temp_is_double(Temp) -> hipe_x86:temp_type(Temp) =:= 'double'. diff --git a/lib/hipe/rtl/hipe_icode2rtl.erl b/lib/hipe/rtl/hipe_icode2rtl.erl index 82970f04ab..6da8a76d34 100644 --- a/lib/hipe/rtl/hipe_icode2rtl.erl +++ b/lib/hipe/rtl/hipe_icode2rtl.erl @@ -532,8 +532,12 @@ gen_cond(CondOp, Args, TrueLbl, FalseLbl, Pred) -> FalseLbl, Pred)]; '=:=' -> [Arg1, Arg2] = Args, + TypeTestLbl = hipe_rtl:mk_new_label(), [hipe_rtl:mk_branch(Arg1, eq, Arg2, TrueLbl, - hipe_rtl:label_name(GenLbl), Pred), + hipe_rtl:label_name(TypeTestLbl), Pred), + TypeTestLbl, + hipe_tagscheme:test_either_immed(Arg1, Arg2, FalseLbl, + hipe_rtl:label_name(GenLbl)), GenLbl, hipe_rtl:mk_call([Tmp], op_exact_eqeq_2, Args, TestRetName, [], not_remote), @@ -546,8 +550,12 @@ gen_cond(CondOp, Args, TrueLbl, FalseLbl, Pred) -> TrueLbl, 1-Pred)]; '=/=' -> [Arg1, Arg2] = Args, + TypeTestLbl = hipe_rtl:mk_new_label(), [hipe_rtl:mk_branch(Arg1, eq, Arg2, FalseLbl, - hipe_rtl:label_name(GenLbl), 1-Pred), + hipe_rtl:label_name(TypeTestLbl), 1-Pred), + TypeTestLbl, + hipe_tagscheme:test_either_immed(Arg1, Arg2, TrueLbl, + hipe_rtl:label_name(GenLbl)), GenLbl, hipe_rtl:mk_call([Tmp], op_exact_eqeq_2, Args, TestRetName, [], not_remote), diff --git a/lib/hipe/rtl/hipe_rtl_binary_construct.erl b/lib/hipe/rtl/hipe_rtl_binary_construct.erl index fd0d1f1223..52ea5db382 100644 --- a/lib/hipe/rtl/hipe_rtl_binary_construct.erl +++ b/lib/hipe/rtl/hipe_rtl_binary_construct.erl @@ -137,43 +137,6 @@ gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SystemLimitLblName, ConstTab end end; - {bs_put_integer, Size, Flags, ConstInfo} -> - Aligned = aligned(Flags), - LittleEndian = littleendian(Flags), - [NewOffset] = get_real(Dst), - case is_illegal_const(Size) of - true -> - [hipe_rtl:mk_goto(FalseLblName)]; - false -> - case ConstInfo of - fail -> - [hipe_rtl:mk_goto(FalseLblName)]; - _ -> - case Args of - [Src, Base, Offset] -> - CCode = static_int_c_code(NewOffset, Src, - Base, Offset, Size, - Flags, TrueLblName, - FalseLblName), - put_static_int(NewOffset, Src, Base, Offset, Size, - CCode, Aligned, LittleEndian, TrueLblName); - [Src, Bits, Base, Offset] -> - {SizeCode, SizeReg} = - hipe_rtl_binary:make_size(Size, Bits, - SystemLimitLblName, - FalseLblName), - CCode = int_c_code(NewOffset, Src, Base, - Offset, SizeReg, Flags, - TrueLblName, FalseLblName), - InCode = - put_dynamic_int(NewOffset, Src, Base, Offset, - SizeReg, CCode, Aligned, - LittleEndian, TrueLblName), - SizeCode ++ InCode - end - end - end; - {unsafe_bs_put_integer, 0, _Flags, _ConstInfo} -> [NewOffset] = get_real(Dst), case Args of @@ -186,44 +149,12 @@ gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SystemLimitLblName, ConstTab end; {unsafe_bs_put_integer, Size, Flags, ConstInfo} -> - case is_illegal_const(Size) of - true -> - [hipe_rtl:mk_goto(FalseLblName)]; - false -> - Aligned = aligned(Flags), - LittleEndian = littleendian(Flags), - [NewOffset] = get_real(Dst), - case ConstInfo of - fail -> - [hipe_rtl:mk_goto(FalseLblName)]; - _ -> - case Args of - [Src, 
Base, Offset] -> - CCode = static_int_c_code(NewOffset, Src, - Base, Offset, Size, - Flags, TrueLblName, - FalseLblName), - put_unsafe_static_int(NewOffset, Src, Base, - Offset, Size, - CCode, Aligned, LittleEndian, - TrueLblName); - [Src, Bits, Base, Offset] -> - {SizeCode, SizeReg} = - hipe_rtl_binary:make_size(Size, Bits, - SystemLimitLblName, - FalseLblName), - CCode = int_c_code(NewOffset, Src, Base, - Offset, SizeReg, Flags, - TrueLblName, FalseLblName), - InCode = - put_unsafe_dynamic_int(NewOffset, Src, Base, - Offset, SizeReg, CCode, - Aligned, LittleEndian, - TrueLblName), - SizeCode ++ InCode - end - end - end; + do_bs_put_integer(Dst, Args, Size, Flags, ConstInfo, true, + TrueLblName, FalseLblName, SystemLimitLblName); + + {bs_put_integer, Size, Flags, ConstInfo} -> + do_bs_put_integer(Dst, Args, Size, Flags, ConstInfo, false, + TrueLblName, FalseLblName, SystemLimitLblName); bs_utf8_size -> case Dst of @@ -360,6 +291,40 @@ gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SystemLimitLblName, ConstTab {Code, ConstTab} end. +%% Common implementation of bs_put_integer and unsafe_bs_put_integer +do_bs_put_integer(Dst, Args, Size, Flags, ConstInfo, SrcUnsafe, + TrueLblName, FalseLblName, SystemLimitLblName) -> + case is_illegal_const(Size) of + true -> + [hipe_rtl:mk_goto(FalseLblName)]; + false -> + Aligned = aligned(Flags), + LittleEndian = littleendian(Flags), + [NewOffset] = get_real(Dst), + case ConstInfo of + fail -> + [hipe_rtl:mk_goto(FalseLblName)]; + _ -> + case Args of + [Src, Base, Offset] -> + CCode = static_int_c_code(NewOffset, Src, Base, Offset, Size, + Flags, TrueLblName, FalseLblName), + put_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned, + LittleEndian, SrcUnsafe, TrueLblName); + [Src, Bits, Base, Offset] -> + {SizeCode, SizeReg} = + hipe_rtl_binary:make_size(Size, Bits, SystemLimitLblName, + FalseLblName), + CCode = int_c_code(NewOffset, Src, Base, Offset, SizeReg, Flags, + TrueLblName, FalseLblName), + InCode = put_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, + CCode, Aligned, LittleEndian, SrcUnsafe, + TrueLblName), + SizeCode ++ InCode + end + end + end. + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% Code that is used in the append and init writeable functions @@ -807,28 +772,8 @@ put_float(_NewOffset, _Src, _Base, _Offset, _Size, CCode, _Aligned, CCode. put_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned, - LittleEndian, TrueLblName) -> - {Init, End, UntaggedSrc} = make_init_end(Src, CCode, TrueLblName), - case {Aligned, LittleEndian} of - {true, true} -> - Init ++ - copy_int_little(Base, Offset, NewOffset, Size, UntaggedSrc) ++ - End; - {true, false} -> - Init ++ - copy_int_big(Base, Offset, NewOffset, Size, UntaggedSrc) ++ - End; - {false, true} -> - CCode; - {false, false} -> - Init ++ - copy_offset_int_big(Base, Offset, NewOffset, Size, UntaggedSrc) ++ - End - end. - -put_unsafe_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned, - LittleEndian, TrueLblName) -> - {Init, End, UntaggedSrc} = make_init_end(Src, TrueLblName), + LittleEndian, SrcUnsafe, TrueLblName) -> + {Init, End, UntaggedSrc} = make_init_end(Src, CCode, SrcUnsafe, TrueLblName), case {Aligned, LittleEndian} of {true, true} -> Init ++ @@ -847,27 +792,8 @@ put_unsafe_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned, end. 
put_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned, - LittleEndian, TrueLblName) -> - {Init, End, UntaggedSrc} = make_init_end(Src, CCode, TrueLblName), - case Aligned of - true -> - case LittleEndian of - true -> - Init ++ - copy_int_little(Base, Offset, NewOffset, SizeReg, UntaggedSrc) ++ - End; - false -> - Init ++ - copy_int_big(Base, Offset, NewOffset, SizeReg, UntaggedSrc) ++ - End - end; - false -> - CCode - end. - -put_unsafe_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned, - LittleEndian, TrueLblName) -> - {Init, End, UntaggedSrc} = make_init_end(Src, TrueLblName), + LittleEndian, SrcUnsafe, TrueLblName) -> + {Init, End, UntaggedSrc} = make_init_end(Src, CCode, SrcUnsafe, TrueLblName), case Aligned of true -> case LittleEndian of @@ -884,14 +810,13 @@ put_unsafe_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned, CCode end. - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% Help functions used by the above %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -make_init_end(Src, CCode, TrueLblName) -> +make_init_end(Src, CCode, false, TrueLblName) -> [CLbl, SuccessLbl] = create_lbls(2), [UntaggedSrc] = create_regs(1), Init = [hipe_tagscheme:test_fixnum(Src, hipe_rtl:label_name(SuccessLbl), @@ -899,9 +824,8 @@ make_init_end(Src, CCode, TrueLblName) -> SuccessLbl, hipe_tagscheme:untag_fixnum(UntaggedSrc,Src)], End = [hipe_rtl:mk_goto(TrueLblName), CLbl| CCode], - {Init, End, UntaggedSrc}. - -make_init_end(Src, TrueLblName) -> + {Init, End, UntaggedSrc}; +make_init_end(Src, _CCode, true, TrueLblName) -> [UntaggedSrc] = create_regs(1), Init = [hipe_tagscheme:untag_fixnum(UntaggedSrc,Src)], End = [hipe_rtl:mk_goto(TrueLblName)], diff --git a/lib/hipe/rtl/hipe_tagscheme.erl b/lib/hipe/rtl/hipe_tagscheme.erl index 35d1e7c8a4..68cbe75e85 100644 --- a/lib/hipe/rtl/hipe_tagscheme.erl +++ b/lib/hipe/rtl/hipe_tagscheme.erl @@ -40,6 +40,7 @@ fixnum_gt/5, fixnum_lt/5, fixnum_ge/5, fixnum_le/5, fixnum_val/1, fixnum_mul/4, fixnum_addsub/5, fixnum_andorxor/4, fixnum_not/2, fixnum_bsr/3, fixnum_bsl/3]). +-export([test_either_immed/4]). -export([unsafe_car/2, unsafe_cdr/2, unsafe_constant_element/3, unsafe_update_element/3, element/6]). -export([unsafe_closure_element/3]). @@ -363,14 +364,17 @@ test_matchstate(X, TrueLab, FalseLab, Pred) -> mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_BIN_MATCHSTATE, TrueLab, FalseLab, Pred)]. +test_bitstr_header(HdrTmp, TrueLab, FalseLab, Pred) -> + Mask = ?TAG_HEADER_MASK - ?BINARY_XXX_MASK, + mask_and_compare(HdrTmp, Mask, ?TAG_HEADER_REFC_BIN, TrueLab, FalseLab, Pred). + test_bitstr(X, TrueLab, FalseLab, Pred) -> Tmp = hipe_rtl:mk_new_reg_gcsafe(), HalfTrueLab = hipe_rtl:mk_new_label(), - Mask = ?TAG_HEADER_MASK - ?BINARY_XXX_MASK, [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), HalfTrueLab, get_header(Tmp, X), - mask_and_compare(Tmp, Mask, ?TAG_HEADER_REFC_BIN, TrueLab, FalseLab, Pred)]. + test_bitstr_header(Tmp, TrueLab, FalseLab, Pred)]. 
test_binary(X, TrueLab, FalseLab, Pred) -> Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), @@ -378,12 +382,10 @@ test_binary(X, TrueLab, FalseLab, Pred) -> IsBoxedLab = hipe_rtl:mk_new_label(), IsBitStrLab = hipe_rtl:mk_new_label(), IsSubBinLab = hipe_rtl:mk_new_label(), - Mask = ?TAG_HEADER_MASK - ?BINARY_XXX_MASK, [test_is_boxed(X, hipe_rtl:label_name(IsBoxedLab), FalseLab, Pred), IsBoxedLab, get_header(Tmp1, X), - mask_and_compare(Tmp1, Mask, ?TAG_HEADER_REFC_BIN, - hipe_rtl:label_name(IsBitStrLab), FalseLab, Pred), + test_bitstr_header(Tmp1, hipe_rtl:label_name(IsBitStrLab), FalseLab, Pred), IsBitStrLab, mask_and_compare(Tmp1, ?TAG_HEADER_MASK, ?TAG_HEADER_SUB_BIN, hipe_rtl:label_name(IsSubBinLab), TrueLab, 0.5), @@ -453,6 +455,10 @@ test_fixnums_1([Arg1, Arg2|Args], Acc) -> Tmp = hipe_rtl:mk_new_reg_gcsafe(), test_fixnums_1([Tmp|Args], [hipe_rtl:mk_alu(Tmp, Arg1, 'and', Arg2)|Acc]). +test_two_fixnums(Arg, Arg, FalseLab) -> + TrueLab = hipe_rtl:mk_new_label(), + [test_fixnum(Arg, hipe_rtl:label_name(TrueLab), FalseLab, 0.99), + TrueLab]; test_two_fixnums(Arg1, Arg2, FalseLab) -> TrueLab = hipe_rtl:mk_new_label(), case hipe_rtl:is_imm(Arg1) orelse hipe_rtl:is_imm(Arg2) of @@ -567,8 +573,8 @@ fixnum_andorxor(AluOp, Arg1, Arg2, Res) -> case AluOp of 'xor' -> Tmp = hipe_rtl:mk_new_reg_gcsafe(), - [hipe_rtl:mk_alu(Tmp, Arg1, 'xor', Arg2), % clears tag :-( - hipe_rtl:mk_alu(Res, Tmp, 'or', hipe_rtl:mk_imm(?TAG_IMMED1_SMALL))]; + [hipe_rtl:mk_alu(Tmp, Arg1, 'sub', hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), + hipe_rtl:mk_alu(Res, Tmp, 'xor', Arg2)]; _ -> hipe_rtl:mk_alu(Res, Arg1, AluOp, Arg2) end. @@ -595,6 +601,21 @@ fixnum_bsl(Arg1, Arg2, Res) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Test if either of two values are immediate (primary tag IMMED1, 0x3) +test_either_immed(Arg1, Arg2, TrueLab, FalseLab) -> + %% This test assumes primary tag 0x0 is reserved and immed has tag 0x3 + 16#0 = ?TAG_PRIMARY_HEADER, + 16#3 = ?TAG_PRIMARY_IMMED1, + Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), + Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), + [hipe_rtl:mk_alu(Tmp1, Arg1, 'sub', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Tmp2, Arg2, 'sub', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Tmp2, Tmp2, 'or', Tmp1), + hipe_rtl:mk_branch(Tmp2, 'and', hipe_rtl:mk_imm(2), eq, + FalseLab, TrueLab, 0.01)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + unsafe_car(Dst, Arg) -> hipe_rtl:mk_load(Dst, Arg, hipe_rtl:mk_imm(-(?TAG_PRIMARY_LIST))). 
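A short worked illustration of the tag arithmetic in test_either_immed/4 above. The function itself asserts header = 16#0 and immed1 = 16#3; the list and boxed values below are the usual HiPE primary tags, stated here only for the illustration:

%% Subtracting 1 sets the bit with value 2 only for the immed1 tag:
%%   immed1 2#11 - 1 = 2#10   (bit set)
%%   boxed  2#10 - 1 = 2#01   (bit clear)
%%   list   2#01 - 1 = 2#00   (bit clear)
%%   header 2#00              (reserved; never occurs as a term value)
%% so after OR-ing the two adjusted arguments, (Tmp2 band 2) =/= 0 exactly when
%% at least one argument carries the immediate tag, which is what the final
%% branch tests. A plain-integer shell check of the same trick:
%% 1> EitherImmed = fun(X, Y) -> ((X-1) bor (Y-1)) band 2 =/= 0 end.
%% 2> {EitherImmed(3, 2), EitherImmed(2, 1), EitherImmed(1, 3)}.
%% {true,false,true}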
@@ -631,14 +652,13 @@ unsafe_update_element(Tuple, Index, Value) -> % Index is an immediate element(Dst, Index, Tuple, FailLabName, {tuple, A}, IndexInfo) -> FixnumOkLab = hipe_rtl:mk_new_label(), IndexOkLab = hipe_rtl:mk_new_label(), - Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple UIndex = hipe_rtl:mk_new_reg_gcsafe(), Arity = hipe_rtl:mk_imm(A), - InvIndex = hipe_rtl:mk_new_reg_gcsafe(), - Offset = hipe_rtl:mk_new_reg_gcsafe(), case IndexInfo of valid -> %% This is no branch, 1 load and 3 alus = 4 instr + Offset = hipe_rtl:mk_new_reg_gcsafe(), + Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple [untag_fixnum(UIndex, Index), hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), hipe_rtl:mk_alu(Offset, UIndex, 'sll', @@ -647,72 +667,56 @@ element(Dst, Index, Tuple, FailLabName, {tuple, A}, IndexInfo) -> fixnums -> %% This is 1 branch, 1 load and 4 alus = 6 instr [untag_fixnum(UIndex, Index), - hipe_rtl:mk_alu(Ptr, Tuple, 'sub',hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, - FailLabName, IndexOkLab)]; + gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)]; _ -> %% This is 3 branches, 1 load and 5 alus = 9 instr [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab), FailLabName, 0.99), FixnumOkLab, untag_fixnum(UIndex, Index), - hipe_rtl:mk_alu(Ptr, Tuple, 'sub',hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, - FailLabName, IndexOkLab)] + gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)] end; element(Dst, Index, Tuple, FailLabName, tuple, IndexInfo) -> FixnumOkLab = hipe_rtl:mk_new_label(), IndexOkLab = hipe_rtl:mk_new_label(), - Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple Header = hipe_rtl:mk_new_reg_gcsafe(), UIndex = hipe_rtl:mk_new_reg_gcsafe(), Arity = hipe_rtl:mk_new_reg_gcsafe(), - InvIndex = hipe_rtl:mk_new_reg_gcsafe(), - Offset = hipe_rtl:mk_new_reg_gcsafe(), case IndexInfo of fixnums -> %% This is 1 branch, 2 loads and 5 alus = 8 instr - [hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), - hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), + [get_header(Header, Tuple), untag_fixnum(UIndex, Index), hipe_rtl:mk_alu(Arity,Header,'srl',hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, - FailLabName, IndexOkLab)]; + gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)]; Num when is_integer(Num) -> %% This is 1 branch, 1 load and 3 alus = 5 instr - [hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))| - gen_element_tail(Dst, Ptr, InvIndex, hipe_rtl:mk_imm(Num), - Offset, UIndex, FailLabName, IndexOkLab)]; + gen_element_tail(Dst, Tuple, hipe_rtl:mk_imm(Num), UIndex, FailLabName, + IndexOkLab); _ -> %% This is 2 branches, 2 loads and 6 alus = 10 instr [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab), FailLabName, 0.99), FixnumOkLab, - hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), - hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), + get_header(Header, Tuple), untag_fixnum(UIndex, Index), hipe_rtl:mk_alu(Arity,Header,'srl',hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, - FailLabName, IndexOkLab)] + gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)] end; element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) -> FixnumOkLab = hipe_rtl:mk_new_label(), BoxedOkLab = hipe_rtl:mk_new_label(), TupleOkLab = 
hipe_rtl:mk_new_label(), IndexOkLab = hipe_rtl:mk_new_label(), - Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple Header = hipe_rtl:mk_new_reg_gcsafe(), UIndex = hipe_rtl:mk_new_reg_gcsafe(), Arity = hipe_rtl:mk_new_reg_gcsafe(), - InvIndex = hipe_rtl:mk_new_reg_gcsafe(), - Offset = hipe_rtl:mk_new_reg_gcsafe(), case IndexInfo of fixnums -> %% This is 3 branches, 2 loads and 5 alus = 10 instr [test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab), FailLabName, 0.99), BoxedOkLab, - hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), - hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), + get_header(Header, Tuple), hipe_rtl:mk_branch(Header, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), @@ -720,23 +724,21 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) -> untag_fixnum(UIndex, Index), hipe_rtl:mk_alu(Arity, Header, 'srl', hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, - UIndex, FailLabName, IndexOkLab)]; + gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)]; Num when is_integer(Num) -> %% This is 3 branches, 2 loads and 4 alus = 9 instr [test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab), FailLabName, 0.99), BoxedOkLab, - hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), - hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), + get_header(Header, Tuple), hipe_rtl:mk_branch(Header, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), TupleOkLab, hipe_rtl:mk_alu(Arity, Header, 'srl', hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, - hipe_rtl:mk_imm(Num), FailLabName, IndexOkLab)]; + gen_element_tail(Dst, Tuple, Arity, hipe_rtl:mk_imm(Num), FailLabName, + IndexOkLab)]; _ -> %% This is 4 branches, 2 loads, and 6 alus = 12 instr :( [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab), @@ -745,8 +747,7 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) -> test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab), FailLabName, 0.99), BoxedOkLab, - hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), - hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), + get_header(Header, Tuple), hipe_rtl:mk_branch(Header, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), @@ -754,20 +755,21 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) -> untag_fixnum(UIndex, Index), hipe_rtl:mk_alu(Arity, Header, 'srl', hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, - UIndex, FailLabName, IndexOkLab)] + gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)] end. 
-gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, - UIndex, FailLabName, IndexOkLab) -> +gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab) -> + ZeroIndex = hipe_rtl:mk_new_reg_gcsafe(), + Offset = hipe_rtl:mk_new_reg_gcsafe(), + Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple %% now check that 1 <= UIndex <= Arity - %% if UIndex < 1, then (Arity - UIndex) >= Arity - %% if UIndex > Arity, then (Arity - UIndex) < 0, which is >=u Arity - %% otherwise, 0 <= (Arity - UIndex) < Arity - [hipe_rtl:mk_alu(InvIndex, Arity, 'sub', UIndex), - hipe_rtl:mk_branch(InvIndex, 'geu', Arity, FailLabName, + %% by checking the equivalent (except for when Arity>=2^(WordSize-1)) + %% (UIndex - 1) <u Arity + [hipe_rtl:mk_alu(ZeroIndex, UIndex, 'sub', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_branch(ZeroIndex, 'geu', Arity, FailLabName, hipe_rtl:label_name(IndexOkLab), 0.01), IndexOkLab, + hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), hipe_rtl:mk_alu(Offset, UIndex, 'sll', hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size())), hipe_rtl:mk_load(Dst, Ptr, Offset)]. diff --git a/lib/hipe/sparc/hipe_sparc.erl b/lib/hipe/sparc/hipe_sparc.erl index 916857b224..22e0761b69 100644 --- a/lib/hipe/sparc/hipe_sparc.erl +++ b/lib/hipe/sparc/hipe_sparc.erl @@ -87,6 +87,9 @@ mk_pseudo_set/2, + mk_pseudo_spill_move/3, + is_pseudo_spill_move/1, + mk_pseudo_tailcall/4, pseudo_tailcall_funv/1, pseudo_tailcall_linkage/1, @@ -117,6 +120,9 @@ pseudo_fmove_src/1, pseudo_fmove_dst/1, + mk_pseudo_spill_fmove/3, + is_pseudo_spill_fmove/1, + mk_pseudo_fstore/3, mk_fstore/4, @@ -269,6 +275,10 @@ mk_pseudo_ret() -> #pseudo_ret{}. mk_pseudo_set(Imm, Dst) -> #pseudo_set{imm=Imm, dst=Dst}. +mk_pseudo_spill_move(Src, Temp, Dst) -> + #pseudo_spill_move{src=Src, temp=Temp, dst=Dst}. +is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move). + mk_pseudo_tailcall(FunV, Arity, StkArgs, Linkage) -> #pseudo_tailcall{funv=FunV, arity=Arity, stkargs=StkArgs, linkage=Linkage}. pseudo_tailcall_funv(#pseudo_tailcall{funv=FunV}) -> FunV. @@ -375,6 +385,10 @@ is_pseudo_fmove(I) -> case I of #pseudo_fmove{} -> true; _ -> false end. pseudo_fmove_src(#pseudo_fmove{src=Src}) -> Src. pseudo_fmove_dst(#pseudo_fmove{dst=Dst}) -> Dst. +mk_pseudo_spill_fmove(Src, Temp, Dst) -> + #pseudo_spill_fmove{src=Src, temp=Temp, dst=Dst}. +is_pseudo_spill_fmove(I) -> is_record(I, pseudo_spill_fmove). + mk_pseudo_fstore(Src, Base, Disp) -> #pseudo_fstore{src=Src, base=Base, disp=Disp}. diff --git a/lib/hipe/sparc/hipe_sparc.hrl b/lib/hipe/sparc/hipe_sparc.hrl index 4eae6777a9..f60e516e59 100644 --- a/lib/hipe/sparc/hipe_sparc.hrl +++ b/lib/hipe/sparc/hipe_sparc.hrl @@ -88,6 +88,8 @@ -record(pseudo_move, {src, dst}). -record(pseudo_ret, {}). -record(pseudo_set, {imm, dst}). +-record(pseudo_spill_fmove, {src, temp, dst}). +-record(pseudo_spill_move, {src, temp, dst}). -record(pseudo_tailcall, {funv, arity, stkargs, linkage}). -record(pseudo_tailcall_prepare, {}). -record(rdy, {dst}). 
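The rewritten gen_element_tail in hipe_tagscheme.erl above folds the index check 1 =< UIndex =< Arity into a single unsigned comparison: at machine level UIndex - 1 wraps around to the largest word when UIndex is 0 (and stays huge for any negative UIndex), so every out-of-range index is caught by the one 'geu' branch to FailLabName. A stand-alone sketch of the trick, assuming a 64-bit word and Arity below 2^63 (the caveat noted in the source comment):

  %% Hypothetical helper, not HiPE code; the band models the wrap-around of
  %% the hardware subtraction.
  -define(WORD_MASK, (1 bsl 64) - 1).

  in_range(UIndex, Arity) ->                  % true iff 1 =< UIndex =< Arity
    ((UIndex - 1) band ?WORD_MASK) < Arity.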
diff --git a/lib/hipe/sparc/hipe_sparc_assemble.erl b/lib/hipe/sparc/hipe_sparc_assemble.erl index 08bd47c4d2..2b82f41d23 100644 --- a/lib/hipe/sparc/hipe_sparc_assemble.erl +++ b/lib/hipe/sparc/hipe_sparc_assemble.erl @@ -32,7 +32,7 @@ assemble(CompiledCode, Closures, Exports, Options) -> || {MFA, Defun} <- CompiledCode], %% {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = - hipe_pack_constants:pack_constants(Code, 4), + hipe_pack_constants:pack_constants(Code), %% {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = encode(translate(Code, ConstMap), Options), diff --git a/lib/hipe/sparc/hipe_sparc_cfg.erl b/lib/hipe/sparc/hipe_sparc_cfg.erl index 27374d187b..45c8e887b5 100644 --- a/lib/hipe/sparc/hipe_sparc_cfg.erl +++ b/lib/hipe/sparc/hipe_sparc_cfg.erl @@ -23,6 +23,7 @@ -export([linearise/1]). -export([params/1]). -export([arity/1]). % for linear scan +-export([redirect_jmp/3, branch_preds/1]). -define(SPARC_CFG, true). % needed for cfg.inc @@ -77,28 +78,53 @@ branch_successors(Branch) -> #pseudo_tailcall{} -> [] end. +branch_preds(Branch) -> + case Branch of + #jmp{labels=Labels} -> + Prob = 1.0/length(Labels), + [{L, Prob} || L <- Labels]; + #pseudo_bp{true_label=TrueLab,false_label=FalseLab,pred=Pred} -> + [{FalseLab, 1.0-Pred}, {TrueLab, Pred}]; + #pseudo_call{contlab=ContLab, sdesc=#sparc_sdesc{exnlab=[]}} -> + %% A function can still cause an exception, even if we won't catch it + [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}]; + #pseudo_call{contlab=ContLab, sdesc=#sparc_sdesc{exnlab=ExnLab}} -> + CallExnPred = hipe_bb_weights:call_exn_pred(), + [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}]; + _ -> + case branch_successors(Branch) of + [] -> []; + [Single] -> [{Single, 1.0}] + end + end. + -ifdef(REMOVE_TRIVIAL_BBS_NEEDED). fails_to(_Instr) -> []. -endif. --ifdef(notdef). redirect_jmp(I, Old, New) -> case I of - #b_label{label=Label} -> - if Old =:= Label -> I#b_label{label=New}; + #bp{'cond'='a',label=Label} -> + if Old =:= Label -> I#bp{label=New}; true -> I end; - #pseudo_bc{true_label=TrueLab, false_label=FalseLab} -> - I1 = if Old =:= TrueLab -> I#pseudo_bc{true_label=New}; + #pseudo_bp{true_label=TrueLab, false_label=FalseLab} -> + I1 = if Old =:= TrueLab -> I#pseudo_bp{true_label=New}; true -> I end, - if Old =:= FalseLab -> I1#pseudo_bc{false_label=New}; + if Old =:= FalseLab -> I1#pseudo_bp{false_label=New}; true -> I1 end; - %% handle pseudo_call too? - _ -> I + #pseudo_call{contlab=ContLab0, sdesc=SDesc0} -> + SDesc = case SDesc0 of + #sparc_sdesc{exnlab=Old} -> SDesc0#sparc_sdesc{exnlab=New}; + #sparc_sdesc{exnlab=_} -> SDesc0 + end, + ContLab = if Old =:= ContLab0 -> New; + true -> ContLab0 + end, + I#pseudo_call{sdesc=SDesc, contlab=ContLab} end. --endif. mk_goto(Label) -> hipe_sparc:mk_b_label(Label). 
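The new branch_preds/1 gives each control-flow successor an estimated probability (the pseudo_call clauses reuse hipe_bb_weights:call_exn_pred() for the exception edge), so the block-weighting code has an edge profile to propagate. For a two-way pseudo_bp the two edge probabilities sum to 1.0; a small sketch, assuming hipe_sparc.hrl is included and using arbitrary label numbers:

  I = #pseudo_bp{true_label=10, false_label=11, pred=0.9},
  [{11, FalseProb}, {10, TrueProb}] = hipe_sparc_cfg:branch_preds(I),
  true = abs(FalseProb + TrueProb - 1.0) < 1.0e-9.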
diff --git a/lib/hipe/sparc/hipe_sparc_defuse.erl b/lib/hipe/sparc/hipe_sparc_defuse.erl index cb75f82e2b..4d4b11e301 100644 --- a/lib/hipe/sparc/hipe_sparc_defuse.erl +++ b/lib/hipe/sparc/hipe_sparc_defuse.erl @@ -39,6 +39,7 @@ insn_def_gpr(I) -> #pseudo_call{} -> call_clobbered_gpr(); #pseudo_move{dst=Dst} -> [Dst]; #pseudo_set{dst=Dst} -> [Dst]; + #pseudo_spill_move{temp=Temp, dst=Dst} -> [Temp, Dst]; #pseudo_tailcall_prepare{} -> tailcall_clobbered_gpr(); #rdy{dst=Dst} -> [Dst]; #sethi{dst=Dst} -> [Dst]; @@ -72,6 +73,7 @@ insn_use_gpr(I) -> funv_use(FunV, arity_use_gpr(Arity)); #pseudo_move{src=Src} -> [Src]; #pseudo_ret{} -> [hipe_sparc:mk_rv()]; + #pseudo_spill_move{src=Src} -> [Src]; #pseudo_tailcall{funv=FunV,arity=Arity,stkargs=StkArgs} -> addsrcs(StkArgs, addtemps(tailcall_clobbered_gpr(), funv_use(FunV, arity_use_gpr(Arity)))); #store{src=Src,base=Base,disp=Disp} -> @@ -112,6 +114,7 @@ insn_def_fpr(I) -> #fp_unary{dst=Dst} -> [Dst]; #pseudo_fload{dst=Dst} -> [Dst]; #pseudo_fmove{dst=Dst} -> [Dst]; + #pseudo_spill_fmove{temp=Temp, dst=Dst} -> [Temp, Dst]; _ -> [] end. @@ -130,6 +133,7 @@ insn_use_fpr(I) -> #fp_unary{src=Src} -> [Src]; #pseudo_fmove{src=Src} -> [Src]; #pseudo_fstore{src=Src} -> [Src]; + #pseudo_spill_fmove{src=Src} -> [Src]; _ -> [] end. diff --git a/lib/hipe/sparc/hipe_sparc_frame.erl b/lib/hipe/sparc/hipe_sparc_frame.erl index 6f29c3c905..1f2a259ca1 100644 --- a/lib/hipe/sparc/hipe_sparc_frame.erl +++ b/lib/hipe/sparc/hipe_sparc_frame.erl @@ -82,6 +82,10 @@ do_insn(I, LiveOut, Context, FPoff) -> {do_pseudo_tailcall(I, Context), context_framesize(Context)}; #pseudo_fmove{} -> {do_pseudo_fmove(I, Context, FPoff), FPoff}; + #pseudo_spill_move{} -> + {do_pseudo_spill_move(I, Context, FPoff), FPoff}; + #pseudo_spill_fmove{} -> + {do_pseudo_spill_fmove(I, Context, FPoff), FPoff}; _ -> {[I], FPoff} end. @@ -110,6 +114,22 @@ do_pseudo_move(I, Context, FPoff) -> end end. +do_pseudo_spill_move(I, Context, FPoff) -> + #pseudo_spill_move{src=Src,temp=Temp,dst=Dst} = I, + case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of + false -> % Register allocator changed its mind, turn back to move + do_pseudo_move(hipe_sparc:mk_pseudo_move(Src, Dst), Context, FPoff); + true -> + SrcOffset = pseudo_offset(Src, FPoff, Context), + DstOffset = pseudo_offset(Dst, FPoff, Context), + case SrcOffset =:= DstOffset of + true -> []; % omit move-to-self + false -> + mk_load(hipe_sparc:mk_sp(), SrcOffset, Temp, + mk_store(Temp, hipe_sparc:mk_sp(), DstOffset, [])) + end + end. + do_pseudo_fmove(I, Context, FPoff) -> Dst = hipe_sparc:pseudo_fmove_dst(I), Src = hipe_sparc:pseudo_fmove_src(I), @@ -127,6 +147,22 @@ do_pseudo_fmove(I, Context, FPoff) -> end end. +do_pseudo_spill_fmove(I, Context, FPoff) -> + #pseudo_spill_fmove{src=Src,temp=Temp,dst=Dst} = I, + case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of + false -> % Register allocator changed its mind, turn back to fmove + do_pseudo_fmove(hipe_sparc:mk_pseudo_fmove(Src, Dst), Context, FPoff); + true -> + SrcOffset = pseudo_offset(Src, FPoff, Context), + DstOffset = pseudo_offset(Dst, FPoff, Context), + case SrcOffset =:= DstOffset of + true -> []; % omit move-to-self + false -> + mk_fload(hipe_sparc:mk_sp(), SrcOffset, Temp) + ++ mk_fstore(Temp, hipe_sparc:mk_sp(), DstOffset) + end + end. + pseudo_offset(Temp, FPoff, Context) -> FPoff + context_offset(Context, Temp). 
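do_pseudo_spill_move/3 above is where a spill-slot-to-spill-slot copy finally turns into real code: if the register allocator ended up keeping either side in a register it degenerates back into an ordinary pseudo_move, if both pseudos were coalesced onto the same frame offset it vanishes, and otherwise it becomes a load into the scratch temp followed by a store. A minimal sketch of that last decision, using hypothetical pseudo-instructions rather than HiPE ones:

  %% SrcOff/DstOff are the frame offsets of the two spill slots, Temp a
  %% scratch register reserved by the postconditions pass.
  lower_spill_move(Off, Off, _Temp) ->
    [];                                        % same slot: move-to-self, drop it
  lower_spill_move(SrcOff, DstOff, Temp) ->
    [{load,  {sp, SrcOff}, Temp},              % Temp := [sp + SrcOff]
     {store, Temp, {sp, DstOff}}].             % [sp + DstOff] := Temp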
diff --git a/lib/hipe/sparc/hipe_sparc_ra_finalise.erl b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl index 5fdb73e197..a724821992 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_finalise.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl @@ -38,6 +38,7 @@ ra_insn(I, Map, FPMap) -> #pseudo_call{} -> ra_pseudo_call(I, Map); #pseudo_move{} -> ra_pseudo_move(I, Map); #pseudo_set{} -> ra_pseudo_set(I, Map); + #pseudo_spill_move{} -> ra_pseudo_spill_move(I, Map); #pseudo_tailcall{} -> ra_pseudo_tailcall(I, Map); #rdy{} -> ra_rdy(I, Map); #sethi{} -> ra_sethi(I, Map); @@ -47,6 +48,7 @@ ra_insn(I, Map, FPMap) -> #pseudo_fload{} -> ra_pseudo_fload(I, Map, FPMap); #pseudo_fmove{} -> ra_pseudo_fmove(I, FPMap); #pseudo_fstore{} -> ra_pseudo_fstore(I, Map, FPMap); + #pseudo_spill_fmove{} -> ra_pseudo_spill_fmove(I, FPMap); _ -> I end. @@ -80,6 +82,12 @@ ra_pseudo_set(I=#pseudo_set{dst=Dst}, Map) -> NewDst = ra_temp(Dst, Map), I#pseudo_set{dst=NewDst}. +ra_pseudo_spill_move(I=#pseudo_spill_move{src=Src,temp=Temp,dst=Dst}, Map) -> + NewSrc = ra_temp(Src, Map), + NewTemp = ra_temp(Temp, Map), + NewDst = ra_temp(Dst, Map), + I#pseudo_spill_move{src=NewSrc,temp=NewTemp,dst=NewDst}. + ra_pseudo_tailcall(I=#pseudo_tailcall{funv=FunV,stkargs=StkArgs}, Map) -> NewFunV = ra_funv(FunV, Map), NewStkArgs = ra_args(StkArgs, Map), @@ -120,6 +128,13 @@ ra_pseudo_fmove(I=#pseudo_fmove{src=Src,dst=Dst}, FPMap) -> NewDst = ra_temp_fp(Dst, FPMap), I#pseudo_fmove{src=NewSrc,dst=NewDst}. +ra_pseudo_spill_fmove(I=#pseudo_spill_fmove{src=Src,temp=Temp,dst=Dst}, + FPMap) -> + NewSrc = ra_temp_fp(Src, FPMap), + NewTemp = ra_temp_fp(Temp, FPMap), + NewDst = ra_temp_fp(Dst, FPMap), + I#pseudo_spill_fmove{src=NewSrc,temp=NewTemp,dst=NewDst}. + ra_pseudo_fstore(I=#pseudo_fstore{src=Src,base=Base}, Map, FPMap) -> NewSrc = ra_temp_fp(Src, FPMap), NewBase = ra_temp(Base, Map), diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl index 984c97fbd4..d3ecb43ec6 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl @@ -54,6 +54,7 @@ do_insn(I, TempMap, Strategy) -> #pseudo_call{} -> do_pseudo_call(I, TempMap, Strategy); #pseudo_move{} -> do_pseudo_move(I, TempMap, Strategy); #pseudo_set{} -> do_pseudo_set(I, TempMap, Strategy); + #pseudo_spill_move{} -> do_pseudo_spill_move(I, TempMap, Strategy); #pseudo_tailcall{} -> do_pseudo_tailcall(I, TempMap, Strategy); #rdy{} -> do_rdy(I, TempMap, Strategy); #sethi{} -> do_sethi(I, TempMap, Strategy); @@ -92,14 +93,16 @@ do_pseudo_call(I=#pseudo_call{funv=FunV}, TempMap, Strategy) -> do_pseudo_move(I=#pseudo_move{src=Src,dst=Dst}, TempMap, Strategy) -> %% Either Dst or Src (but not both) may be a pseudo temp. - %% pseudo_move is a special case: in [XXX: not pseudo_tailcall] - %% all other instructions, all temps must be non-pseudos - %% after register allocation. - case temp_is_spilled(Dst, TempMap) of - true -> % Src must not be a pseudo - {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy), - NewI = I#pseudo_move{src=NewSrc}, - {FixSrc ++ [NewI], DidSpill}; + %% pseudo_move and pseudo_spill_move [XXX: not pseudo_tailcall] + %% are special cases: in all other instructions, all temps must + %% be non-pseudos after register allocation. 
+ case temp_is_spilled(Src, TempMap) + andalso temp_is_spilled(Dst, TempMap) + of + true -> % Turn into pseudo_spill_move + Temp = clone(Src, temp1(Strategy)), + NewI = #pseudo_spill_move{src=Src,temp=Temp,dst=Dst}, + {[NewI], true}; _ -> {[I], false} end. @@ -109,6 +112,11 @@ do_pseudo_set(I=#pseudo_set{dst=Dst}, TempMap, Strategy) -> NewI = I#pseudo_set{dst=NewDst}, {[NewI | FixDst], DidSpill}. +do_pseudo_spill_move(I=#pseudo_spill_move{temp=Temp}, TempMap, _Strategy) -> + %% Temp is above the low water mark and must not have been spilled + false = temp_is_spilled(Temp, TempMap), + {[I], false}. + do_pseudo_tailcall(I=#pseudo_tailcall{funv=FunV}, TempMap, Strategy) -> {FixFunV,NewFunV,DidSpill} = fix_funv(FunV, TempMap, Strategy), NewI = I#pseudo_tailcall{funv=NewFunV}, diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl index 751e91425c..5fa3a5fc59 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl @@ -43,6 +43,7 @@ do_insn(I, TempMap) -> #pseudo_fload{} -> do_pseudo_fload(I, TempMap); #pseudo_fmove{} -> do_pseudo_fmove(I, TempMap); #pseudo_fstore{} -> do_pseudo_fstore(I, TempMap); + #pseudo_spill_fmove{} -> do_pseudo_spill_fmove(I, TempMap); _ -> {[I], false} end. @@ -67,11 +68,13 @@ do_pseudo_fload(I=#pseudo_fload{dst=Dst}, TempMap) -> {[NewI | FixDst], DidSpill}. do_pseudo_fmove(I=#pseudo_fmove{src=Src,dst=Dst}, TempMap) -> - case temp_is_spilled(Dst, TempMap) of - true -> - {FixSrc,NewSrc,DidSpill} = fix_src(Src, TempMap), - NewI = I#pseudo_fmove{src=NewSrc}, - {FixSrc ++ [NewI], DidSpill}; + case temp_is_spilled(Src, TempMap) + andalso temp_is_spilled(Dst, TempMap) + of + true -> % Turn into pseudo_spill_fmove + Temp = clone(Src), + NewI = #pseudo_spill_fmove{src=Src,temp=Temp,dst=Dst}, + {[NewI], true}; _ -> {[I], false} end. @@ -81,6 +84,11 @@ do_pseudo_fstore(I=#pseudo_fstore{src=Src}, TempMap) -> NewI = I#pseudo_fstore{src=NewSrc}, {FixSrc ++ [NewI], DidSpill}. +do_pseudo_spill_fmove(I=#pseudo_spill_fmove{temp=Temp}, TempMap) -> + %% Temp is above the low water mark and must not have been spilled + false = temp_is_spilled(Temp, TempMap), + {[I], false}. + %%% Fix Dst and Src operands. fix_src(Src, TempMap) -> diff --git a/lib/hipe/sparc/hipe_sparc_subst.erl b/lib/hipe/sparc/hipe_sparc_subst.erl index 1d0671464e..ce3bbb813a 100644 --- a/lib/hipe/sparc/hipe_sparc_subst.erl +++ b/lib/hipe/sparc/hipe_sparc_subst.erl @@ -44,6 +44,8 @@ insn_temps(T, I) -> #pseudo_move{src=S,dst=D} -> I#pseudo_move{src=T(S),dst=T(D)}; #pseudo_ret{} -> I; #pseudo_set{dst=D}-> I#pseudo_set{dst=T(D)}; + #pseudo_spill_move{src=S,temp=U,dst=D} -> + I#pseudo_spill_move{src=T(S),temp=T(U),dst=T(D)}; #pseudo_tailcall{funv=F,stkargs=Stk} -> I#pseudo_tailcall{funv=funv_temps(T,F),stkargs=lists:map(Arg,Stk)}; #pseudo_tailcall_prepare{} -> I; @@ -57,7 +59,9 @@ insn_temps(T, I) -> I#pseudo_fload{base=T(B),disp=S2(Di),dst=T(Ds)}; #pseudo_fmove{src=S,dst=D} -> I#pseudo_fmove{src=T(S),dst=T(D)}; #pseudo_fstore{src=S,base=B,disp=D} -> - I#pseudo_fstore{src=T(S),base=T(B),disp=S2(D)} + I#pseudo_fstore{src=T(S),base=T(B),disp=S2(D)}; + #pseudo_spill_fmove{src=S,temp=U,dst=D} -> + I#pseudo_spill_fmove{src=T(S),temp=T(U),dst=T(D)} end. -spec src2_temps(subst_fun(), src2()) -> src2(). 
diff --git a/lib/hipe/test/basic_SUITE_data/basic_bugs_hipe.erl b/lib/hipe/test/basic_SUITE_data/basic_bugs_hipe.erl index caa0e71d0b..430e097b91 100644 --- a/lib/hipe/test/basic_SUITE_data/basic_bugs_hipe.erl +++ b/lib/hipe/test/basic_SUITE_data/basic_bugs_hipe.erl @@ -18,6 +18,7 @@ test() -> ok = test_R12B5_seg_fault(), ok = test_switch_neg_int(), ok = test_icode_range_anal(), + ok = test_icode_range_call(), ok. %%----------------------------------------------------------------------- @@ -461,3 +462,44 @@ g(X, Z) -> test -> non_zero_test; other -> other end. + +%%----------------------------------------------------------------------- +%% From: Rich Neswold +%% Date: Oct 5, 2016 +%% +%% The following was a bug in the HiPE compiler's range analysis. The +%% function range_client/2 below would would not stop when N reached 0, +%% but keep recursing into the second clause forever. +%% +%% The problem turned out to be in hipe_icode_range:analyse_call/2, +%% which would note update the argument ranges of the callee if the +%% result of the call was ignored. +%% ----------------------------------------------------------------------- +-define(TIMEOUT, 42). + +test_icode_range_call() -> + Self = self(), + Client = spawn_link(fun() -> range_client(Self, 4) end), + range_server(4, Client). + +range_server(0, _Client) -> + receive + stopping -> ok; + {called_with, 0} -> error(failure) + after ?TIMEOUT -> error(timeout) + end; +range_server(N, Client) -> + receive + {called_with, N} -> + Client ! proceed + after ?TIMEOUT -> error(timeout) + end, + range_server(N-1, Client). % tailcall (so the bug does not affect it) + +range_client(Server, 0) -> + Server ! stopping; +range_client(Server, N) -> + Server ! {called_with, N}, + receive proceed -> ok end, + range_client(Server, N - 1), % non-tailrecursive call with ignored result + ok. diff --git a/lib/hipe/test/basic_SUITE_data/basic_edge_cases.erl b/lib/hipe/test/basic_SUITE_data/basic_edge_cases.erl new file mode 100644 index 0000000000..9bf5cf52cd --- /dev/null +++ b/lib/hipe/test/basic_SUITE_data/basic_edge_cases.erl @@ -0,0 +1,142 @@ +%%% -*- erlang-indent-level: 2 -*- +%%%---------------------------------------------------------------------- +%%% Contains +%%%---------------------------------------------------------------------- +-module(basic_edge_cases). + +-export([test/0]). + +test() -> + ok = test_float_spills(), + ok = test_infinite_loops(), + ok. + +%% Contains more float temps live at a single point than there are float +%% registers in any backend + +test_float_spills() -> + {{{2942.0,4670.0,3198.0,4926.0,2206.0,4734.0}, + {3118.0,2062.0,5174.0,3038.0,3618.0,3014.0}, + {2542.0,2062.0,4934.0,2590.0,3098.0,3062.0}, + {2950.0,3666.0,2574.0,5038.0,1866.0,2946.0}, + {3126.0,3050.0,3054.0,5070.0,2258.0,2714.0}, + {4734.0,2206.0,4926.0,3198.0,4670.0,2942.0}}, + 58937.0} = + mat66_flip_sum(35.0,86.0,32.0,88.0,33.0,57.0, + 22.0,77.0,91.0,80.0,14.0,33.0, + 51.0,28.0,87.0,20.0,91.0,11.0, + 68.0,83.0,64.0,82.0,10.0,86.0, + 74.0,18.0,08.0,52.0,10.0,14.0, + 89.0,34.0,64.0,66.0,58.0,55.0, + 0.0, 5), + ok. 
+ +mat66_flip_sum(M11, M12, M13, M14, M15, M16, + M21, M22, M23, M24, M25, M26, + M31, M32, M33, M34, M35, M36, + M41, M42, M43, M44, M45, M46, + M51, M52, M53, M54, M55, M56, + M61, M62, M63, M64, M65, M66, + Acc, Ctr) + when is_float(M11), is_float(M12), is_float(M13), + is_float(M14), is_float(M15), is_float(M16), + is_float(M21), is_float(M22), is_float(M23), + is_float(M24), is_float(M25), is_float(M26), + is_float(M31), is_float(M32), is_float(M33), + is_float(M34), is_float(M35), is_float(M36), + is_float(M41), is_float(M42), is_float(M43), + is_float(M44), is_float(M45), is_float(M46), + is_float(M51), is_float(M52), is_float(M53), + is_float(M54), is_float(M55), is_float(M56), + is_float(M61), is_float(M62), is_float(M63), + is_float(M64), is_float(M65), is_float(M66), + is_float(Acc) -> + R11 = M66+M11, R12 = M65+M12, R13 = M64+M13, + R14 = M63+M14, R15 = M62+M15, R16 = M61+M16, + R21 = M56+M21, R22 = M55+M22, R23 = M54+M23, + R24 = M53+M24, R25 = M52+M25, R26 = M51+M26, + R31 = M46+M31, R32 = M45+M32, R33 = M44+M33, + R34 = M43+M34, R35 = M42+M35, R36 = M41+M36, + R41 = M26+M41, R42 = M25+M42, R43 = M24+M43, + R44 = M23+M44, R45 = M22+M45, R46 = M21+M46, + R51 = M36+M51, R52 = M35+M52, R53 = M34+M53, + R54 = M33+M54, R55 = M32+M55, R56 = M31+M56, + R61 = M16+M61, R62 = M15+M62, R63 = M14+M63, + R64 = M13+M64, R65 = M12+M65, R66 = M11+M66, + case Ctr of + 0 -> + {{{R11, R12, R13, R14, R15, R16}, + {R21, R22, R23, R24, R25, R26}, + {R31, R32, R33, R34, R35, R36}, + {R41, R42, R43, R44, R45, R46}, + {R51, R52, R53, R54, R55, R56}, + {R61, R62, R63, R64, R65, R66}}, + Acc}; + _ -> + NewAcc = 0.0 + M11 + M12 + M13 + M14 + M15 + M16 + + + M21 + M22 + M23 + M24 + M25 + M26 + + M31 + M32 + M33 + M34 + M35 + M36 + + M41 + M42 + M43 + M44 + M45 + M46 + + M51 + M52 + M53 + M54 + M55 + M56 + + M61 + M62 + M63 + M64 + M65 + M66 + + Acc, + mat66_flip_sum(R11+1.0, R12+1.0, R13+1.0, R14+1.0, R15+1.0, R16+1.0, + R21+1.0, R22+1.0, R23+1.0, R24+1.0, R25+1.0, R26+1.0, + R31+1.0, R32+1.0, R33+1.0, R34+1.0, R35+1.0, R36+1.0, + R41+1.0, R42+1.0, R43+1.0, R44+1.0, R45+1.0, R46+1.0, + R51+1.0, R52+1.0, R53+1.0, R54+1.0, R55+1.0, R56+1.0, + R61+1.0, R62+1.0, R63+1.0, R64+1.0, R65+1.0, R66+1.0, + NewAcc, Ctr-1) + end. + +%% Infinite loops must receive reduction tests, and might trip up basic block +%% weighting, leading to infinite weights and/or divisions by zero. + +test_infinite_loops() -> + OldTrapExit = process_flag(trap_exit, true), + ok = test_infinite_loop(fun infinite_recursion/0), + ok = test_infinite_loop(fun infinite_corecursion/0), + RecursiveFun = fun RecursiveFun() -> RecursiveFun() end, + ok = test_infinite_loop(RecursiveFun), + CorecursiveFunA = fun CorecursiveFunA() -> + CorecursiveFunA1 = fun () -> CorecursiveFunA() end, + CorecursiveFunA1() + end, + ok = test_infinite_loop(CorecursiveFunA), + CorecursiveFunB1 = fun(CorecursiveFunB) -> CorecursiveFunB() end, + CorecursiveFunB = fun CorecursiveFunB() -> + CorecursiveFunB1(CorecursiveFunB) + end, + ok = test_infinite_loop(CorecursiveFunB), + CorecursiveFunC1 = fun CorecursiveFunC1(Other) -> + Other(CorecursiveFunC1) + end, + CorecursiveFunC = fun CorecursiveFunC(Other) -> + Other(CorecursiveFunC) + end, + ok = test_infinite_loop(fun() -> CorecursiveFunC(CorecursiveFunC1) end), + ok = test_infinite_loop(fun() -> CorecursiveFunC(CorecursiveFunC) end), + true = process_flag(trap_exit, OldTrapExit), + ok. + +-define(INFINITE_LOOP_TIMEOUT, 100). 
+test_infinite_loop(Fun) -> + Tester = spawn_link(Fun), + kill_soon(Tester), + receive {'EXIT', Tester, awake} -> + undefined = process_info(Tester), + ok + after ?INFINITE_LOOP_TIMEOUT -> error(timeout) + end. + +infinite_recursion() -> infinite_recursion(). + +infinite_corecursion() -> infinite_corecursion_1(). +infinite_corecursion_1() -> infinite_corecursion(). + +kill_soon(Pid) -> + _ = spawn_link(fun() -> + timer:sleep(1), + erlang:exit(Pid, awake) + end), + ok. diff --git a/lib/hipe/test/basic_SUITE_data/basic_tuples.erl b/lib/hipe/test/basic_SUITE_data/basic_tuples.erl index 94c187e364..96e39d565a 100644 --- a/lib/hipe/test/basic_SUITE_data/basic_tuples.erl +++ b/lib/hipe/test/basic_SUITE_data/basic_tuples.erl @@ -55,6 +55,8 @@ test_element(T0, T1, T2, N) -> List = lists:seq(1, N), Tuple = list_to_tuple(List), ok = get_elements(List, Tuple, 1), + %% element/2 of larger tuple with omitted bounds test + true = lists:all(fun(I) -> I * I =:= square(I) end, lists:seq(1, 20)), %% some cases that throw exceptions {'EXIT', _} = (catch my_element(0, T2)), {'EXIT', _} = (catch my_element(3, T2)), @@ -73,6 +75,18 @@ get_elements([Element|Rest], Tuple, Pos) -> get_elements([], _Tuple, _Pos) -> ok. +squares() -> + {1*1, 2*2, 3*3, 4*4, 5*5, 6*6, 7*7, 8*8, 9*9, 10*10, + 11*11, 12*12, 13*13, 14*14, 15*15, 16*16, 17*17, 18*18, 19*19, 20*20}. + +square(N) when is_integer(N), N >= 1, N =< 20 -> + %% The guard tests lets the range analysis conclude N to be an integer in the + %% 1..20 range. 20-1=19 is bigger than ?SET_LIMIT in erl_types.erl, and will + %% thus be represented by an ?int_range() rather than an ?int_set(). + %% Because of the range analysis, the bounds test of this element/2 call + %% should be omitted. + element(N, squares()). + %%-------------------------------------------------------------------- %% Tests set_element/3. diff --git a/lib/hipe/util/Makefile b/lib/hipe/util/Makefile index 04de7f7823..eeb81ac482 100644 --- a/lib/hipe/util/Makefile +++ b/lib/hipe/util/Makefile @@ -48,7 +48,7 @@ HIPE_MODULES = hipe_vectors else HIPE_MODULES = endif -MODULES = hipe_timing hipe_dot hipe_digraph $(HIPE_MODULES) +MODULES = hipe_timing hipe_dot hipe_digraph hipe_dsets $(HIPE_MODULES) HRL_FILES= ERL_FILES= $(MODULES:%=%.erl) diff --git a/lib/hipe/util/hipe_dsets.erl b/lib/hipe/util/hipe_dsets.erl new file mode 100644 index 0000000000..9492cab0ff --- /dev/null +++ b/lib/hipe/util/hipe_dsets.erl @@ -0,0 +1,84 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% IMMUTABLE DISJOINT SETS OF ARBITRARY TERMS +%% +%% The disjoint set forests data structure, for elements of arbitrary types. +%% Note that the find operation mutates the set. +%% +%% We could do this more efficiently if we restricted the elements to integers, +%% and used the (mutable) hipe arrays. 
For arbitrary terms ETS could be used, +%% for a persistent interface (which isn't that nice when even accessors return +%% modified copies), the array module could be used. +-module(hipe_dsets). + +-export([new/1, find/2, union/3, to_map/1, to_rllist/1]). +-export_type([dsets/1]). + +-opaque dsets(X) :: #{X => {node, X} | {root, non_neg_integer()}}. + +-spec new([E]) -> dsets(E). +new(Elems) -> maps:from_list([{E,{root,0}} || E <- Elems]). + +-spec find(E, dsets(E)) -> {E, dsets(E)}. +find(E, DS0) -> + case DS0 of + #{E := {root,_}} -> {E, DS0}; + #{E := {node,N}} -> + case find(N, DS0) of + {N, _}=T -> T; + {R, DS1} -> {R, DS1#{E := {node,R}}} + end; + _ -> error(badarg, [E, DS0]) + end. + +-spec union(E, E, dsets(E)) -> dsets(E). +union(X, Y, DS0) -> + {XRoot, DS1} = find(X, DS0), + case find(Y, DS1) of + {XRoot, DS2} -> DS2; + {YRoot, DS2} -> + #{XRoot := {root,XRR}, YRoot := {root,YRR}} = DS2, + if XRR < YRR -> DS2#{XRoot := {node,YRoot}}; + XRR > YRR -> DS2#{YRoot := {node,XRoot}}; + true -> DS2#{YRoot := {node,XRoot}, XRoot := {root,XRR+1}} + end + end. + +-spec to_map(dsets(E)) -> {#{Elem::E => Root::E}, dsets(E)}. +to_map(DS) -> + to_map(maps:keys(DS), DS, #{}). + +to_map([], DS, Acc) -> {Acc, DS}; +to_map([K|Ks], DS0, Acc) -> + {KR, DS} = find(K, DS0), + to_map(Ks, DS, Acc#{K => KR}). + +-spec to_rllist(dsets(E)) -> {[{Root::E, Elems::[E]}], dsets(E)}. +to_rllist(DS0) -> + {Lists, DS} = to_rllist(maps:keys(DS0), #{}, DS0), + {maps:to_list(Lists), DS}. + +to_rllist([], Acc, DS) -> {Acc, DS}; +to_rllist([E|Es], Acc, DS0) -> + {ERoot, DS} = find(E, DS0), + to_rllist(Es, map_append(ERoot, E, Acc), DS). + +map_append(Key, Elem, Map) -> + case Map of + #{Key := List} -> Map#{Key := [Elem|List]}; + #{} -> Map#{Key => [Elem]} + end. diff --git a/lib/hipe/util/hipe_vectors.erl b/lib/hipe/util/hipe_vectors.erl index fc4e4edb24..788dacd11b 100644 --- a/lib/hipe/util/hipe_vectors.erl +++ b/lib/hipe/util/hipe_vectors.erl @@ -116,8 +116,7 @@ get(Vec, Ix) -> %% --------------------------------------------------------------------- -ifdef(USE_ARRAYS). -%%-opaque vector(E) :: array:array(E). --type vector(E) :: array:array(E). % Work around dialyzer bug +-opaque vector(E) :: array:array(E). new(N, V) -> array:new(N, {default, V}). size(V) -> array:size(V). diff --git a/lib/hipe/vsn.mk b/lib/hipe/vsn.mk index cb4174381a..172d976931 100644 --- a/lib/hipe/vsn.mk +++ b/lib/hipe/vsn.mk @@ -1 +1 @@ -HIPE_VSN = 3.15.3 +HIPE_VSN = 3.15.4 diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl index 29cad6ca51..31e4f6e4ac 100644 --- a/lib/hipe/x86/hipe_rtl_to_x86.erl +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -124,7 +124,6 @@ conv_insn(I, Map, Data) -> hipe_rtl:call_continuation(I), hipe_rtl:call_fail(I), hipe_rtl:call_type(I)), - %% XXX Fixme: this ++ is probably inefficient. {FixArgs++I2, Map2, Data}; #comment{} -> I2 = [hipe_x86:mk_comment(hipe_rtl:comment_text(I))], diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl index cc1c75b04d..f514dd1ded 100644 --- a/lib/hipe/x86/hipe_x86.erl +++ b/lib/hipe/x86/hipe_x86.erl @@ -167,6 +167,12 @@ mk_pseudo_spill/1, + mk_pseudo_spill_fmove/3, + is_pseudo_spill_fmove/1, + + mk_pseudo_spill_move/3, + is_pseudo_spill_move/1, + mk_pseudo_tailcall/4, %% is_pseudo_tailcall/1, pseudo_tailcall_fun/1, @@ -425,6 +431,14 @@ mk_pseudo_jcc_simple(Cc, TrueLabel, FalseLabel, Pred) -> mk_pseudo_spill(List) -> #pseudo_spill{args=List}. +mk_pseudo_spill_fmove(Src, Temp, Dst) -> + #pseudo_spill_fmove{src=Src, temp=Temp, dst=Dst}. 
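The new hipe_dsets module above is a classic union-find (disjoint-set forest) keyed on arbitrary terms, with path compression in find/2 and union by rank in union/3; because the structure is immutable, find/2 hands back the possibly compressed copy along with the root. A short usage sketch (element names are arbitrary):

  DS0 = hipe_dsets:new([a, b, c, d]),
  DS1 = hipe_dsets:union(a, b, DS0),
  DS2 = hipe_dsets:union(c, d, DS1),
  DS3 = hipe_dsets:union(b, c, DS2),
  {Root, DS4} = hipe_dsets:find(d, DS3),       % all four elements now share Root
  {[{Root, Elems}], _DS5} = hipe_dsets:to_rllist(DS4),
  [a, b, c, d] = lists:sort(Elems).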
+is_pseudo_spill_fmove(I) -> is_record(I, pseudo_spill_fmove). + +mk_pseudo_spill_move(Src, Temp, Dst) -> + #pseudo_spill_move{src=Src, temp=Temp, dst=Dst}. +is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move). + mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage) -> check_linkage(Linkage), #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage}. diff --git a/lib/hipe/x86/hipe_x86.hrl b/lib/hipe/x86/hipe_x86.hrl index 567848bae5..6cd69905b2 100644 --- a/lib/hipe/x86/hipe_x86.hrl +++ b/lib/hipe/x86/hipe_x86.hrl @@ -91,6 +91,8 @@ -record(pseudo_call, {'fun', sdesc, contlab, linkage}). -record(pseudo_jcc, {cc, true_label, false_label, pred}). -record(pseudo_spill, {args=[]}). +-record(pseudo_spill_move, {src, temp, dst}). +-record(pseudo_spill_fmove, {src, temp, dst}). -record(pseudo_tailcall, {'fun', arity, stkargs, linkage}). -record(pseudo_tailcall_prepare, {}). -record(push, {src}). diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl index ef9c32ef41..50919bdf4e 100644 --- a/lib/hipe/x86/hipe_x86_assemble.erl +++ b/lib/hipe/x86/hipe_x86_assemble.erl @@ -63,7 +63,7 @@ assemble(CompiledCode, Closures, Exports, Options) -> || {MFA, Defun} <- CompiledCode], %% {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = - hipe_pack_constants:pack_constants(Code, ?HIPE_X86_REGISTERS:alignment()), + hipe_pack_constants:pack_constants(Code), %% {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = encode(translate(Code, ConstMap, Options), Options), @@ -148,6 +148,8 @@ insn_size(I) -> translate_insn(I, Context, Options) -> case I of + #alu{aluop='xor', src=#x86_temp{reg=Reg}=Src, dst=#x86_temp{reg=Reg}=Dst} -> + [{'xor', {temp_to_reg32(Dst), temp_to_rm32(Src)}, I}]; #alu{} -> Arg = resolve_alu_args(hipe_x86:alu_src(I), hipe_x86:alu_dst(I), Context), [{hipe_x86:alu_op(I), Arg, I}]; @@ -228,11 +230,11 @@ translate_insn(I, Context, Options) -> #move64{} -> translate_move64(I, Context); #movsx{} -> - Arg = resolve_movx_args(hipe_x86:movsx_src(I), hipe_x86:movsx_dst(I)), - [{movsx, Arg, I}]; + Src = resolve_movx_src(hipe_x86:movsx_src(I)), + [{movsx, {temp_to_regArch(hipe_x86:movsx_dst(I)), Src}, I}]; #movzx{} -> - Arg = resolve_movx_args(hipe_x86:movzx_src(I), hipe_x86:movzx_dst(I)), - [{movzx, Arg, I}]; + Src = resolve_movx_src(hipe_x86:movzx_src(I)), + [{movzx, {temp_to_reg32(hipe_x86:movzx_dst(I)), Src}, I}]; %% pseudo_call: eliminated before assembly %% pseudo_jcc: eliminated before assembly %% pseudo_tailcall: eliminated before assembly @@ -845,16 +847,15 @@ translate_move64(I, _Context) -> exit({?MODULE, I}). -endif. %%% mov{s,z}x -resolve_movx_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}) -> - {temp_to_regArch(Dst), - case Type of - byte -> - mem_to_rm8(Src); - int16 -> - mem_to_rm16(Src); - int32 -> - mem_to_rm32(Src) - end}. +resolve_movx_src(Src=#x86_mem{type=Type}) -> + case Type of + byte -> + mem_to_rm8(Src); + int16 -> + mem_to_rm16(Src); + int32 -> + mem_to_rm32(Src) + end. %%% alu/cmp (_not_ test) resolve_alu_args(Src, Dst, Context) -> diff --git a/lib/hipe/x86/hipe_x86_cfg.erl b/lib/hipe/x86/hipe_x86_cfg.erl index a4544e1086..0a3c0fc9d6 100644 --- a/lib/hipe/x86/hipe_x86_cfg.erl +++ b/lib/hipe/x86/hipe_x86_cfg.erl @@ -19,7 +19,7 @@ succ/2, pred/2, bb/2, bb_add/3, map_bbs/2, fold_bbs/3]). -export([postorder/1, reverse_postorder/1]). --export([linearise/1, params/1, arity/1, redirect_jmp/3]). +-export([linearise/1, params/1, arity/1, redirect_jmp/3, branch_preds/1]). 
%%% these tell cfg.inc what to define (ugly as hell) -define(PRED_NEEDED,true). @@ -72,6 +72,26 @@ branch_successors(Branch) -> #ret{} -> [] end. +branch_preds(Branch) -> + case Branch of + #jmp_switch{labels=Labels} -> + Prob = 1.0/length(Labels), + [{L, Prob} || L <- Labels]; + #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=[]}} -> + %% A function can still cause an exception, even if we won't catch it + [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}]; + #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=ExnLab}} -> + CallExnPred = hipe_bb_weights:call_exn_pred(), + [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}]; + #pseudo_jcc{true_label=TrueLab,false_label=FalseLab,pred=Pred} -> + [{FalseLab, 1.0-Pred}, {TrueLab, Pred}]; + _ -> + case branch_successors(Branch) of + [] -> []; + [Single] -> [{Single, 1.0}] + end + end. + -ifdef(REMOVE_TRIVIAL_BBS_NEEDED). fails_to(_Instr) -> []. -endif. diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl index 5d7fadf8e5..2731836dc1 100644 --- a/lib/hipe/x86/hipe_x86_defuse.erl +++ b/lib/hipe/x86/hipe_x86_defuse.erl @@ -51,6 +51,8 @@ insn_def(I) -> #movzx{dst=Dst} -> dst_def(Dst); #pseudo_call{} -> call_clobbered(); #pseudo_spill{} -> []; + #pseudo_spill_fmove{temp=Temp, dst=Dst} -> [Temp, Dst]; + #pseudo_spill_move{temp=Temp, dst=Dst} -> [Temp, Dst]; #pseudo_tailcall_prepare{} -> tailcall_clobbered(); #shift{dst=Dst} -> dst_def(Dst); %% call, cmp, comment, jcc, jmp_fun, jmp_label, jmp_switch, label @@ -108,6 +110,8 @@ insn_use(I) -> #pseudo_call{'fun'=Fun,sdesc=#x86_sdesc{arity=Arity}} -> addtemp(Fun, arity_use(Arity)); #pseudo_spill{args=Args} -> Args; + #pseudo_spill_fmove{src=Src} -> [Src]; + #pseudo_spill_move{src=Src} -> [Src]; #pseudo_tailcall{'fun'=Fun,arity=Arity,stkargs=StkArgs} -> addtemp(Fun, addtemps(StkArgs, addtemps(tailcall_clobbered(), arity_use(Arity)))); diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl index 3c2b67967a..558321d0c3 100644 --- a/lib/hipe/x86/hipe_x86_frame.erl +++ b/lib/hipe/x86/hipe_x86_frame.erl @@ -95,13 +95,17 @@ do_insn(I, LiveOut, Context, FPoff) -> #imul{} -> {[do_imul(I, Context, FPoff)], FPoff}; #move{} -> - {[do_move(I, Context, FPoff)], FPoff}; + {do_move(I, Context, FPoff), FPoff}; #movsx{} -> {[do_movsx(I, Context, FPoff)], FPoff}; #movzx{} -> {[do_movzx(I, Context, FPoff)], FPoff}; #pseudo_call{} -> do_pseudo_call(I, LiveOut, Context, FPoff); + #pseudo_spill_fmove{} -> + {do_pseudo_spill_fmove(I, Context, FPoff), FPoff}; + #pseudo_spill_move{} -> + {do_pseudo_spill_move(I, Context, FPoff), FPoff}; #pseudo_tailcall{} -> {do_pseudo_tailcall(I, Context), context_framesize(Context)}; #push{} -> @@ -144,22 +148,50 @@ do_fp_binop(I, Context, FPoff) -> Dst = conv_opnd(Dst0, FPoff, Context), [I#fp_binop{src=Src,dst=Dst}]. -do_fmove(I, Context, FPoff) -> - #fmove{src=Src0,dst=Dst0} = I, +do_fmove(I0, Context, FPoff) -> + #fmove{src=Src0,dst=Dst0} = I0, Src = conv_opnd(Src0, FPoff, Context), Dst = conv_opnd(Dst0, FPoff, Context), - I#fmove{src=Src,dst=Dst}. + I = I0#fmove{src=Src,dst=Dst}, + case Src =:= Dst of + true -> []; % omit move-to-self + false -> [I] + end. + +do_pseudo_spill_fmove(I0, Context, FPoff) -> + #pseudo_spill_fmove{src=Src0,temp=Temp0,dst=Dst0} = I0, + Src = conv_opnd(Src0, FPoff, Context), + Temp = conv_opnd(Temp0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + case Src =:= Dst of + true -> []; % omit move-to-self + false -> [#fmove{src=Src, dst=Temp}, #fmove{src=Temp, dst=Dst}] + end. 
do_imul(I, Context, FPoff) -> #imul{src=Src0} = I, Src = conv_opnd(Src0, FPoff, Context), I#imul{src=Src}. -do_move(I, Context, FPoff) -> - #move{src=Src0,dst=Dst0} = I, +do_move(I0, Context, FPoff) -> + #move{src=Src0,dst=Dst0} = I0, Src = conv_opnd(Src0, FPoff, Context), Dst = conv_opnd(Dst0, FPoff, Context), - I#move{src=Src,dst=Dst}. + I = I0#move{src=Src,dst=Dst}, + case Src =:= Dst of + true -> []; % omit move-to-self + false -> [I] + end. + +do_pseudo_spill_move(I0, Context, FPoff) -> + #pseudo_spill_move{src=Src0,temp=Temp0,dst=Dst0} = I0, + Src = conv_opnd(Src0, FPoff, Context), + Temp = conv_opnd(Temp0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + case Src =:= Dst of + true -> []; % omit move-to-self + false -> [#move{src=Src, dst=Temp}, #move{src=Temp, dst=Dst}] + end. do_movsx(I, Context, FPoff) -> #movsx{src=Src0,dst=Dst0} = I, diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl index b84e9bed91..925054dd68 100644 --- a/lib/hipe/x86/hipe_x86_postpass.erl +++ b/lib/hipe/x86/hipe_x86_postpass.erl @@ -57,9 +57,10 @@ postpass(#defun{code=Code0}=Defun, Options) -> peephole_optimization(Insns) -> peep(Insns, [], []). -%% MoveSelf related peep-opts + +%% MoveSelf related peep-opts %% ------------------------------ -peep([#fmove{src=Src, dst=Src} | Insns], Res,Lst) -> +peep([#fmove{src=Src, dst=Src} | Insns], Res,Lst) -> peep(Insns, Res, [moveSelf1|Lst]); peep([I=#fmove{src=Src, dst=Dst}, #fmove{src=Dst, dst=Src} | Insns], Res,Lst) -> @@ -159,8 +160,7 @@ peep([#jcc{label=Lab}, I=#label{label=Lab}|Insns], Res, Lst) -> %% ElimSet0 %% -------- -peep([#move{src=#x86_imm{value=0},dst=Dst}|Insns],Res,Lst) -when (Dst==#x86_temp{}) -> +peep([#move{src=#x86_imm{value=0},dst=Dst=#x86_temp{}}|Insns],Res,Lst) -> peep(Insns, [#alu{aluop='xor', src=Dst, dst=Dst}|Res], [elimSet0|Lst]); %% ElimMDPow2 diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl index 4273e3cee8..e8abe78e00 100644 --- a/lib/hipe/x86/hipe_x86_ra_finalise.erl +++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl @@ -140,6 +140,16 @@ ra_insn(I, Map, FpMap) -> I#pseudo_call{'fun'=Fun}; #pseudo_jcc{} -> I; + #pseudo_spill_fmove{src=Src0, temp=Temp0, dst=Dst0} -> + Src = ra_opnd(Src0, Map, FpMap), + Temp = ra_opnd(Temp0, Map, FpMap), + Dst = ra_opnd(Dst0, Map, FpMap), + I#pseudo_spill_fmove{src=Src, temp=Temp, dst=Dst}; + #pseudo_spill_move{src=Src0, temp=Temp0, dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Temp = ra_opnd(Temp0, Map), + Dst = ra_opnd(Dst0, Map), + I#pseudo_spill_move{src=Src, temp=Temp, dst=Dst}; #pseudo_tailcall{'fun'=Fun0,stkargs=StkArgs0} -> Fun = ra_opnd(Fun0, Map), StkArgs = ra_args(StkArgs0, Map), diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl index 28ec9c4277..db6391d5c1 100644 --- a/lib/hipe/x86/hipe_x86_ra_postconditions.erl +++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl @@ -74,6 +74,8 @@ do_insn(I, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill} do_movx(I, TempMap, Strategy); #fmove{} -> do_fmove(I, TempMap, Strategy); + #pseudo_spill_move{} -> + do_pseudo_spill_move(I, TempMap, Strategy); #shift{} -> do_shift(I, TempMap, Strategy); #test{} -> @@ -190,10 +192,19 @@ do_lea(I, TempMap, Strategy) -> do_move(I, TempMap, Strategy) -> #move{src=Src0,dst=Dst0} = I, - {FixSrc, Src, FixDst, Dst, DidSpill} = - do_check_byte_move(Src0, Dst0, TempMap, Strategy), - {FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}], - DidSpill}. 
+ case + is_record(Src0, x86_temp) andalso is_record(Dst0, x86_temp) + andalso is_spilled(Src0, TempMap) andalso is_spilled(Dst0, TempMap) + of + true -> + Tmp = clone(Src0, Strategy), + {[hipe_x86:mk_pseudo_spill_move(Src0, Tmp, Dst0)], true}; + false -> + {FixSrc, Src, FixDst, Dst, DidSpill} = + do_check_byte_move(Src0, Dst0, TempMap, Strategy), + {FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}], + DidSpill} + end. -ifdef(HIPE_AMD64). @@ -287,6 +298,13 @@ do_fmove(I, TempMap, Strategy) -> {FixSrc ++ FixDst ++ [I#fmove{src=Src,dst=Dst}], DidSpill1 or DidSpill2}. +%%% Fix an pseudo_spill_move op. + +do_pseudo_spill_move(I = #pseudo_spill_move{temp=Temp}, TempMap, _Strategy) -> + %% Temp is above the low water mark and must not have been spilled + false = is_spilled(Temp, TempMap), + {[I], false}. % nothing to do + %%% Fix a shift operation. %%% 1. remove pseudos from any explicit memory operands %%% 2. if the source is a register or memory position diff --git a/lib/hipe/x86/hipe_x86_subst.erl b/lib/hipe/x86/hipe_x86_subst.erl index 7b5fb1352b..7db3b23d92 100644 --- a/lib/hipe/x86/hipe_x86_subst.erl +++ b/lib/hipe/x86/hipe_x86_subst.erl @@ -19,7 +19,7 @@ -endif. -module(?HIPE_X86_SUBST). --export([insn_temps/2]). +-export([insn_temps/2, insn_lbls/2]). -include("../x86/hipe_x86.hrl"). %% These should be moved to hipe_x86 and exported @@ -28,6 +28,7 @@ -type mfarec() :: #x86_mfa{}. -type prim() :: #x86_prim{}. -type funv() :: mfarec() | prim() | temp(). +-type label() :: non_neg_integer(). -type insn() :: tuple(). % for now -type subst_fun() :: fun((temp()) -> temp()). @@ -49,14 +50,19 @@ insn_temps(SubstTemp, I) -> #movzx {src=S, dst=D} -> I#movzx {src=O(S), dst=O(D)}; #shift {src=S, dst=D} -> I#shift {src=O(S), dst=O(D)}; #test {src=S, dst=D} -> I#test {src=O(S), dst=O(D)}; - #fp_unop{arg=A} -> I#fp_unop{arg=O(A)}; - #move64 {dst=D} -> I#move64 {dst=O(D)}; - #push {src=S} -> I#push {src=O(S)}; - #pop {dst=D} -> I#pop {dst=O(D)}; + #fp_unop{arg=[]} -> I; + #fp_unop{arg=A} -> I#fp_unop{arg=O(A)}; + #move64 {dst=D} -> I#move64 {dst=O(D)}; + #push {src=S} -> I#push {src=O(S)}; + #pop {dst=D} -> I#pop {dst=O(D)}; #jmp_switch{temp=T, jtab=J} -> I#jmp_switch{temp=O(T), jtab=jtab_temps(SubstTemp, J)}; #pseudo_call{'fun'=F} -> I#pseudo_call{'fun'=funv_temps(SubstTemp, F)}; + #pseudo_spill_fmove{src=S, temp=T, dst=D} -> + I#pseudo_spill_fmove{src=O(S), temp=O(T), dst=O(D)}; + #pseudo_spill_move{src=S, temp=T, dst=D} -> + I#pseudo_spill_move{src=O(S), temp=O(T), dst=O(D)}; #pseudo_tailcall{'fun'=F, stkargs=Stk} -> I#pseudo_tailcall{'fun'=funv_temps(SubstTemp, F), stkargs=lists:map(O, Stk)}; @@ -85,3 +91,22 @@ jtab_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T). -else. jtab_temps(_SubstTemp, DataLbl) when is_integer(DataLbl) -> DataLbl. -endif. + +-type lbl_subst_fun() :: fun((label()) -> label()). + +%% @doc Maps over the branch targets in an instruction +-spec insn_lbls(lbl_subst_fun(), insn()) -> insn(). +insn_lbls(SubstLbl, I) -> + case I of + #jmp_label{label=Label} -> + I#jmp_label{label=SubstLbl(Label)}; + #pseudo_call{sdesc=Sdesc, contlab=Contlab} -> + I#pseudo_call{sdesc=sdesc_lbls(SubstLbl, Sdesc), + contlab=SubstLbl(Contlab)}; + #pseudo_jcc{true_label=T, false_label=F} -> + I#pseudo_jcc{true_label=SubstLbl(T), false_label=SubstLbl(F)} + end. + +sdesc_lbls(_SubstLbl, Sdesc=#x86_sdesc{exnlab=[]}) -> Sdesc; +sdesc_lbls(SubstLbl, Sdesc=#x86_sdesc{exnlab=Exnlab}) -> + Sdesc#x86_sdesc{exnlab=SubstLbl(Exnlab)}. |
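The new insn_lbls/2 is the label-level counterpart of insn_temps/2: it maps a substitution over every branch target an instruction can transfer control to, including a pseudo_call's continuation and exception labels. A small usage sketch, assuming hipe_x86.hrl is included and using made-up label numbers, redirects label 5 to label 7 in a conditional jump:

  Subst = fun(5) -> 7; (L) -> L end,
  I0 = #pseudo_jcc{cc='e', true_label=5, false_label=6, pred=0.5},
  #pseudo_jcc{true_label=7, false_label=6} = hipe_x86_subst:insn_lbls(Subst, I0).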