diff options
author | Sverker Eriksson <[email protected]> | 2016-09-02 14:51:25 +0200 |
---|---|---|
committer | Sverker Eriksson <[email protected]> | 2016-09-02 14:51:25 +0200 |
commit | c2f8b61ca3682281752fa0984699214dfcbf7ccd (patch) | |
tree | 3f44fa5c58d0d0d845045c3c5535aefad333b6dd /lib/hipe/x86 | |
parent | 87643cf92c061d7518299fdebb326e315c32e528 (diff) | |
parent | a19e3f0e1e82b793d58f9ef0db907ba637793fb6 (diff) | |
download | otp-c2f8b61ca3682281752fa0984699214dfcbf7ccd.tar.gz otp-c2f8b61ca3682281752fa0984699214dfcbf7ccd.tar.bz2 otp-c2f8b61ca3682281752fa0984699214dfcbf7ccd.zip |
Merge branch 'sverker/hipe-performance-o1/PR-1154/OTP-13862'
* sverker/hipe-performance-o1/PR-1154:
hipe_sparc: Minimise CFG<->linear conversions
hipe_ppc: Minimise CFG<->linear conversions
hipe_arm: Minimise CFG<->linear conversions
hipe_x86: Use lea instead of move+add
hipe_arm: Improve peephole optimiser
hipe_arm: Be resilient to crappy RTL
hipe_ppc: Be resilient to crappy RTL
hipe_sparc: Be resilient to crappy RTL
hipe: Reuse liveness info for spillmin
hipe_x86: Minimise CFG<->linear conversions
hipe: Fix o0 and o1
hipe: Add o0 and o1 to tests
hipe_rtl_binary:get_word_integer/4: Handle imms
hipe_x86: Be resilient to crappy RTL
hipe_x86: LSRA for SSE2
Diffstat (limited to 'lib/hipe/x86')
-rw-r--r-- | lib/hipe/x86/Makefile | 2 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_rtl_to_x86.erl | 37 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_x86_cfg.erl | 5 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_x86_frame.erl | 56 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_x86_main.erl | 22 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_x86_ra.erl | 90 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_x86_ra_finalise.erl | 36 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_x86_ra_ls.erl | 60 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_x86_ra_naive.erl | 11 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_x86_ra_postconditions.erl | 38 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_x86_ra_x87_ls.erl | 64 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_x86_spill_restore.erl | 12 | ||||
-rw-r--r-- | lib/hipe/x86/hipe_x86_x87.erl | 5 |
13 files changed, 229 insertions, 209 deletions
diff --git a/lib/hipe/x86/Makefile b/lib/hipe/x86/Makefile index 93f8b955dd..9b21270426 100644 --- a/lib/hipe/x86/Makefile +++ b/lib/hipe/x86/Makefile @@ -60,7 +60,6 @@ MODULES=hipe_rtl_to_x86 \ hipe_x86_ra_ls \ hipe_x86_ra_naive \ hipe_x86_ra_postconditions \ - hipe_x86_ra_x87_ls \ hipe_x86_registers \ hipe_x86_spill_restore \ hipe_x86_x87 @@ -133,7 +132,6 @@ $(EBIN)/hipe_x86_ra: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_dummy.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_ls.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_postconditions.beam: ../main/hipe.hrl -$(EBIN)/hipe_x86_ra_x87_ls.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_registers.beam: ../rtl/hipe_literals.hrl $(EBIN)/hipe_x86_spill_restore.beam: ../main/hipe.hrl ../flow/cfg.hrl $(EBIN)/hipe_x86_x87.beam: ../main/hipe.hrl diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl index d13f63b1d9..4c8c98551c 100644 --- a/lib/hipe/x86/hipe_rtl_to_x86.erl +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -85,7 +85,7 @@ conv_insn(I, Map, Data) -> true -> conv_shift(Dst, Src1, BinOp, Src2); false -> - conv_alu(Dst, Src1, BinOp, Src2, []) + conv_alu_nocc(Dst, Src1, BinOp, Src2, []) end, {FixSrc1++FixSrc2++I2, Map2, Data}; #alub{} -> @@ -144,7 +144,7 @@ conv_insn(I, Map, Data) -> {I2, Map, Data}; #load{} -> {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map), - {FixSrc, Src, Map1} = conv_src(hipe_rtl:load_src(I), Map0), + {FixSrc, Src, Map1} = conv_src_noimm(hipe_rtl:load_src(I), Map0), {FixOff, Off, Map2} = conv_src(hipe_rtl:load_offset(I), Map1), I2 = case {hipe_rtl:load_size(I), hipe_rtl:load_sign(I)} of {byte, signed} -> @@ -171,6 +171,7 @@ conv_insn(I, Map, Data) -> Src = hipe_x86:mk_imm_from_atom(hipe_rtl:load_atom_atom(I)), I2 = [hipe_x86:mk_move(Src, Dst)], {I2, Map0, Data}; + #move{src=Dst, dst=Dst} -> {[], Map, Data}; #move{} -> {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map), {FixSrc, Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0), @@ -182,11 +183,11 @@ conv_insn(I, Map, Data) -> I2 = move_retvals(Args, [hipe_x86:mk_ret(-1)]), {FixArgs++I2, Map0, Data}; #store{} -> - {Ptr, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {FixPtr, Ptr, Map0} = conv_src_noimm(hipe_rtl:store_base(I), Map), {FixSrc, Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {FixOff, Off, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), I2 = mk_store(hipe_rtl:store_size(I), Src, Ptr, Off), - {FixSrc++FixOff++I2, Map2, Data}; + {FixPtr++FixSrc++FixOff++I2, Map2, Data}; #switch{} -> % this one also updates Data :-( %% from hipe_rtl2sparc, but we use a hairy addressing mode %% instead of doing the arithmetic manually @@ -206,7 +207,7 @@ conv_insn(I, Map, Data) -> {I2, Map1, NewData}; #fload{} -> {Dst, Map0} = conv_dst(hipe_rtl:fload_dst(I), Map), - {[], Src, Map1} = conv_src(hipe_rtl:fload_src(I), Map0), + {[], Src, Map1} = conv_src_noimm(hipe_rtl:fload_src(I), Map0), {[], Off, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1), I2 = [hipe_x86:mk_fmove(hipe_x86:mk_mem(Src, Off, 'double'),Dst)], {I2, Map2, Data}; @@ -249,6 +250,22 @@ conv_insn(I, Map, Data) -> %%% Finalise the conversion of a 3-address ALU operation, taking %%% care to not introduce more temps and moves than necessary. +conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) -> + case (not same_opnd(Dst, Src1)) andalso (not same_opnd(Dst, Src2)) + andalso (hipe_x86:is_temp(Src1) orelse hipe_x86:is_temp(Src2)) + of + false -> conv_alu(Dst, Src1, 'add', Src2, Tail); + true -> % Use LEA + Type = typeof_dst(Dst), + Mem = case hipe_x86:is_temp(Src1) of + true -> hipe_x86:mk_mem(Src1, Src2, Type); + false -> hipe_x86:mk_mem(Src2, Src1, Type) + end, + [hipe_x86:mk_lea(Mem, Dst) | Tail] + end; +conv_alu_nocc(Dst, Src1, BinOp, Src2, Tail) -> + conv_alu(Dst, Src1, BinOp, Src2, Tail). + conv_alu(Dst, Src1, 'imul', Src2, Tail) -> mk_imul(Src1, Src2, Dst, Tail); conv_alu(Dst, Src1, BinOp, Src2, Tail) -> @@ -572,6 +589,16 @@ conv_fun(Fun, Map) -> end end. +conv_src_noimm(Opnd, Map) -> + R={FixSrc0, Src, NewMap} = conv_src(Opnd, Map), + case hipe_x86:is_imm(Src) of + false -> R; + true -> + Tmp = new_untagged_temp(), + {FixSrc0 ++ [hipe_x86:mk_move(Src, Tmp)], + Tmp, NewMap} + end. + %%% Convert an RTL source operand (imm/var/reg). conv_src(Opnd, Map) -> diff --git a/lib/hipe/x86/hipe_x86_cfg.erl b/lib/hipe/x86/hipe_x86_cfg.erl index ab40b68580..b9f9c711f3 100644 --- a/lib/hipe/x86/hipe_x86_cfg.erl +++ b/lib/hipe/x86/hipe_x86_cfg.erl @@ -24,7 +24,7 @@ -export([init/1, labels/1, start_label/1, succ/2, pred/2, - bb/2, bb_add/3]). + bb/2, bb_add/3, map_bbs/2, fold_bbs/3]). -export([postorder/1, reverse_postorder/1]). -export([linearise/1, params/1, arity/1, redirect_jmp/3]). @@ -33,6 +33,7 @@ -define(BREADTH_ORDER,true). -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -include("hipe_x86.hrl"). -include("../flow/cfg.hrl"). @@ -107,7 +108,7 @@ mk_goto(Label) -> hipe_x86:mk_jmp_label(Label). is_label(I) -> - hipe_x86:is_label(I). + case I of #label{} -> true; _ -> false end. label_name(Label) -> hipe_x86:label_label(Label). diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl index 4cdc04007d..fc782571bf 100644 --- a/lib/hipe/x86/hipe_x86_frame.erl +++ b/lib/hipe/x86/hipe_x86_frame.erl @@ -46,15 +46,13 @@ -include("../x86/hipe_x86.hrl"). -include("../rtl/hipe_literals.hrl"). -frame(Defun, _Options) -> - Formals = fix_formals(hipe_x86:defun_formals(Defun)), - Temps0 = all_temps(hipe_x86:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG0, _Options) -> + Formals = fix_formals(hipe_x86_cfg:params(CFG0)), + Temps0 = all_temps(CFG0, Formals), + MinFrame = defun_minframe(CFG0), Temps = ensure_minframe(MinFrame, Temps0), - CFG0 = hipe_x86_cfg:init(Defun), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps), - hipe_x86_cfg:linearise(CFG1). + do_body(CFG0, Liveness, Formals, Temps). fix_formals(Formals) -> fix_formals(?HIPE_X86_REGISTERS:nr_args(), Formals). @@ -69,23 +67,14 @@ do_body(CFG0, Liveness, Formals, Temps) -> do_prologue(CFG1, Context). do_blocks(CFG, Context) -> - Labels = hipe_x86_cfg:labels(CFG), - do_blocks(Labels, CFG, Context). + hipe_x86_cfg:map_bbs(fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG). -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) -> Liveness = context_liveness(Context), LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label), - Block = hipe_x86_cfg:bb(CFG, Label), Code = hipe_bb:code(Block), - NewCode = do_block(Code, LiveOut, Context), - NewBlock = hipe_bb:code_update(Block, NewCode), - NewCFG = hipe_x86_cfg:bb_add(CFG, Label, NewBlock), - do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> - CFG. - -do_block(Insns, LiveOut, Context) -> - do_block(Insns, LiveOut, Context, context_framesize(Context), []). + NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), + hipe_bb:code_update(Block, NewCode). do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), @@ -609,18 +598,20 @@ temp_is_pseudo(Temp) -> %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), S2 = tset_filter(S1, fun(T) -> temp_is_pseudo(T) end), S2. -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_x86_defuse:insn_def(I)), - S2 = tset_add_list(S1, hipe_x86_defuse:insn_use(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_x86_defuse:insn_use(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_x86_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). -compile({inline, [tset_empty/0, tset_size/1, tset_insert/2, tset_filter/2, tset_to_list/1]}). @@ -654,16 +645,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_x86:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_x86:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_x86_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/x86/hipe_x86_main.erl b/lib/hipe/x86/hipe_x86_main.erl index 13b0bb6b28..341269b698 100644 --- a/lib/hipe/x86/hipe_x86_main.erl +++ b/lib/hipe/x86/hipe_x86_main.erl @@ -53,19 +53,23 @@ ?RTL_TO_X86(MFA, RTL, Options) -> Translated = ?option_time(?HIPE_RTL_TO_X86:translate(RTL), "RTL-to-"?X86STR, Options), - SpillRest = + TransCFG = ?option_time(hipe_x86_cfg:init(Translated), + ?X86STR" to cfg", Options), + SpillRestCFG = case proplists:get_bool(caller_save_spill_restore, Options) of true -> - ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(Translated, Options), + ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(TransCFG, Options), ?X86STR" spill restore", Options); false -> - Translated + TransCFG end, - Allocated = ?option_time(?HIPE_X86_RA:ra(SpillRest, Options), - ?X86STR" register allocation", Options), - Framed = ?option_time(?HIPE_X86_FRAME:frame(Allocated, Options), - ?X86STR" frame", Options), - Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options), - ?X86STR" finalise", Options), + AllocatedCFG = ?option_time(?HIPE_X86_RA:ra(SpillRestCFG, Options), + ?X86STR" register allocation", Options), + FramedCFG = ?option_time(?HIPE_X86_FRAME:frame(AllocatedCFG, Options), + ?X86STR" frame", Options), + Framed = ?option_time(hipe_x86_cfg:linearise(FramedCFG), + ?X86STR" linearise", Options), + Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options), + ?X86STR" finalise", Options), ?HIPE_X86_PP:optional_pp(Finalised, MFA, Options), {native, ?X86TAG, {unprofiled, Finalised}}. diff --git a/lib/hipe/x86/hipe_x86_ra.erl b/lib/hipe/x86/hipe_x86_ra.erl index f66961a7a7..3af333ab4b 100644 --- a/lib/hipe/x86/hipe_x86_ra.erl +++ b/lib/hipe/x86/hipe_x86_ra.erl @@ -41,60 +41,80 @@ %%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. -include("../main/hipe.hrl"). -ra(Defun0, Options) -> - %% ?HIPE_X86_PP:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} = ra_fp(Defun0, Options), - %% ?HIPE_X86_PP:pp(Defun1), +-ifdef(HIPE_INSTRUMENT_COMPILER). +code_size(CFG) -> + hipe_x86_cfg:fold_bbs(fun(_, BB, Acc) -> Acc + length(hipe_bb:code(BB)) end, + 0, CFG). +-endif. %% ifdef(HIPE_INSTRUMENT_COMPILER) + +ra(CFG0, Options) -> + %% hipe_x86_cfg:pp(CFG0), + {CFG1, Coloring_fp, SpillIndex, Liveness} = + case ra_fp(CFG0, Options) of + {G, C, I} -> {G, C, I, undefined}; + {_,_,_,_}=T -> T + end, + %% hipe_x86_cfg:pp(CFG1), ?start_ra_instrumentation(Options, - length(hipe_x86:defun_code(Defun1)), - element(2,hipe_x86:defun_var_range(Defun1))), - {Defun2, Coloring} + code_size(CFG1), + element(2,hipe_gensym:var_range(x86))), + {CFG2, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, SpillIndex, Options, hipe_graph_coloring_regalloc); linear_scan -> - ?HIPE_X86_RA_LS:ra(Defun1, SpillIndex, Options); + ?HIPE_X86_RA_LS:ra(CFG1, Liveness, SpillIndex, Options); naive -> - ?HIPE_X86_RA_NAIVE:ra(Defun1, Coloring_fp, Options); + ?HIPE_X86_RA_NAIVE:ra(CFG1, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, ?stop_ra_instrumentation(Options, - length(hipe_x86:defun_code(Defun2)), - element(2,hipe_x86:defun_var_range(Defun2))), - %% ?HIPE_X86_PP:pp(Defun2), - ?HIPE_X86_RA_FINALISE:finalise(Defun2, Coloring, Coloring_fp, Options). + code_size(CFG2), + element(2,hipe_gensym:var_range(x86))), + %% hipe_x86_cfg:pp(CFG2), + ?HIPE_X86_RA_FINALISE:finalise(CFG2, Coloring, Coloring_fp, Options). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, ?HIPE_X86_SPECIFIC). +ra(CFG, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, SpillIndex, Options, RegAllocMod, ?HIPE_X86_SPECIFIC). -ifdef(HIPE_AMD64). -ra_fp(Defun, Options) -> - case proplists:get_bool(inline_fp, Options) and - (proplists:get_value(regalloc, Options) =/= naive) of - true -> - case proplists:get_bool(x87, Options) of - true -> - hipe_amd64_ra_x87_ls:ra(Defun, Options); - false -> - hipe_regalloc_loop:ra_fp(Defun, Options, - hipe_coalescing_regalloc, - hipe_amd64_specific_sse2) - end; - false -> - {Defun,[],0} +ra_fp(CFG, Options) -> + Regalloc0 = proplists:get_value(regalloc, Options), + {Regalloc, TargetMod} = + case proplists:get_bool(inline_fp, Options) and (Regalloc0 =/= naive) of + false -> {naive, undefined}; + true -> + case proplists:get_bool(x87, Options) of + true -> {linear_scan, hipe_amd64_specific_x87}; + false -> {Regalloc0, hipe_amd64_specific_sse2} + end + end, + case Regalloc of + coalescing -> ra_fp(CFG, Options, hipe_coalescing_regalloc, TargetMod); + optimistic -> ra_fp(CFG, Options, hipe_optimistic_regalloc, TargetMod); + graph_color -> ra_fp(CFG, Options, hipe_graph_coloring_regalloc, + TargetMod); + linear_scan -> hipe_amd64_ra_ls:ra_fp(CFG, Options, TargetMod); + naive -> {CFG,[],0}; + _ -> + exit({unknown_regalloc_compiler_option, + proplists:get_value(regalloc,Options)}) end. + +ra_fp(CFG, Options, RegAllocMod, TargetMod) -> + hipe_regalloc_loop:ra_fp(CFG, Options, RegAllocMod, TargetMod). -else. -ra_fp(Defun, Options) -> +ra_fp(CFG, Options) -> case proplists:get_bool(inline_fp, Options) of true -> - hipe_x86_ra_x87_ls:ra(Defun, Options); + hipe_x86_ra_ls:ra_fp(CFG, Options, hipe_x86_specific_x87); false -> - {Defun,[],0} + {CFG,[],0} end. -endif. diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl index 5dd75cb7ae..62009abb76 100644 --- a/lib/hipe/x86/hipe_x86_ra_finalise.erl +++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl @@ -25,23 +25,36 @@ -define(HIPE_X86_RA_FINALISE, hipe_amd64_ra_finalise). -define(HIPE_X86_REGISTERS, hipe_amd64_registers). -define(HIPE_X86_X87, hipe_amd64_x87). +-define(HIPE_X86_SSE2, hipe_amd64_sse2). +-define(IF_HAS_SSE2(Expr), Expr). -else. -define(HIPE_X86_RA_FINALISE, hipe_x86_ra_finalise). -define(HIPE_X86_REGISTERS, hipe_x86_registers). -define(HIPE_X86_X87, hipe_x86_x87). +-define(IF_HAS_SSE2(Expr),). -endif. -module(?HIPE_X86_RA_FINALISE). -export([finalise/4]). -include("../x86/hipe_x86.hrl"). -finalise(Defun, TempMap, FpMap, Options) -> - Defun1 = finalise_ra(Defun, TempMap, FpMap, Options), +finalise(CFG0, TempMap, FpMap, Options) -> + CFG1 = finalise_ra(CFG0, TempMap, FpMap, Options), case proplists:get_bool(x87, Options) of true -> - ?HIPE_X86_X87:map(Defun1); + ?HIPE_X86_X87:map(CFG1); _ -> - Defun1 + case + proplists:get_bool(inline_fp, Options) + and (proplists:get_value(regalloc, Options) =:= linear_scan) + of + %% Ugly, but required to avoid Dialyzer complaints about "Unknown + %% function" hipe_x86_sse2:map/1 + ?IF_HAS_SSE2(true -> + ?HIPE_X86_SSE2:map(CFG1);) + false -> + CFG1 + end end. %%% @@ -50,15 +63,16 @@ finalise(Defun, TempMap, FpMap, Options) -> %%% but I just want this to work now) %%% -finalise_ra(Defun, [], [], _Options) -> - Defun; -finalise_ra(Defun, TempMap, FpMap, Options) -> - Code = hipe_x86:defun_code(Defun), - {_, SpillLimit} = hipe_x86:defun_var_range(Defun), +finalise_ra(CFG, [], [], _Options) -> + CFG; +finalise_ra(CFG, TempMap, FpMap, Options) -> + {_, SpillLimit} = hipe_gensym:var_range(x86), Map = mk_ra_map(TempMap, SpillLimit), FpMap0 = mk_ra_map_fp(FpMap, SpillLimit, Options), - NewCode = ra_code(Code, Map, FpMap0), - Defun#defun{code=NewCode}. + hipe_x86_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FpMap0) end, CFG). + +ra_bb(BB, Map, FpMap) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap)). ra_code(Code, Map, FpMap) -> [ra_insn(I, Map, FpMap) || I <- Code]. diff --git a/lib/hipe/x86/hipe_x86_ra_ls.erl b/lib/hipe/x86/hipe_x86_ra_ls.erl index 3e34433111..9f019f9561 100644 --- a/lib/hipe/x86/hipe_x86_ra_ls.erl +++ b/lib/hipe/x86/hipe_x86_ra_ls.erl @@ -35,40 +35,65 @@ -endif. -module(?HIPE_X86_RA_LS). --export([ra/3,regalloc/7]). +-export([ra/4,ra_fp/3]). -define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. -include("../main/hipe.hrl"). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_x86_cfg:init(NewDefun), - +ra(CFG, Liveness, SpillIndex, Options) -> SpillLimit = ?HIPE_X86_SPECIFIC:number_of_temporaries( CFG), ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), - alloc(NewDefun, SpillIndex, SpillLimit, Options). + alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). + +ra_fp(CFG, Options, TargetMod) -> + ?inc_counter(ra_calls_counter,1), + %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), + SpillIndex = 0, + SpillLimit = TargetMod:number_of_temporaries(CFG), + ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), + + ?inc_counter(ra_iteration_counter,1), + %% ?HIPE_X86_PP:pp(Defun), + {Coloring,NewSpillIndex,Liveness} = + regalloc(CFG, + undefined, + TargetMod:allocatable('linearscan'), + [hipe_x86_cfg:start_label(CFG)], + SpillIndex, SpillLimit, Options, + TargetMod), + + {NewCFG, _DidSpill} = + TargetMod:check_and_rewrite(CFG, Coloring, 'linearscan'), + TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod), + {TempMap2, NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + TargetMod, TempMap), + Coloring2 = + hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), + ?add_spills(Options, NewSpillIndex), + {NewCFG, Coloring2, NewSpillIndex2, Liveness}. -alloc(Defun, SpillIndex, SpillLimit, Options) -> +alloc(CFG, Liveness0, SpillIndex, SpillLimit, Options) -> ?inc_counter(ra_iteration_counter,1), %% ?HIPE_X86_PP:pp(Defun), - CFG = hipe_x86_cfg:init(Defun), - {Coloring, NewSpillIndex} = + {Coloring, NewSpillIndex, Liveness} = regalloc( - CFG, + CFG, + Liveness0, ?HIPE_X86_REGISTERS:allocatable()-- [?HIPE_X86_REGISTERS:temp1(), ?HIPE_X86_REGISTERS:temp0()], [hipe_x86_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, ?HIPE_X86_SPECIFIC), - {NewDefun, _DidSpill} = + {NewCFG, _DidSpill} = ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite( - Defun, Coloring, 'linearscan'), + CFG, Coloring, 'linearscan'), %% ?HIPE_X86_PP:pp(NewDefun), TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), - {TempMap2,NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, + {TempMap2,NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, ?HIPE_X86_SPECIFIC, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), @@ -79,8 +104,9 @@ alloc(Defun, SpillIndex, SpillLimit, Options) -> ok end, ?add_spills(Options, NewSpillIndex), - {NewDefun, Coloring2}. + {NewCFG, Coloring2}. -regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, DontSpill, Options, Target) -> - hipe_ls_regalloc:regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, +regalloc(CFG,Liveness,PhysRegs,Entrypoints, SpillIndex, DontSpill, Options, + Target) -> + hipe_ls_regalloc:regalloc(CFG,Liveness,PhysRegs,Entrypoints, SpillIndex, DontSpill, Options, Target). diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl index 0ef4ef0a04..27e5af4aee 100644 --- a/lib/hipe/x86/hipe_x86_ra_naive.erl +++ b/lib/hipe/x86/hipe_x86_ra_naive.erl @@ -39,9 +39,8 @@ -define(HIPE_INSTRUMENT_COMPILER, true). % enable instrumentation -include("../main/hipe.hrl"). -ra(X86Defun, Coloring_fp, Options) -> - #defun{code=Code0} = X86Defun, - Code1 = do_insns(Code0), +ra(CFG0, Coloring_fp, Options) -> + CFG = hipe_x86_cfg:map_bbs(fun do_bb/2, CFG0), NofSpilledFloats = count_non_float_spills(Coloring_fp), NofFloats = length(Coloring_fp), ?add_spills(Options, hipe_gensym:get_var(x86) - @@ -49,10 +48,12 @@ ra(X86Defun, Coloring_fp, Options) -> NofSpilledFloats - NofFloats), TempMap = [], - {X86Defun#defun{code=Code1, - var_range={0, hipe_gensym:get_var(x86)}}, + {CFG, TempMap}. +do_bb(_Lbl, BB) -> + hipe_bb:code_update(BB, do_insns(hipe_bb:code(BB))). + count_non_float_spills(Coloring_fp) -> count_non_float_spills(Coloring_fp, 0). diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl index 0a70bd1d22..c73d029b9b 100644 --- a/lib/hipe/x86/hipe_x86_ra_postconditions.erl +++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl @@ -40,14 +40,18 @@ -include("../main/hipe.hrl"). -define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). -check_and_rewrite(Defun, Coloring, Strategy) -> +check_and_rewrite(CFG, Coloring, Strategy) -> %% io:format("Converting\n"), TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), %% io:format("Rewriting\n"), - #defun{code=Code0} = Defun, - {Code1, DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - {Defun#defun{code=Code1,var_range={0,hipe_gensym:get_var(x86)}}, - DidSpill}. + do_bbs(hipe_x86_cfg:labels(CFG), TempMap, Strategy, CFG, false). + +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_x86_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_x86_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), @@ -169,14 +173,22 @@ do_jmp_switch(I, TempMap, Strategy) -> %%% Fix a lea op. do_lea(I, TempMap, Strategy) -> - #lea{temp=Temp} = I, - case is_spilled(Temp, TempMap) of - false -> - {[I], false}; - true -> - NewTmp = spill_temp('untagged', Strategy), - {[I#lea{temp=NewTmp}, hipe_x86:mk_move(NewTmp, Temp)], - true} + #lea{mem=Mem0,temp=Temp0} = I, + {FixMem, Mem, DidSpill1} = fix_mem_operand(Mem0, TempMap, temp1(Strategy)), + case Mem of + #x86_mem{base=Base, off=#x86_imm{value=0}} -> + %% We've decayed into a move due to both operands being memory (there's an + %% 'add' in FixMem). + {FixMem ++ [hipe_x86:mk_move(Base, Temp0)], DidSpill1}; + #x86_mem{} -> + {StoreTemp, Temp, DidSpill2} = + case is_mem_opnd(Temp0, TempMap) of + false -> {[], Temp0, false}; + true -> + Temp1 = clone2(Temp0, temp0(Strategy)), + {[hipe_x86:mk_move(Temp1, Temp0)], Temp1, true} + end, + {FixMem ++ [I#lea{mem=Mem,temp=Temp} | StoreTemp], DidSpill1 or DidSpill2} end. %%% Fix a move op. diff --git a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl b/lib/hipe/x86/hipe_x86_ra_x87_ls.erl deleted file mode 100644 index 1ee76e5948..0000000000 --- a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl +++ /dev/null @@ -1,64 +0,0 @@ -%% $Id$ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2006-2016. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% - -%% Linear Scan register allocator for x87 - --ifdef(HIPE_AMD64). --define(HIPE_X86_RA_X87_LS, hipe_amd64_ra_x87_ls). --define(HIPE_X86_SPECIFIC_X87, hipe_amd64_specific_x87). --define(HIPE_X86_PP, hipe_amd64_pp). --define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). --else. --define(HIPE_X86_RA_X87_LS, hipe_x86_ra_x87_ls). --define(HIPE_X86_SPECIFIC_X87, hipe_x86_specific_x87). --define(HIPE_X86_PP, hipe_x86_pp). --define(HIPE_X86_RA_LS, hipe_x86_ra_ls). --endif. - --module(?HIPE_X86_RA_X87_LS). --export([ra/2]). - -%%-define(DEBUG,1). - --define(HIPE_INSTRUMENT_COMPILER, false). %% Turn off instrumentation. --include("../main/hipe.hrl"). - -ra(Defun, Options) -> - ?inc_counter(ra_calls_counter,1), - CFG = hipe_x86_cfg:init(Defun), - %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), - SpillIndex = 0, - SpillLimit = ?HIPE_X86_SPECIFIC_X87:number_of_temporaries(CFG), - ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), - - ?inc_counter(ra_iteration_counter,1), - %% ?HIPE_X86_PP:pp(Defun), - Cfg = hipe_x86_cfg:init(Defun), % XXX: didn't we just compute this above? - - {Coloring,NewSpillIndex} = - ?HIPE_X86_RA_LS:regalloc(Cfg, - ?HIPE_X86_SPECIFIC_X87:allocatable(), - [hipe_x86_cfg:start_label(Cfg)], - SpillIndex, SpillLimit, Options, - ?HIPE_X86_SPECIFIC_X87), - - ?add_spills(Options, NewSpillIndex), - {Defun, Coloring, NewSpillIndex}. diff --git a/lib/hipe/x86/hipe_x86_spill_restore.erl b/lib/hipe/x86/hipe_x86_spill_restore.erl index f17b91f33b..32b1eb7b40 100644 --- a/lib/hipe/x86/hipe_x86_spill_restore.erl +++ b/lib/hipe/x86/hipe_x86_spill_restore.erl @@ -25,13 +25,11 @@ -ifdef(HIPE_AMD64). -define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore). -define(HIPE_X86_LIVENESS, hipe_amd64_liveness). --define(HIPE_X86_SPECIFIC, hipe_amd64_specific). -define(HIPE_X86_REGISTERS, hipe_amd64_registers). -define(X86STR, "amd64"). -else. -define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore). -define(HIPE_X86_LIVENESS, hipe_x86_liveness). --define(HIPE_X86_SPECIFIC, hipe_x86_specific). -define(HIPE_X86_REGISTERS, hipe_x86_registers). -define(X86STR, "x86"). -endif. @@ -51,15 +49,13 @@ -include("../flow/cfg.hrl"). % Added for the definition of #cfg{} %% Main function -spill_restore(Defun, Options) -> - CFG = ?option_time(firstPass(Defun), ?X86STR" First Pass", Options), - CFGFinal = ?option_time(secondPass(CFG), ?X86STR" Second Pass", Options), - hipe_x86_cfg:linearise(CFGFinal). +spill_restore(CFG0, Options) -> + CFG1 = ?option_time(firstPass(CFG0), ?X86STR" First Pass", Options), + ?option_time(secondPass(CFG1), ?X86STR" Second Pass", Options). %% Performs the first pass of the algorithm. %% By working bottom up, introduce the pseudo_spills. -firstPass(Defun) -> - CFG0 = ?HIPE_X86_SPECIFIC:defun_to_cfg(Defun), +firstPass(CFG0) -> %% get the labels bottom up Labels = hipe_x86_cfg:postorder(CFG0), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), diff --git a/lib/hipe/x86/hipe_x86_x87.erl b/lib/hipe/x86/hipe_x86_x87.erl index e874490252..10bb6aa75c 100644 --- a/lib/hipe/x86/hipe_x86_x87.erl +++ b/lib/hipe/x86/hipe_x86_x87.erl @@ -41,13 +41,12 @@ %%---------------------------------------------------------------------- -map(Defun) -> - CFG0 = hipe_x86_cfg:init(Defun), +map(CFG0) -> %% hipe_x86_cfg:pp(CFG0), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), StartLabel = hipe_x86_cfg:start_label(CFG0), {CFG1,_} = do_blocks([], [StartLabel], CFG0, Liveness, [], gb_trees:empty()), - hipe_x86_cfg:linearise(CFG1). + CFG1. do_blocks(Pred, [Lbl|Lbls], CFG, Liveness, Map, BlockMap) -> case gb_trees:lookup(Lbl, BlockMap) of |