diff options
-rw-r--r-- | lib/hipe/main/hipe.app.src | 1 | ||||
-rw-r--r-- | lib/hipe/main/hipe.erl | 17 | ||||
-rw-r--r-- | lib/hipe/regalloc/Makefile | 1 | ||||
-rw-r--r-- | lib/hipe/regalloc/hipe_range_split.erl | 1187 | ||||
-rw-r--r-- | lib/hipe/regalloc/hipe_regalloc_loop.erl | 18 | ||||
-rw-r--r-- | lib/hipe/regalloc/hipe_restore_reuse.erl | 15 |
6 files changed, 1232 insertions, 7 deletions
diff --git a/lib/hipe/main/hipe.app.src b/lib/hipe/main/hipe.app.src index 01499472f4..de0b255c01 100644 --- a/lib/hipe/main/hipe.app.src +++ b/lib/hipe/main/hipe.app.src @@ -148,6 +148,7 @@ hipe_ppc_specific_fp, hipe_ppc_subst, hipe_profile, + hipe_range_split, hipe_reg_worklists, hipe_regalloc_loop, hipe_regalloc_prepass, diff --git a/lib/hipe/main/hipe.erl b/lib/hipe/main/hipe.erl index f3e7c0879e..19b4e8bfe2 100644 --- a/lib/hipe/main/hipe.erl +++ b/lib/hipe/main/hipe.erl @@ -1230,6 +1230,13 @@ option_text(regalloc) -> " optimistic - another variant of a coalescing allocator"; option_text(remove_comments) -> "Strip comments from intermediate code"; +option_text(ra_range_split) -> + "Split live ranges of temporaries live over call instructions\n" + "before performing register allocation.\n" + "Heuristically tries to move stack accesses to the cold path of function.\n" + "This range splitter is more sophisticated than 'ra_restore_reuse', but has\n" + "a significantly larger impact on compile time.\n" + "Should only be used with move coalescing register allocators."; option_text(ra_restore_reuse) -> "Split live ranges of temporaries such that straight-line\n" "code will not need to contain multiple restores from the same stack\n" @@ -1376,7 +1383,12 @@ opt_keys() -> pp_rtl_linear, ra_partitioned, ra_prespill, + ra_range_split, ra_restore_reuse, + range_split_min_gain, + range_split_mode1_fudge, + range_split_weight_power, + range_split_weights, regalloc, remove_comments, rtl_ssa, @@ -1436,7 +1448,8 @@ o1_opts(TargetArch) -> o2_opts(TargetArch) -> Common = [icode_type, icode_call_elim, % icode_ssa_struct_reuse, - rtl_lcm | (o1_opts(TargetArch) -- [rtl_ssapre])], + ra_range_split, range_split_weights, % XXX: Having defaults here is ugly + rtl_lcm | (o1_opts(TargetArch) -- [rtl_ssapre, ra_restore_reuse])], case TargetArch of T when T =:= amd64 orelse T =:= ppc64 -> % 64-bit targets [icode_range | Common]; @@ -1484,7 +1497,9 @@ opt_negations() -> {no_pp_rtl_ssapre, pp_rtl_ssapre}, {no_ra_partitioned, ra_partitioned}, {no_ra_prespill, ra_prespill}, + {no_ra_range_split, ra_range_split}, {no_ra_restore_reuse, ra_restore_reuse}, + {no_range_split_weights, range_split_weights}, {no_remove_comments, remove_comments}, {no_rtl_ssa, rtl_ssa}, {no_rtl_ssa_const_prop, rtl_ssa_const_prop}, diff --git a/lib/hipe/regalloc/Makefile b/lib/hipe/regalloc/Makefile index 73ac232f51..81a92e5d35 100644 --- a/lib/hipe/regalloc/Makefile +++ b/lib/hipe/regalloc/Makefile @@ -50,6 +50,7 @@ MODULES = hipe_ig hipe_ig_moves hipe_moves \ hipe_optimistic_regalloc \ hipe_coalescing_regalloc \ hipe_graph_coloring_regalloc \ + hipe_range_split \ hipe_regalloc_loop \ hipe_regalloc_prepass \ hipe_restore_reuse \ diff --git a/lib/hipe/regalloc/hipe_range_split.erl b/lib/hipe/regalloc/hipe_range_split.erl new file mode 100644 index 0000000000..39b086d9f7 --- /dev/null +++ b/lib/hipe/regalloc/hipe_range_split.erl @@ -0,0 +1,1187 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% TEMPORARY LIVE RANGE SPLITTING PASS +%% +%% Live range splitting is useful to allow a register allocator to allocate a +%% temporary to register for a part of its lifetime, even if it cannot be for +%% the entirety. This improves register allocation quality, at the cost of +%% making the allocation problem more time and memory intensive to solve. +%% +%% Optimal allocation can be achieved if all temporaries are split at every +%% program point (between all instructions), but this makes register allocation +%% infeasably slow in practice. Instead, this module uses heuristics to choose +%% which temporaries should have their live ranges split, and at which points. +%% +%% The range splitter only considers temps which are live during a call +%% instruction, since they're known to be spilled. The control-flow graph is +%% partitioned at call instructions and splitting decisions are made separately +%% for each partition. The register copy of a temp (if any) gets a separate name +%% in each partition. +%% +%% There are three different ways the range splitter may choose to split a +%% temporary in a program partition: +%% +%% * Mode1: Spill the temp before calls, and restore it after them +%% * Mode2: Spill the temp after definitions, restore it after calls +%% * Mode3: Spill the temp after definitions, restore it before uses +%% +%% To pick which of these should be used for each tempĂ—partiton pair, the range +%% splitter uses a cost function. The cost is simply the sum of the cost of all +%% expected stack accesses, and the cost for an individual stack access is based +%% on the probability weight of the basic block that it resides in. This biases +%% the range splitter so that it attempts moving stack accesses from a functions +%% hot path to the cold path. +%% +%% The heuristic has a couple of tuning knobs, adjusting its preference for +%% different spilling modes, aggressiveness, and how much influence the basic +%% block probability weights have. +%% +%% Edge case not handled: Call instructions directly defining a pseudo. In that +%% case, if that pseudo has been selected for mode2 spills, no spill is inserted +%% after the call. +-module(hipe_range_split). + +-export([split/5]). + +-compile(inline). + +%% -define(DO_ASSERT, 1). +%% -define(DEBUG, 1). +-include("../main/hipe.hrl"). + +%% Heuristic tuning constants +-define(DEFAULT_MIN_GAIN, 1.1). % option: range_split_min_gain +-define(DEFAULT_MODE1_FUDGE, 1.1). % option: range_split_mode1_fudge +-define(DEFAULT_WEIGHT_POWER, 2). % option: range_split_weight_power +-define(WEIGHT_CONST_FUN(Power), math:log(Power)/math:log(100)). +-define(WEIGHT_FUN(Wt, Const), math:pow(Wt, Const)). +-define(HEUR_MAX_TEMPS, 20000). + +-type target_cfg() :: any(). +-type target_instr() :: any(). +-type target_temp() :: any(). +-type liveness() :: any(). +-type target_module() :: module(). +-type target_context() :: any(). +-type target() :: {target_module(), target_context()}. +-type liveset() :: ordsets:ordset(temp()). +-type temp() :: non_neg_integer(). +-type label() :: non_neg_integer(). + +-spec split(target_cfg(), liveness(), target_module(), target_context(), + comp_options()) + -> target_cfg(). +split(TCFG0, Liveness, TargetMod, TargetContext, Options) -> + Target = {TargetMod, TargetContext}, + NoTemps = number_of_temporaries(TCFG0, Target), + if NoTemps > ?HEUR_MAX_TEMPS -> + ?debug_msg("~w: Too many temps (~w), falling back on restore_reuse.~n", + [?MODULE, NoTemps]), + hipe_restore_reuse:split(TCFG0, Liveness, TargetMod, TargetContext); + true -> + Wts = compute_weights(TCFG0, TargetMod, TargetContext, Options), + {CFG0, Temps} = convert(TCFG0, Target), + Avail = avail_analyse(TCFG0, Liveness, Target), + Defs = def_analyse(CFG0, TCFG0), + RDefs = rdef_analyse(CFG0), + PLive = plive_analyse(CFG0), + {CFG, DUCounts, Costs, DSets0} = + scan(CFG0, Liveness, PLive, Wts, Defs, RDefs, Avail, Target), + {DSets, _} = hipe_dsets:to_map(DSets0), + Renames = decide(DUCounts, Costs, Target, Options), + rewrite(CFG, TCFG0, Target, Liveness, PLive, Defs, Avail, DSets, Renames, + Temps) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Internal program representation +%% +%% Second pass: Convert cfg to internal representation + +-record(cfg, { + rpo_labels :: [label()], + bbs :: #{label() => bb()} + }). +-type cfg() :: #cfg{}. + +cfg_bb(L, #cfg{bbs=BBS}) -> maps:get(L, BBS). + +cfg_postorder(#cfg{rpo_labels=RPO}) -> lists:reverse(RPO). + +-record(bb, { + code :: [code_elem()], + %% If the last instruction of code defines all allocatable registers + has_call :: boolean(), + succ :: [label()] + }). +-type bb() :: #bb{}. +-type code_elem() :: instr() | mode2_spills() | mode3_restores(). + +bb_code(#bb{code=Code}) -> Code. +bb_has_call(#bb{has_call=HasCall}) -> HasCall. +bb_succ(#bb{succ=Succ}) -> Succ. + +bb_butlast(#bb{code=Code}) -> + bb_butlast_1(Code). + +bb_butlast_1([_Last]) -> []; +bb_butlast_1([I|Is]) -> [I|bb_butlast_1(Is)]. + +bb_last(#bb{code=Code}) -> lists:last(Code). + +-record(instr, { + i :: target_instr(), + def :: ordsets:ordset(temp()), + use :: ordsets:ordset(temp()) + }). +-type instr() :: #instr{}. + +-record(mode2_spills, { + temps :: ordsets:ordset(temp()) + }). +-type mode2_spills() :: #mode2_spills{}. + +-record(mode3_restores, { + temps :: ordsets:ordset(temp()) + }). +-type mode3_restores() :: #mode3_restores{}. + +-spec convert(target_cfg(), target()) -> {cfg(), temps()}. +convert(CFG, Target) -> + RPO = reverse_postorder(CFG, Target), + {BBsList, Temps} = convert_bbs(RPO, CFG, Target, #{}, []), + {#cfg{rpo_labels = RPO, + bbs = maps:from_list(BBsList)}, + Temps}. + +convert_bbs([], _CFG, _Target, Temps, Acc) -> {Acc, Temps}; +convert_bbs([L|Ls], CFG, Target, Temps0, Acc) -> + Succs = hipe_gen_cfg:succ(CFG, L), + TBB = bb(CFG, L, Target), + TCode = hipe_bb:code(TBB), + {Code, Last, Temps} = convert_code(TCode, Target, Temps0, []), + HasCall = defines_all_alloc(Last#instr.i, Target), + BB = #bb{code = Code, + has_call = HasCall, + succ = Succs}, + convert_bbs(Ls, CFG, Target, Temps, [{L,BB}|Acc]). + +convert_code([], _Target, Temps, [Last|_]=Acc) -> + {lists:reverse(Acc), Last, Temps}; +convert_code([TI|TIs], Target, Temps0, Acc) -> + {TDef, TUse} = def_use(TI, Target), + I = #instr{i = TI, + def = ordsets:from_list(reg_names(TDef, Target)), + use = ordsets:from_list(reg_names(TUse, Target))}, + Temps = add_temps(TUse, Target, add_temps(TDef, Target, Temps0)), + convert_code(TIs, Target, Temps, [I|Acc]). + +-type temps() :: #{temp() => target_temp()}. +add_temps([], _Target, Temps) -> Temps; +add_temps([T|Ts], Target, Temps) -> + add_temps(Ts, Target, Temps#{reg_nr(T, Target) => T}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fourth pass: P({DEF}) lattice fwd dataflow (for eliding stores at SPILL +%% splits) +-type defsi() :: #{label() => defseti() | {call, defseti(), defseti()}}. +-type defs() :: #{label() => defsetf()}. + +-spec def_analyse(cfg(), target_cfg()) -> defs(). +def_analyse(CFG = #cfg{rpo_labels = RPO}, TCFG) -> + Defs0 = def_init(CFG), + def_dataf(RPO, TCFG, Defs0). + +-spec def_init(cfg()) -> defsi(). +def_init(#cfg{bbs = BBs}) -> + maps:from_list( + [begin + {L, case HasCall of + false -> def_init_scan(bb_code(BB), defseti_new()); + true -> + {call, def_init_scan(bb_butlast(BB), defseti_new()), + defseti_from_ordset((bb_last(BB))#instr.def)} + end} + end || {L, BB = #bb{has_call=HasCall}} <- maps:to_list(BBs)]). + +def_init_scan([], Defset) -> Defset; +def_init_scan([#instr{def=Def}|Is], Defset0) -> + Defset = defseti_add_ordset(Def, Defset0), + def_init_scan(Is, Defset). + +-spec def_dataf([label()], target_cfg(), defsi()) -> defs(). +def_dataf(Labels, TCFG, Defs0) -> + case def_dataf_once(Labels, TCFG, Defs0, 0) of + {Defs, 0} -> + def_finalise(Defs); + {Defs, _Changed} -> + def_dataf(Labels, TCFG, Defs) + end. + +-spec def_finalise(defsi()) -> defs(). +def_finalise(Defs) -> + maps:from_list([{K, defseti_finalise(BL)} + || {K, {call, BL, _}} <- maps:to_list(Defs)]). + +-spec def_dataf_once([label()], target_cfg(), defsi(), non_neg_integer()) + -> {defsi(), non_neg_integer()}. +def_dataf_once([], _TCFG, Defs, Changed) -> {Defs, Changed}; +def_dataf_once([L|Ls], TCFG, Defs0, Changed0) -> + AddPreds = + fun(Defset1) -> + lists:foldl(fun(P, Defset2) -> + defseti_union(defout(P, Defs0), Defset2) + end, Defset1, hipe_gen_cfg:pred(TCFG, L)) + end, + Defset = + case Defset0 = maps:get(L, Defs0) of + {call, Butlast, Defout} -> {call, AddPreds(Butlast), Defout}; + _ -> AddPreds(Defset0) + end, + Changed = case Defset =:= Defset0 of + true -> Changed0; + false -> Changed0+1 + end, + def_dataf_once(Ls, TCFG, Defs0#{L := Defset}, Changed). + +-spec defout(label(), defsi()) -> defseti(). +defout(L, Defs) -> + case maps:get(L, Defs) of + {call, _DefButLast, Defout} -> Defout; + Defout -> Defout + end. + +-spec defbutlast(label(), defs()) -> defsetf(). +defbutlast(L, Defs) -> maps:get(L, Defs). + +-spec defseti_new() -> defseti(). +-spec defseti_union(defseti(), defseti()) -> defseti(). +-spec defseti_add_ordset(ordset:ordset(temp()), defseti()) -> defseti(). +-spec defseti_from_ordset(ordset:ordset(temp())) -> defseti(). +-spec defseti_finalise(defseti()) -> defsetf(). +-spec defsetf_member(temp(), defsetf()) -> boolean(). +-spec defsetf_intersect_ordset(ordsets:ordset(temp()), defsetf()) + -> ordsets:ordset(temp()). + +-type defseti() :: bitord(). +defseti_new() -> bitord_new(). +defseti_union(A, B) -> bitord_union(A, B). +defseti_add_ordset(OS, D) -> defseti_union(defseti_from_ordset(OS), D). +defseti_from_ordset(OS) -> bitord_from_ordset(OS). +defseti_finalise(D) -> bitarr_from_bitord(D). + +-type defsetf() :: bitarr(). +defsetf_member(E, D) -> bitarr_get(E, D). + +defsetf_intersect_ordset([], _D) -> []; +defsetf_intersect_ordset([E|Es], D) -> + case bitarr_get(E, D) of + true -> [E|defsetf_intersect_ordset(Es,D)]; + false -> defsetf_intersect_ordset(Es,D) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fifth pass: P({DEF}) lattice reverse dataflow (for eliding stores at defines +%% in mode2) +-type rdefsi() :: #{label() => + {call, rdefseti(), [label()]} + | {nocall, rdefseti(), rdefseti(), [label()]}}. +-type rdefs() :: #{label() => {final, rdefsetf(), [label()]}}. + +-spec rdef_analyse(cfg()) -> rdefs(). +rdef_analyse(CFG = #cfg{rpo_labels=RPO}) -> + Defs0 = rdef_init(CFG), + PO = rdef_postorder(RPO, CFG, []), + rdef_dataf(PO, Defs0). + +%% Filter out 'call' labels, since they don't change +-spec rdef_postorder([label()], cfg(), [label()]) -> [label()]. +rdef_postorder([], _CFG, Acc) -> Acc; +rdef_postorder([L|Ls], CFG, Acc) -> + case bb_has_call(cfg_bb(L, CFG)) of + true -> rdef_postorder(Ls, CFG, Acc); + false -> rdef_postorder(Ls, CFG, [L|Acc]) + end. + +-spec rdef_init(cfg()) -> rdefsi(). +rdef_init(#cfg{bbs = BBs}) -> + maps:from_list( + [{L, case HasCall of + true -> + Defin = rdef_init_scan(bb_butlast(BB), rdefseti_empty()), + {call, Defin, Succs}; + false -> + Gen = rdef_init_scan(bb_code(BB), rdefseti_empty()), + {nocall, Gen, rdefseti_top(), Succs} + end} + || {L, BB = #bb{has_call=HasCall, succ=Succs}} <- maps:to_list(BBs)]). + +-spec rdef_init_scan([instr()], rdefseti()) -> rdefseti(). +rdef_init_scan([], Defset) -> Defset; +rdef_init_scan([#instr{def=Def}|Is], Defset0) -> + Defset = rdefseti_add_ordset(Def, Defset0), + rdef_init_scan(Is, Defset). + +-spec rdef_dataf([label()], rdefsi()) -> rdefs(). +rdef_dataf(Labels, Defs0) -> + case rdef_dataf_once(Labels, Defs0, 0) of + {Defs, 0} -> + rdef_finalise(Defs); + {Defs, _Changed} -> + rdef_dataf(Labels, Defs) + end. + +-spec rdef_finalise(rdefsi()) -> rdefs(). +rdef_finalise(Defs) -> + maps:map(fun(L, V) -> + Succs = rsuccs_val(V), + Defout0 = rdefout_intersect(L, Defs, rdefseti_top()), + {final, rdefset_finalise(Defout0), Succs} + end, Defs). + +-spec rdef_dataf_once([label()], rdefsi(), non_neg_integer()) + -> {rdefsi(), non_neg_integer()}. +rdef_dataf_once([], Defs, Changed) -> {Defs, Changed}; +rdef_dataf_once([L|Ls], Defs0, Changed0) -> + #{L := {nocall, Gen, Defin0, Succs}} = Defs0, + Defin = rdefseti_union(Gen, rdefout_intersect(L, Defs0, Defin0)), + Defset = {nocall, Gen, Defin, Succs}, + Changed = case Defin =:= Defin0 of + true -> Changed0; + false -> Changed0+1 + end, + rdef_dataf_once(Ls, Defs0#{L := Defset}, Changed). + +-spec rdefin(label(), rdefsi()) -> rdefseti(). +rdefin(L, Defs) -> rdefin_val(maps:get(L, Defs)). +rdefin_val({nocall, _Gen, Defin, _Succs}) -> Defin; +rdefin_val({call, Defin, _Succs}) -> Defin. + +-spec rsuccs(label(), rdefsi()) -> [label()]. +rsuccs(L, Defs) -> rsuccs_val(maps:get(L, Defs)). +rsuccs_val({nocall, _Gen, _Defin, Succs}) -> Succs; +rsuccs_val({call, _Defin, Succs}) -> Succs. + +-spec rdefout(label(), rdefs()) -> rdefsetf(). +rdefout(L, Defs) -> + #{L := {final, Defout, _Succs}} = Defs, + Defout. + +-spec rdefout_intersect(label(), rdefsi(), rdefseti()) -> rdefseti(). +rdefout_intersect(L, Defs, Init) -> + lists:foldl(fun(S, Acc) -> + rdefseti_intersect(rdefin(S, Defs), Acc) + end, Init, rsuccs(L, Defs)). + +-type rdefseti() :: bitord() | top. +rdefseti_top() -> top. +rdefseti_empty() -> bitord_new(). +-spec rdefseti_from_ordset(ordsets:ordset(temp())) -> rdefseti(). +rdefseti_from_ordset(OS) -> bitord_from_ordset(OS). + +-spec rdefseti_add_ordset(ordsets:ordset(temp()), rdefseti()) -> rdefseti(). +rdefseti_add_ordset(_, top) -> top; % Should never happen in rdef_dataf +rdefseti_add_ordset(OS, D) -> rdefseti_union(rdefseti_from_ordset(OS), D). + +-spec rdefseti_union(rdefseti(), rdefseti()) -> rdefseti(). +rdefseti_union(top, _) -> top; +rdefseti_union(_, top) -> top; +rdefseti_union(A, B) -> bitord_union(A, B). + +-spec rdefseti_intersect(rdefseti(), rdefseti()) -> rdefseti(). +rdefseti_intersect(top, D) -> D; +rdefseti_intersect(D, top) -> D; +rdefseti_intersect(A, B) -> bitord_intersect(A, B). + +-type rdefsetf() :: {arr, bitarr()} | top. +-spec rdefset_finalise(rdefseti()) -> rdefsetf(). +rdefset_finalise(top) -> top; +rdefset_finalise(Ord) -> {arr, bitarr_from_bitord(Ord)}. + +%% rdefsetf_top() -> top. +rdefsetf_empty() -> {arr, bitarr_new()}. + +-spec rdefsetf_add_ordset(ordset:ordset(temp()), rdefsetf()) -> rdefsetf(). +rdefsetf_add_ordset(_, top) -> top; +rdefsetf_add_ordset(OS, {arr, Arr}) -> + {arr, lists:foldl(fun bitarr_set/2, Arr, OS)}. + +-spec rdef_step(instr(), rdefsetf()) -> rdefsetf(). +rdef_step(#instr{def=Def}, Defset) -> + %% ?ASSERT(not defines_all_alloc(I, Target)), + rdefsetf_add_ordset(Def, Defset). + +-spec ordset_subtract_rdefsetf(ordsets:ordset(temp()), rdefsetf()) + -> ordsets:ordset(temp()). +ordset_subtract_rdefsetf(_, top) -> []; +ordset_subtract_rdefsetf(OS, {arr, Arr}) -> + %% Lazy implementation; could do better if OS can grow + lists:filter(fun(E) -> not bitarr_get(E, Arr) end, OS). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Integer sets represented as bit sets +%% +%% Two representations; bitord() and bitarr() +-define(LIMB_IX_BITS, 11). +-define(LIMB_BITS, (1 bsl ?LIMB_IX_BITS)). +-define(LIMB_IX(Index), (Index bsr ?LIMB_IX_BITS)). +-define(BIT_IX(Index), (Index band (?LIMB_BITS - 1))). +-define(BIT_MASK(Index), (1 bsl ?BIT_IX(Index))). + +%% bitord(): fast at union/2 and can be compared for equality with '=:=' +-type bitord() :: orddict:orddict(non_neg_integer(), 0..((1 bsl ?LIMB_BITS)-1)). + +-spec bitord_new() -> bitord(). +bitord_new() -> []. + +-spec bitord_union(bitord(), bitord()) -> bitord(). +bitord_union(Lhs, Rhs) -> + orddict:merge(fun(_, L, R) -> L bor R end, Lhs, Rhs). + +-spec bitord_intersect(bitord(), bitord()) -> bitord(). +bitord_intersect([], _) -> []; +bitord_intersect(_, []) -> []; +bitord_intersect([{K, L}|Ls], [{K, R}|Rs]) -> + [{K, L band R} | bitord_intersect(Ls, Rs)]; +bitord_intersect([{LK, _}|Ls], [{RK, _}|_]=Rs) when LK < RK -> + bitord_intersect(Ls, Rs); +bitord_intersect([{LK, _}|_]=Ls, [{RK, _}|Rs]) when LK > RK -> + bitord_intersect(Ls, Rs). + +-spec bitord_from_ordset(ordsets:ordset(non_neg_integer())) -> bitord(). +bitord_from_ordset([]) -> []; +bitord_from_ordset([B|Bs]) -> + bitord_from_ordset_1(Bs, ?LIMB_IX(B), ?BIT_MASK(B)). + +bitord_from_ordset_1([B|Bs], Key, Val) when Key =:= ?LIMB_IX(B) -> + bitord_from_ordset_1(Bs, Key, Val bor ?BIT_MASK(B)); +bitord_from_ordset_1([B|Bs], Key, Val) -> + [{Key,Val} | bitord_from_ordset_1(Bs, ?LIMB_IX(B), ?BIT_MASK(B))]; +bitord_from_ordset_1([], Key, Val) -> [{Key, Val}]. + +%% bitarr(): fast (enough) at get/2 +-type bitarr() :: array:array(0..((1 bsl ?LIMB_BITS)-1)). + +-spec bitarr_new() -> bitarr(). +bitarr_new() -> array:new({default, 0}). + +-spec bitarr_get(non_neg_integer(), bitarr()) -> boolean(). +bitarr_get(Index, Array) -> + Limb = array:get(?LIMB_IX(Index), Array), + 0 =/= (Limb band ?BIT_MASK(Index)). + +-spec bitarr_set(non_neg_integer(), bitarr()) -> bitarr(). +bitarr_set(Index, Array) -> + Limb0 = array:get(?LIMB_IX(Index), Array), + Limb = Limb0 bor ?BIT_MASK(Index), + array:set(?LIMB_IX(Index), Limb, Array). + +-spec bitarr_from_bitord(bitord()) -> bitarr(). +bitarr_from_bitord(Ord) -> + array:from_orddict(Ord, 0). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Sixth pass: Partition-local liveness analysis +%% +%% As temps are not spilled when exiting a partition in mode2, only +%% partition-local uses need to be considered when deciding which temps need +%% restoring at partition entry. + +-type plive() :: #{label() => + {call, liveset(), [label()]} + | {nocall, {liveset(), liveset()}, liveset(), [label()]}}. + +-spec plive_analyse(cfg()) -> plive(). +plive_analyse(CFG) -> + Defs0 = plive_init(CFG), + PO = cfg_postorder(CFG), + plive_dataf(PO, Defs0). + +-spec plive_init(cfg()) -> plive(). +plive_init(#cfg{bbs = BBs}) -> + maps:from_list( + [begin + {L, case HasCall of + true -> + {Gen, _} = plive_init_scan(bb_code(BB)), + {call, Gen, Succs}; + false -> + GenKill = plive_init_scan(bb_code(BB)), + {nocall, GenKill, liveset_empty(), Succs} + end} + end || {L, BB = #bb{has_call=HasCall, succ=Succs}} <- maps:to_list(BBs)]). + +-spec plive_init_scan([instr()]) -> {liveset(), liveset()}. +plive_init_scan([]) -> {liveset_empty(), liveset_empty()}; +plive_init_scan([#instr{def=InstrKill, use=InstrGen}|Is]) -> + {Gen0, Kill0} = plive_init_scan(Is), + Gen1 = liveset_subtract(Gen0, InstrKill), + Gen = liveset_union(Gen1, InstrGen), + Kill1 = liveset_union(Kill0, InstrKill), + Kill = liveset_subtract(Kill1, InstrGen), + {Gen, Kill}. + +-spec plive_dataf([label()], plive()) -> plive(). +plive_dataf(Labels, PLive0) -> + case plive_dataf_once(Labels, PLive0, 0) of + {PLive, 0} -> PLive; + {PLive, _Changed} -> + plive_dataf(Labels, PLive) + end. + +-spec plive_dataf_once([label()], plive(), non_neg_integer()) -> + {plive(), non_neg_integer()}. +plive_dataf_once([], PLive, Changed) -> {PLive, Changed}; +plive_dataf_once([L|Ls], PLive0, Changed0) -> + Liveset = + case Liveset0 = maps:get(L, PLive0) of + {call, Livein, Succs} -> + {call, Livein, Succs}; + {nocall, {Gen, Kill} = GenKill, _OldLivein, Succs} -> + Liveout = pliveout(L, PLive0), + Livein = liveset_union(Gen, liveset_subtract(Liveout, Kill)), + {nocall, GenKill, Livein, Succs} + end, + Changed = case Liveset =:= Liveset0 of + true -> Changed0; + false -> Changed0+1 + end, + plive_dataf_once(Ls, PLive0#{L := Liveset}, Changed). + +-spec pliveout(label(), plive()) -> liveset(). +pliveout(L, PLive) -> + liveset_union([plivein(S, PLive) || S <- psuccs(L, PLive)]). + +-spec psuccs(label(), plive()) -> [label()]. +psuccs(L, PLive) -> psuccs_val(maps:get(L, PLive)). +psuccs_val({call, _Livein, Succs}) -> Succs; +psuccs_val({nocall, _GenKill, _Livein, Succs}) -> Succs. + +-spec plivein(label(), plive()) -> liveset(). +plivein(L, PLive) -> plivein_val(maps:get(L, PLive)). +plivein_val({call, Livein, _Succs}) -> Livein; +plivein_val({nocall, _GenKill, Livein, _Succs}) -> Livein. + +liveset_empty() -> ordsets:new(). +liveset_subtract(A, B) -> ordsets:subtract(A, B). +liveset_union(A, B) -> ordsets:union(A, B). +liveset_union(LivesetList) -> ordsets:union(LivesetList). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Third pass: Compute dataflow analyses required for placing mode3 +%% spills/restores. +%% Reuse analysis implementation in hipe_restore_reuse. +%% XXX: hipe_restore_reuse has it's own "rdef"; we would like to reuse that one +%% too. +-type avail() :: hipe_restore_reuse:avail(). + +-spec avail_analyse(target_cfg(), liveness(), target()) -> avail(). +avail_analyse(CFG, Liveness, Target) -> + hipe_restore_reuse:analyse(CFG, Liveness, Target). + +-spec mode3_split_in_block(label(), avail()) -> ordsets:ordset(temp()). +mode3_split_in_block(L, Avail) -> + hipe_restore_reuse:split_in_block(L, Avail). + +-spec mode3_block_renameset(label(), avail()) -> ordsets:ordset(temp()). +mode3_block_renameset(L, Avail) -> + hipe_restore_reuse:renamed_in_block(L, Avail). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Seventh pass +%% +%% Compute program space partitioning, collect information required by the +%% heuristic. +-type part_key() :: label(). +-type part_dsets() :: hipe_dsets:dsets(part_key()). +-type part_dsets_map() :: #{part_key() => part_key()}. +-type ducounts() :: #{part_key() => ducount()}. + +-spec scan(cfg(), liveness(), plive(), weights(), defs(), rdefs(), avail(), + target()) -> {cfg(), ducounts(), costs(), part_dsets()}. +scan(CFG0, Liveness, PLive, Weights, Defs, RDefs, Avail, Target) -> + #cfg{rpo_labels = Labels, bbs = BBs0} = CFG0, + CFG = CFG0#cfg{bbs=#{}}, % kill reference + DSets0 = hipe_dsets:new(Labels), + Costs0 = costs_new(), + {BBs, DUCounts0, Costs1, DSets1} = + scan_bbs(maps:to_list(BBs0), Liveness, PLive, Weights, Defs, RDefs, Avail, + Target, #{}, Costs0, DSets0, []), + {RLList, DSets2} = hipe_dsets:to_rllist(DSets1), + {Costs, DSets} = costs_map_roots(DSets2, Costs1), + DUCounts = collect_ducounts(RLList, DUCounts0, #{}), + {CFG#cfg{bbs=maps:from_list(BBs)}, DUCounts, Costs, DSets}. + +-spec collect_ducounts([{label(), [label()]}], ducounts(), ducounts()) + -> ducounts(). +collect_ducounts([], _, Acc) -> Acc; +collect_ducounts([{R,Ls}|RLs], DUCounts, Acc) -> + DUCount = lists:foldl( + fun(Key, FAcc) -> + ducount_merge(maps:get(Key, DUCounts, ducount_new()), FAcc) + end, ducount_new(), Ls), + collect_ducounts(RLs, DUCounts, Acc#{R => DUCount}). + +-spec scan_bbs([{label(), bb()}], liveness(), plive(), weights(), defs(), + rdefs(), avail(), target(), ducounts(), costs(), part_dsets(), + [{label(), bb()}]) + -> {[{label(), bb()}], ducounts(), costs(), part_dsets()}. +scan_bbs([], _Liveness, _PLive, _Weights, _Defs, _RDefs, _Avail, _Target, + DUCounts, Costs, DSets, Acc) -> + {Acc, DUCounts, Costs, DSets}; +scan_bbs([{L,BB}|BBs], Liveness, PLive, Weights, Defs, RDefs, Avail, Target, + DUCounts0, Costs0, DSets0, Acc) -> + Wt = weight(L, Weights), + {DSets, Costs5, EntryCode, ExitCode, RDefout, Liveout} = + case bb_has_call(BB) of + false -> + DSets1 = lists:foldl(fun(S, DS) -> hipe_dsets:union(L, S, DS) end, + DSets0, bb_succ(BB)), + {DSets1, Costs0, bb_code(BB), [], rdefout(L, RDefs), + liveout(Liveness, L, Target)}; + true -> + LastI = #instr{def=LastDef} = bb_last(BB), + LiveBefore = ordsets:subtract(liveout(Liveness, L, Target), LastDef), + %% We can omit the spill of a temp that has not been defined since the + %% last time it was spilled + SpillSet = defsetf_intersect_ordset(LiveBefore, defbutlast(L, Defs)), + Costs1 = costs_insert(exit, L, Wt, SpillSet, Costs0), + Costs4 = lists:foldl(fun({S, BranchWt}, Costs2) -> + SLivein = livein(Liveness, S, Target), + SPLivein = plivein(S, PLive), + SWt = weight_scaled(L, BranchWt, Weights), + Costs3 = costs_insert(entry1, S, SWt, SLivein, Costs2), + costs_insert(entry2, S, SWt, SPLivein, Costs3) + end, Costs1, branch_preds(LastI#instr.i, Target)), + {DSets0, Costs4, bb_butlast(BB), [LastI], rdefsetf_empty(), LiveBefore} + end, + Mode3Splits = mode3_split_in_block(L, Avail), + {RevEntryCode, Restored} = scan_bb_fwd(EntryCode, Mode3Splits, [], []), + {Code, DUCount, Mode2Spills} = + scan_bb(RevEntryCode, Wt, RDefout, Liveout, ducount_new(), [], ExitCode), + DUCounts = DUCounts0#{L => DUCount}, + M2SpillSet = ordsets:from_list(Mode2Spills), + Costs6 = costs_insert(spill, L, Wt, M2SpillSet, Costs5), + Mode3Renames = mode3_block_renameset(L, Avail), + Costs7 = costs_insert(restore, L, Wt, ordsets:intersection(M2SpillSet, Mode3Renames), Costs6), + Costs8 = costs_insert(restore, L, Wt, ordsets:from_list(Restored), Costs7), + Costs = add_unsplit_mode3_costs(DUCount, Mode3Renames, L, Costs8), + scan_bbs(BBs, Liveness, PLive, Weights, Defs, RDefs, Avail, Target, DUCounts, + Costs, DSets, [{L,BB#bb{code=Code}}|Acc]). + +-spec add_unsplit_mode3_costs(ducount(), ordsets:ordset(temp()), label(), costs()) + -> costs(). +add_unsplit_mode3_costs(DUCount, Mode3Renames, L, Costs) -> + Unsplit = orddict_without_ordset(Mode3Renames, + orddict:from_list(ducount_to_list(DUCount))), + add_unsplit_mode3_costs_1(Unsplit, L, Costs). + +-spec add_unsplit_mode3_costs_1([{temp(),float()}], label(), costs()) + -> costs(). +add_unsplit_mode3_costs_1([], _L, Costs) -> Costs; +add_unsplit_mode3_costs_1([{T,C}|Cs], L, Costs) -> + add_unsplit_mode3_costs_1(Cs, L, costs_insert(restore, L, C, [T], Costs)). + +%% @doc Returns a new orddict without keys in Set and their associated values. +-spec orddict_without_ordset(ordsets:ordset(K), orddict:orddict(K, V)) + -> orddict:orddict(K, V). +orddict_without_ordset([S|Ss], [{K,_}|_]=Dict) when S < K -> + orddict_without_ordset(Ss, Dict); +orddict_without_ordset([S|_]=Set, [D={K,_}|Ds]) when S > K -> + [D|orddict_without_ordset(Set, Ds)]; +orddict_without_ordset([_S|Ss], [{_K,_}|Ds]) -> % _S == _K + orddict_without_ordset(Ss, Ds); +orddict_without_ordset(_, []) -> []; +orddict_without_ordset([], Dict) -> Dict. + +%% Scans the code forward, collecting and inserting mode3 restores +-spec scan_bb_fwd([instr()], ordsets:ordset(temp()), ordsets:ordset(temp()), + [code_elem()]) + -> {[code_elem()], ordsets:ordset(temp())}. +scan_bb_fwd([], [], Restored, Acc) -> {Acc, Restored}; +scan_bb_fwd([I|Is], SplitHere0, Restored0, Acc0) -> + #instr{def=Def, use=Use} = I, + {ToRestore, SplitHere1} = + lists:partition(fun(R) -> lists:member(R, Use) end, SplitHere0), + SplitHere = lists:filter(fun(R) -> not lists:member(R, Def) end, SplitHere1), + Acc = + case ToRestore of + [] -> [I | Acc0]; + _ -> [I, #mode3_restores{temps=ToRestore} | Acc0] + end, + scan_bb_fwd(Is, SplitHere, ToRestore ++ Restored0, Acc). + +%% Scans the code backwards, collecting def/use counts and mode2 spills +-spec scan_bb([code_elem()], float(), rdefsetf(), liveset(), ducount(), + [temp()], [code_elem()]) + -> {[code_elem()], ducount(), [temp()]}. +scan_bb([], _Wt, _RDefout, _Liveout, DUCount, Spills, Acc) -> + {Acc, DUCount, Spills}; +scan_bb([I=#mode3_restores{}|Is], Wt, RDefout, Liveout, DUCount, Spills, Acc) -> + scan_bb(Is, Wt, RDefout, Liveout, DUCount, Spills, [I|Acc]); +scan_bb([I|Is], Wt, RDefout, Liveout, DUCount0, Spills0, Acc0) -> + #instr{def=Def,use=Use} = I, + DUCount = ducount_add(Use, Wt, ducount_add(Def, Wt, DUCount0)), + Livein = liveness_step(I, Liveout), + RDefin = rdef_step(I, RDefout), + %% The temps that would be spilled after I in mode 2 + NewSpills = ordset_subtract_rdefsetf( + ordsets:intersection(Def, Liveout), + RDefout), + ?ASSERT(NewSpills =:= (NewSpills -- Spills0)), + Spills = NewSpills ++ Spills0, + Acc1 = case NewSpills of + [] -> Acc0; + _ -> [#mode2_spills{temps=NewSpills}|Acc0] + end, + scan_bb(Is, Wt, RDefin, Livein, DUCount, Spills, [I|Acc1]). + +-spec liveness_step(instr(), liveset()) -> liveset(). +liveness_step(#instr{def=Def, use=Use}, Liveout) -> + ordsets:union(Use, ordsets:subtract(Liveout, Def)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% First pass: compute basic-block weighting + +-type weights() :: no_bb_weights + | {hipe_bb_weights:bb_weights(), float()}. + +-spec weight(label(), weights()) -> float(). +weight(L, Weights) -> weight_scaled(L, 1.0, Weights). + +-spec compute_weights(target_cfg(), target_module(), target_context(), + comp_options()) -> weights(). +compute_weights(CFG, TargetMod, TargetContext, Options) -> + case proplists:get_bool(range_split_weights, Options) of + false -> no_bb_weights; + true -> + {hipe_bb_weights:compute(CFG, TargetMod, TargetContext), + ?WEIGHT_CONST_FUN(proplists:get_value(range_split_weight_power, + Options, ?DEFAULT_WEIGHT_POWER))} + end. + +-spec weight_scaled(label(), float(), weights()) -> float(). +weight_scaled(_L, _Scale, no_bb_weights) -> 1.0; +weight_scaled(L, Scale, {Weights, Const}) -> + Wt0 = hipe_bb_weights:weight(L, Weights) * Scale, + Wt = erlang:min(erlang:max(Wt0, 0.0000000000000000001), 10000.0), + ?WEIGHT_FUN(Wt, Const). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Heuristic splitting decision. +%% +%% Decide which temps to split, in which parts, and pick new names for them. +-type spill_mode() :: mode1 % Spill temps at partition exits + | mode2 % Spill temps at definitions + | mode3.% Spill temps at definitions, restore temps at uses +-type ren() :: #{temp() => {spill_mode(), temp()}}. +-type renames() :: #{label() => ren()}. + +-record(heur_par, { + mode1_fudge :: float(), + min_gain :: float() + }). +-type heur_par() :: #heur_par{}. + +-spec decide(ducounts(), costs(), target(), comp_options()) -> renames(). +decide(DUCounts, Costs, Target, Options) -> + Par = #heur_par{ + mode1_fudge = proplists:get_value(range_split_mode1_fudge, Options, + ?DEFAULT_MODE1_FUDGE), + min_gain = proplists:get_value(range_split_min_gain, Options, + ?DEFAULT_MIN_GAIN)}, + decide_parts(maps:to_list(DUCounts), Costs, Target, Par, #{}). + +-spec decide_parts([{part_key(), ducount()}], costs(), target(), + heur_par(), renames()) + -> renames(). +decide_parts([], _Costs, _Target, _Par, Acc) -> Acc; +decide_parts([{Part,DUCount}|Ps], Costs, Target, Par, Acc) -> + Spills = decide_temps(ducount_to_list(DUCount), Part, Costs, Target, Par, + #{}), + decide_parts(Ps, Costs, Target, Par, Acc#{Part => Spills}). + +-spec decide_temps([{temp(), float()}], part_key(), costs(), target(), + heur_par(), ren()) + -> ren(). +decide_temps([], _Part, _Costs, _Target, _Par, Acc) -> Acc; +decide_temps([{Temp, SpillGain}|Ts], Part, Costs, Target, Par, Acc0) -> + SpillCost1 = costs_query(Temp, entry1, Part, Costs) + + costs_query(Temp, exit, Part, Costs), + SpillCost2 = costs_query(Temp, entry2, Part, Costs) + + costs_query(Temp, spill, Part, Costs), + SpillCost3 = costs_query(Temp, restore, Part, Costs), + Acc = + %% SpillCost1 =:= 0.0 usually means the temp is local to the partition; + %% hence no need to split it + case (SpillCost1 =/= 0.0) %% maps:is_key(Temp, S) + andalso (not is_precoloured(Temp, Target)) + andalso ((Par#heur_par.min_gain*SpillCost1 < SpillGain) + orelse (Par#heur_par.min_gain*SpillCost2 < SpillGain) + orelse (Par#heur_par.min_gain*SpillCost3 < SpillGain)) + of + false -> Acc0; + true -> + Mode = + if Par#heur_par.mode1_fudge*SpillCost1 < SpillCost2, + Par#heur_par.mode1_fudge*SpillCost1 < SpillCost3 -> + mode1; + SpillCost2 < SpillCost3 -> + mode2; + true -> + mode3 + end, + Acc0#{Temp => {Mode, new_reg_nr(Target)}} + end, + decide_temps(Ts, Part, Costs, Target, Par, Acc). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Eighth pass: Rewrite program performing range splitting. + +-spec rewrite(cfg(), target_cfg(), target(), liveness(), plive(), defs(), + avail(), part_dsets_map(), renames(), temps()) + -> target_cfg(). +rewrite(#cfg{bbs=BBs}, TCFG, Target, Liveness, PLive, Defs, Avail, DSets, + Renames, Temps) -> + rewrite_bbs(maps:to_list(BBs), Target, Liveness, PLive, Defs, Avail, DSets, + Renames, Temps, TCFG). + +-spec rewrite_bbs([{label(), bb()}], target(), liveness(), plive(), defs(), + avail(), part_dsets_map(), renames(), temps(), target_cfg()) + -> target_cfg(). +rewrite_bbs([], _Target, _Liveness, _PLive, _Defs, _Avail, _DSets, _Renames, + _Temps, TCFG) -> + TCFG; +rewrite_bbs([{L,BB}|BBs], Target, Liveness, PLive, Defs, Avail, DSets, Renames, + Temps, TCFG0) -> + Code0Rev = lists:reverse(bb_code(BB)), + EntryRen = maps:get(maps:get(L,DSets), Renames), + M3Ren = mode3_block_renameset(L, Avail), + SubstFun = rewrite_subst_fun(Target, EntryRen, M3Ren), + Fun = fun(I) -> subst_temps(SubstFun, I, Target) end, + {Code, TCFG} = + case bb_has_call(BB) of + false -> + Code1 = rewrite_instrs(Code0Rev, Fun, EntryRen, M3Ren, Temps, Target, + []), + {Code1, TCFG0}; + true -> + CallI0 = hd(Code0Rev), + Succ = bb_succ(BB), + {CallTI, TCFG1} = inject_restores(Succ, Target, Liveness, PLive, DSets, + Renames, Temps, CallI0#instr.i, TCFG0), + Liveout1 = liveness_step(CallI0, liveout(Liveness, L, Target)), + Defout = defbutlast(L, Defs), + SpillMap = mk_spillmap(EntryRen, Liveout1, Defout, Temps, Target), + Code1 = rewrite_instrs(tl(Code0Rev), Fun, EntryRen, M3Ren, Temps, + Target, []), + Code2 = lift_spills(lists:reverse(Code1), Target, SpillMap, [CallTI]), + {Code2, TCFG1} + end, + TBB = hipe_bb:code_update(bb(TCFG, L, Target), Code), + rewrite_bbs(BBs, Target, Liveness, PLive, Defs, Avail, DSets, Renames, Temps, + update_bb(TCFG, L, TBB, Target)). + +-spec rewrite_instrs([code_elem()], rewrite_fun(), ren(), + ordsets:ordset(temp()), temps(), target(), + [target_instr()]) + -> [target_instr()]. +rewrite_instrs([], _Fun, _Ren, _M3Ren, _Temps, _Target, Acc) -> Acc; +rewrite_instrs([I|Is], Fun, Ren, M3Ren, Temps, Target, Acc0) -> + Acc = + case I of + #instr{i=TI} -> [Fun(TI)|Acc0]; + #mode2_spills{temps=Mode2Spills} -> + add_mode2_spills(Mode2Spills, Target, Ren, M3Ren, Temps, Acc0); + #mode3_restores{temps=Mode3Restores} -> + add_mode3_restores(Mode3Restores, Target, Ren, Temps, Acc0) + end, + rewrite_instrs(Is, Fun, Ren, M3Ren, Temps, Target, Acc). + +-spec add_mode2_spills(ordsets:ordset(temp()), target(), ren(), + ordsets:ordset(temp()), temps(), [target_instr()]) + -> [target_instr()]. +add_mode2_spills([], _Target, _Ren, _M3Ren, _Temps, Acc) -> Acc; +add_mode2_spills([R|Rs], Target, Ren, M3Ren, Temps, Acc0) -> + Acc = + case Ren of + #{R := {Mode, NewName}} when Mode =:= mode2; Mode =:= mode3 -> + case Mode =/= mode3 orelse lists:member(R, M3Ren) of + false -> Acc0; + true -> + #{R := T} = Temps, + SpillInstr = mk_move(update_reg_nr(NewName, T, Target), T, Target), + [SpillInstr|Acc0] + end; + #{} -> + Acc0 + end, + add_mode2_spills(Rs, Target, Ren, M3Ren, Temps, Acc). + +-spec add_mode3_restores(ordsets:ordset(temp()), target(), ren(), temps(), + [target_instr()]) + -> [target_instr()]. +add_mode3_restores([], _Target, _Ren, _Temps, Acc) -> Acc; +add_mode3_restores([R|Rs], Target, Ren, Temps, Acc) -> + case Ren of + #{R := {mode3, NewName}} -> + #{R := T} = Temps, + RestoreInstr = mk_move(T, update_reg_nr(NewName, T, Target), Target), + add_mode3_restores(Rs, Target, Ren, Temps, [RestoreInstr|Acc]); + #{} -> + add_mode3_restores(Rs, Target, Ren, Temps, Acc) + end. + +-type rewrite_fun() :: fun((target_instr()) -> target_instr()). +-type subst_fun() :: fun((target_temp()) -> target_temp()). +-spec rewrite_subst_fun(target(), ren(), ordsets:ordset(temp())) -> subst_fun(). +rewrite_subst_fun(Target, Ren, M3Ren) -> + fun(Temp) -> + Reg = reg_nr(Temp, Target), + case Ren of + #{Reg := {Mode, NewName}} -> + case Mode =/= mode3 orelse lists:member(Reg, M3Ren) of + false -> Temp; + true -> update_reg_nr(NewName, Temp, Target) + end; + #{} -> Temp + end + end. + +-type spillmap() :: [{temp(), target_instr()}]. +-spec mk_spillmap(ren(), liveset(), defsetf(), temps(), target()) + -> spillmap(). +mk_spillmap(Ren, Livein, Defout, Temps, Target) -> + [begin + Temp = maps:get(Reg, Temps), + {NewName, mk_move(update_reg_nr(NewName, Temp, Target), Temp, Target)} + end || {Reg, {mode1, NewName}} <- maps:to_list(Ren), + lists:member(Reg, Livein), defsetf_member(Reg, Defout)]. + +-spec mk_restores(ren(), liveset(), liveset(), temps(), target()) + -> [target_instr()]. +mk_restores(Ren, Livein, PLivein, Temps, Target) -> + [begin + Temp = maps:get(Reg, Temps), + mk_move(Temp, update_reg_nr(NewName, Temp, Target), Target) + end || {Reg, {Mode, NewName}} <- maps:to_list(Ren), + ( (Mode =:= mode1 andalso lists:member(Reg, Livein )) + orelse (Mode =:= mode2 andalso lists:member(Reg, PLivein)))]. + +-spec inject_restores([label()], target(), liveness(), plive(), + part_dsets_map(), renames(), temps(), target_instr(), + target_cfg()) + -> {target_instr(), target_cfg()}. +inject_restores([], _Target, _Liveness, _PLive, _DSets, _Renames, _Temps, CFTI, + TCFG) -> + {CFTI, TCFG}; +inject_restores([L|Ls], Target, Liveness, PLive, DSets, Renames, Temps, CFTI0, + TCFG0) -> + Ren = maps:get(maps:get(L,DSets), Renames), + Livein = livein(Liveness, L, Target), + PLivein = plivein(L, PLive), + {CFTI, TCFG} = + case mk_restores(Ren, Livein, PLivein, Temps, Target) of + [] -> {CFTI0, TCFG0}; % optimisation + Restores -> + RestBBLbl = new_label(Target), + Code = Restores ++ [mk_goto(L, Target)], + CFTI1 = redirect_jmp(CFTI0, L, RestBBLbl, Target), + TCFG1 = update_bb(TCFG0, RestBBLbl, hipe_bb:mk_bb(Code), Target), + {CFTI1, TCFG1} + end, + inject_restores(Ls, Target, Liveness, PLive, DSets, Renames, Temps, CFTI, + TCFG). + +%% Heuristic. Move spills up until we meet the edge of the BB or a definition of +%% that temp. +-spec lift_spills([target_instr()], target(), spillmap(), [target_instr()]) + -> [target_instr()]. +lift_spills([], _Target, SpillMap, Acc) -> + [SpillI || {_, SpillI} <- SpillMap] ++ Acc; +lift_spills([I|Is], Target, SpillMap0, Acc) -> + Def = reg_defines(I, Target), + {Spills0, SpillMap} = + lists:partition(fun({Reg,_}) -> lists:member(Reg, Def) end, SpillMap0), + Spills = [SpillI || {_, SpillI} <- Spills0], + lift_spills(Is, Target, SpillMap, [I|Spills ++ Acc]). + +reg_defines(I, Target) -> + reg_names(defines(I,Target), Target). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Costs ADT +%% +%% Keeps track of cumulative cost of spilling temps in particular partitions +%% using particular spill modes. +-type cost_map() :: #{[part_key()|temp()] => float()}. +-type cost_key() :: entry1 | entry2 | exit | spill | restore. +-record(costs, {entry1 = #{} :: cost_map() + ,entry2 = #{} :: cost_map() + ,exit = #{} :: cost_map() + ,spill = #{} :: cost_map() + ,restore = #{} :: cost_map() + }). +-type costs() :: #costs{}. + +-spec costs_new() -> costs(). +costs_new() -> #costs{}. + +-spec costs_insert(cost_key(), part_key(), float(), liveset(), costs()) + -> costs(). +costs_insert(entry1, A, Weight, Liveset, Costs=#costs{entry1=Entry1}) -> + Costs#costs{entry1=costs_insert_1(A, Weight, Liveset, Entry1)}; +costs_insert(entry2, A, Weight, Liveset, Costs=#costs{entry2=Entry2}) -> + Costs#costs{entry2=costs_insert_1(A, Weight, Liveset, Entry2)}; +costs_insert(exit, A, Weight, Liveset, Costs=#costs{exit=Exit}) -> + Costs#costs{exit=costs_insert_1(A, Weight, Liveset, Exit)}; +costs_insert(spill, A, Weight, Liveset, Costs=#costs{spill=Spill}) -> + Costs#costs{spill=costs_insert_1(A, Weight, Liveset, Spill)}; +costs_insert(restore, A, Weight, Liveset, Costs=#costs{restore=Restore}) -> + Costs#costs{restore=costs_insert_1(A, Weight, Liveset, Restore)}. + +costs_insert_1(A, Weight, Liveset, CostMap0) when is_float(Weight) -> + lists:foldl(fun(Live, CostMap1) -> + map_update_counter([A|Live], Weight, CostMap1) + end, CostMap0, Liveset). + +-spec costs_map_roots(part_dsets(), costs()) -> {costs(), part_dsets()}. +costs_map_roots(DSets0, Costs) -> + {Entry1, DSets1} = costs_map_roots_1(DSets0, Costs#costs.entry1), + {Entry2, DSets2} = costs_map_roots_1(DSets1, Costs#costs.entry2), + {Exit, DSets3} = costs_map_roots_1(DSets2, Costs#costs.exit), + {Spill, DSets4} = costs_map_roots_1(DSets3, Costs#costs.spill), + {Restore, DSets} = costs_map_roots_1(DSets4, Costs#costs.restore), + {#costs{entry1=Entry1,entry2=Entry2,exit=Exit,spill=Spill,restore=Restore}, + DSets}. + +costs_map_roots_1(DSets0, CostMap) -> + {NewEs, DSets} = lists:mapfoldl(fun({[A|T], Wt}, DSets1) -> + {AR, DSets2} = hipe_dsets:find(A, DSets1), + {{[AR|T], Wt}, DSets2} + end, DSets0, maps:to_list(CostMap)), + {maps_from_list_merge(NewEs, fun erlang:'+'/2, #{}), DSets}. + +maps_from_list_merge([], _MF, Acc) -> Acc; +maps_from_list_merge([{K,V}|Ps], MF, Acc) -> + maps_from_list_merge(Ps, MF, case Acc of + #{K := OV} -> Acc#{K := MF(V, OV)}; + #{} -> Acc#{K => V} + end). + +-spec costs_query(temp(), cost_key(), part_key(), costs()) -> float(). +costs_query(Temp, entry1, Part, #costs{entry1=Entry1}) -> + costs_query_1(Temp, Part, Entry1); +costs_query(Temp, entry2, Part, #costs{entry2=Entry2}) -> + costs_query_1(Temp, Part, Entry2); +costs_query(Temp, exit, Part, #costs{exit=Exit}) -> + costs_query_1(Temp, Part, Exit); +costs_query(Temp, spill, Part, #costs{spill=Spill}) -> + costs_query_1(Temp, Part, Spill); +costs_query(Temp, restore, Part, #costs{restore=Restore}) -> + costs_query_1(Temp, Part, Restore). + +costs_query_1(Temp, Part, CostMap) -> + Key = [Part|Temp], + case CostMap of + #{Key := Wt} -> Wt; + #{} -> 0.0 + end. + +-spec map_update_counter(Key, number(), #{Key => number(), OK => OV}) + -> #{Key := number(), OK => OV}. +map_update_counter(Key, Incr, Map) -> + case Map of + #{Key := Orig} -> Map#{Key := Orig + Incr}; + #{} -> Map#{Key => Incr} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Def and use counting ADT +-type ducount() :: #{temp() => float()}. + +-spec ducount_new() -> ducount(). +ducount_new() -> #{}. + +-spec ducount_add([temp()], float(), ducount()) -> ducount(). +ducount_add([], _Weight, DUCount) -> DUCount; +ducount_add([T|Ts], Weight, DUCount0) -> + DUCount = + case DUCount0 of + #{T := Count} -> DUCount0#{T := Count + Weight}; + #{} -> DUCount0#{T => Weight} + end, + ducount_add(Ts, Weight, DUCount). + +ducount_to_list(DUCount) -> maps:to_list(DUCount). + +-spec ducount_merge(ducount(), ducount()) -> ducount(). +ducount_merge(DCA, DCB) when map_size(DCA) < map_size(DCB) -> + ducount_merge_1(ducount_to_list(DCA), DCB); +ducount_merge(DCA, DCB) when map_size(DCA) >= map_size(DCB) -> + ducount_merge_1(ducount_to_list(DCB), DCA). + +ducount_merge_1([], DUCount) -> DUCount; +ducount_merge_1([{T,AC}|Ts], DUCount0) -> + DUCount = + case DUCount0 of + #{T := BC} -> DUCount0#{T := AC + BC}; + #{} -> DUCount0#{T => AC} + end, + ducount_merge_1(Ts, DUCount). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Target module interface functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-define(TGT_IFACE_0(N), N( {M,C}) -> M:N( C)). +-define(TGT_IFACE_1(N), N(A1, {M,C}) -> M:N(A1, C)). +-define(TGT_IFACE_2(N), N(A1,A2, {M,C}) -> M:N(A1,A2, C)). +-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)). + +?TGT_IFACE_2(bb). +?TGT_IFACE_1(def_use). +?TGT_IFACE_1(defines). +?TGT_IFACE_1(defines_all_alloc). +?TGT_IFACE_1(is_precoloured). +?TGT_IFACE_1(mk_goto). +?TGT_IFACE_2(mk_move). +?TGT_IFACE_0(new_label). +?TGT_IFACE_0(new_reg_nr). +?TGT_IFACE_1(number_of_temporaries). +?TGT_IFACE_3(redirect_jmp). +?TGT_IFACE_1(reg_nr). +?TGT_IFACE_1(reverse_postorder). +?TGT_IFACE_2(subst_temps). +?TGT_IFACE_3(update_bb). +?TGT_IFACE_2(update_reg_nr). + +branch_preds(Instr, {TgtMod,TgtCtx}) -> + merge_sorted_preds(lists:keysort(1, TgtMod:branch_preds(Instr, TgtCtx))). + +livein(Liveness, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:livein(Liveness, L, TgtCtx), Target)). + +liveout(Liveness, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:liveout(Liveness, L, TgtCtx), Target)). + +merge_sorted_preds([]) -> []; +merge_sorted_preds([{L, P1}, {L, P2}|LPs]) -> + merge_sorted_preds([{L, P1+P2}|LPs]); +merge_sorted_preds([LP|LPs]) -> [LP|merge_sorted_preds(LPs)]. + +reg_names(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. diff --git a/lib/hipe/regalloc/hipe_regalloc_loop.erl b/lib/hipe/regalloc/hipe_regalloc_loop.erl index 5d32073437..29ef3adcc2 100644 --- a/lib/hipe/regalloc/hipe_regalloc_loop.erl +++ b/lib/hipe/regalloc/hipe_regalloc_loop.erl @@ -99,11 +99,19 @@ call_allocator(CFG, Liveness, SpillLimit, SpillIndex, Options, RegAllocMod, TargetCtx, Options) end. -do_range_split(CFG0, Liveness, TgtMod, TgtCtx, Options) -> - case proplists:get_bool(ra_restore_reuse, Options) of +do_range_split(CFG0, Liveness0, TgtMod, TgtCtx, Options) -> + {CFG2, Liveness1} = + case proplists:get_bool(ra_restore_reuse, Options) of + true -> + CFG1 = hipe_restore_reuse:split(CFG0, Liveness0, TgtMod, TgtCtx), + {CFG1, TgtMod:analyze(CFG1, TgtCtx)}; + false -> + {CFG0, Liveness0} + end, + case proplists:get_bool(ra_range_split, Options) of true -> - CFG1 = hipe_restore_reuse:split(CFG0, Liveness, TgtMod, TgtCtx), - {CFG1, TgtMod:analyze(CFG1, TgtCtx)}; + CFG3 = hipe_range_split:split(CFG2, Liveness1, TgtMod, TgtCtx, Options), + {CFG3, TgtMod:analyze(CFG3, TgtCtx)}; false -> - {CFG0, Liveness} + {CFG2, Liveness1} end. diff --git a/lib/hipe/regalloc/hipe_restore_reuse.erl b/lib/hipe/regalloc/hipe_restore_reuse.erl index 2d8dea2a2c..2158bd185e 100644 --- a/lib/hipe/regalloc/hipe_restore_reuse.erl +++ b/lib/hipe/regalloc/hipe_restore_reuse.erl @@ -36,6 +36,14 @@ -export([split/4]). +%% Exports for hipe_range_split, which uses restore_reuse as one possible spill +%% "mode" +-export([analyse/3 + ,renamed_in_block/2 + ,split_in_block/2 + ]). +-export_type([avail/0]). + -compile(inline). %% -define(DO_ASSERT, 1). @@ -59,7 +67,7 @@ split(CFG, Liveness, TargetMod, TargetContext) -> rewrite(CFG, Target, Avail). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% --type avail() :: #{label() => avail_bb()}. +-opaque avail() :: #{label() => avail_bb()}. -record(avail_bb, { %% Blocks where HasCall is true are considered to have too high @@ -332,6 +340,11 @@ rewrite([L|Ls], Target, Avail, Input0, CFG0) -> Input1, CFG1), rewrite(Ls, Target, Avail, Input, CFG). +-spec renamed_in_block(label(), avail()) -> ordsets:ordset(reg()). +renamed_in_block(L, Avail) -> + ordsets:union([avail_self(L, Avail), want_in(L, Avail), + want_out(L, Avail)]). + -spec split_in_block(label(), avail()) -> ordsets:ordset(reg()). split_in_block(L, Avail) -> ordsets:subtract(ordsets:union(avail_self(L, Avail), want_out(L, Avail)), |