diff options
author | Magnus Lång <[email protected]> | 2016-05-28 20:22:34 +0200 |
---|---|---|
committer | Magnus Lång <[email protected]> | 2016-09-02 15:04:45 +0200 |
commit | 85bd166647e7f260fd665eb44da9151c0d88f208 (patch) | |
tree | 84878ab624fcc22246bae89b75a6857415508539 /lib | |
parent | c2f8b61ca3682281752fa0984699214dfcbf7ccd (diff) | |
download | otp-85bd166647e7f260fd665eb44da9151c0d88f208.tar.gz otp-85bd166647e7f260fd665eb44da9151c0d88f208.tar.bz2 otp-85bd166647e7f260fd665eb44da9151c0d88f208.zip |
hipe: Add hipe_regalloc_prepass
hipe_regalloc_prepass speeds up register allocation by spilling any temp
that is live over a call (which clobbers all register).
In order to detect these, a new function was added to the target
interface; defines_all_alloc/1, that takes an instruction and returns a
boolean.
Diffstat (limited to 'lib')
22 files changed, 490 insertions, 11 deletions
diff --git a/lib/hipe/amd64/hipe_amd64_registers.erl b/lib/hipe/amd64/hipe_amd64_registers.erl index ada5311453..7c6965b938 100644 --- a/lib/hipe/amd64/hipe_amd64_registers.erl +++ b/lib/hipe/amd64/hipe_amd64_registers.erl @@ -253,6 +253,9 @@ ret(N) -> _ -> exit({?MODULE, ret, N}) end. +%% Note: the fact that (allocatable() UNION allocatable_x87() UNION +%% allocatable_sse2()) is a subset of call_clobbered() is hard-coded in +%% hipe_x86_defuse:insn_defs_all/1 call_clobbered() -> [{?RAX,tagged},{?RAX,untagged}, % does the RA strip the type or not? {?RDX,tagged},{?RDX,untagged}, diff --git a/lib/hipe/arm/hipe_arm_defuse.erl b/lib/hipe/arm/hipe_arm_defuse.erl index f57b0e601c..f92cf4f82a 100644 --- a/lib/hipe/arm/hipe_arm_defuse.erl +++ b/lib/hipe/arm/hipe_arm_defuse.erl @@ -22,6 +22,7 @@ -module(hipe_arm_defuse). -export([insn_def_all/1, insn_use_all/1]). -export([insn_def_gpr/1, insn_use_gpr/1]). +-export([insn_defs_all_gpr/1]). -include("hipe_arm.hrl"). %%% @@ -55,6 +56,12 @@ insn_def_gpr(I) -> _ -> [] end. +insn_defs_all_gpr(I) -> + case I of + #pseudo_call{} -> true; + _ -> false + end. + call_clobbered_gpr() -> [hipe_arm:mk_temp(R, T) || {R,T} <- hipe_arm_registers:call_clobbered() ++ all_fp_pseudos()]. diff --git a/lib/hipe/arm/hipe_arm_registers.erl b/lib/hipe/arm/hipe_arm_registers.erl index dcf039676b..3ecf2f2fdb 100644 --- a/lib/hipe/arm/hipe_arm_registers.erl +++ b/lib/hipe/arm/hipe_arm_registers.erl @@ -180,6 +180,8 @@ is_arg(R) -> _ -> false end. +%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is +%% hard-coded in hipe_arm_defuse:insn_defs_all_gpr/1 call_clobbered() -> % does the RA strip the type or not? [{?R0,tagged},{?R0,untagged}, {?R1,tagged},{?R1,untagged}, diff --git a/lib/hipe/main/hipe.app.src b/lib/hipe/main/hipe.app.src index 6c3a2741b3..e4796368c3 100644 --- a/lib/hipe/main/hipe.app.src +++ b/lib/hipe/main/hipe.app.src @@ -145,6 +145,7 @@ hipe_profile, hipe_reg_worklists, hipe_regalloc_loop, + hipe_regalloc_prepass, hipe_rtl, hipe_rtl_arch, hipe_rtl_arith_32, diff --git a/lib/hipe/ppc/hipe_ppc_defuse.erl b/lib/hipe/ppc/hipe_ppc_defuse.erl index 77b84dc574..305e88488d 100644 --- a/lib/hipe/ppc/hipe_ppc_defuse.erl +++ b/lib/hipe/ppc/hipe_ppc_defuse.erl @@ -22,6 +22,7 @@ -module(hipe_ppc_defuse). -export([insn_def_all/1, insn_use_all/1]). -export([insn_def_gpr/1, insn_use_gpr/1]). +-export([insn_defs_all_gpr/1, insn_defs_all_fpr/1]). -export([insn_def_fpr/1, insn_use_fpr/1]). -include("hipe_ppc.hrl"). @@ -52,6 +53,9 @@ insn_def_gpr(I) -> _ -> [] end. +insn_defs_all_gpr(#pseudo_call{}) -> true; +insn_defs_all_gpr(_) -> false. + call_clobbered_gpr() -> [hipe_ppc:mk_temp(R, T) || {R,T} <- hipe_ppc_registers:call_clobbered() ++ all_fp_pseudos()]. @@ -116,6 +120,9 @@ insn_def_fpr(I) -> _ -> [] end. +insn_defs_all_fpr(#pseudo_call{}) -> true; +insn_defs_all_fpr(_) -> false. + call_clobbered_fpr() -> [hipe_ppc:mk_temp(R, 'double') || R <- hipe_ppc_registers:allocatable_fpr()]. diff --git a/lib/hipe/ppc/hipe_ppc_registers.erl b/lib/hipe/ppc/hipe_ppc_registers.erl index f4781d5ed7..8f6d9779fc 100644 --- a/lib/hipe/ppc/hipe_ppc_registers.erl +++ b/lib/hipe/ppc/hipe_ppc_registers.erl @@ -201,6 +201,8 @@ is_arg(R) -> _ -> false end. +%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is +%% hard-coded in hipe_ppc_defuse:insn_defs_all_gpr/1 call_clobbered() -> % does the RA strip the type or not? [{?R0,tagged},{?R0,untagged}, %% R1 is reserved for C diff --git a/lib/hipe/regalloc/Makefile b/lib/hipe/regalloc/Makefile index ceb535f1c7..209f230a9b 100644 --- a/lib/hipe/regalloc/Makefile +++ b/lib/hipe/regalloc/Makefile @@ -51,6 +51,7 @@ MODULES = hipe_ig hipe_ig_moves hipe_moves \ hipe_coalescing_regalloc \ hipe_graph_coloring_regalloc \ hipe_regalloc_loop \ + hipe_regalloc_prepass \ hipe_ls_regalloc \ hipe_ppc_specific hipe_ppc_specific_fp \ hipe_sparc_specific hipe_sparc_specific_fp \ diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl index 2e5804337d..6ef79ce95d 100644 --- a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl +++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl @@ -33,6 +33,7 @@ liveout/2, uses/1, defines/1, + defines_all_alloc/1, def_use/1, is_arg/1, %% used by hipe_ls_regalloc is_move/1, @@ -174,6 +175,8 @@ defines(I) -> hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double']. +defines_all_alloc(I) -> hipe_amd64_defuse:insn_defs_all(I). + is_move(Instruction) -> case hipe_x86:is_fmove(Instruction) of true -> diff --git a/lib/hipe/regalloc/hipe_arm_specific.erl b/lib/hipe/regalloc/hipe_arm_specific.erl index e04d80f690..c696668399 100644 --- a/lib/hipe/regalloc/hipe_arm_specific.erl +++ b/lib/hipe/regalloc/hipe_arm_specific.erl @@ -40,6 +40,7 @@ ,livein/2 ,uses/1 ,defines/1 + ,defines_all_alloc/1 ]). %% for hipe_graph_coloring_regalloc: @@ -129,6 +130,9 @@ defines(I) -> [X || X <- hipe_arm_defuse:insn_def_gpr(I), hipe_arm:temp_is_allocatable(X)]. +defines_all_alloc(I) -> + hipe_arm_defuse:insn_defs_all_gpr(I). + is_move(Instruction) -> case hipe_arm:is_pseudo_move(Instruction) of true -> diff --git a/lib/hipe/regalloc/hipe_ppc_specific.erl b/lib/hipe/regalloc/hipe_ppc_specific.erl index 988501c96f..79ca333865 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific.erl @@ -40,6 +40,7 @@ ,livein/2 ,uses/1 ,defines/1 + ,defines_all_alloc/1 ]). %% for hipe_graph_coloring_regalloc: @@ -129,6 +130,9 @@ defines(I) -> [X || X <- hipe_ppc_defuse:insn_def_gpr(I), hipe_ppc:temp_is_allocatable(X)]. +defines_all_alloc(I) -> + hipe_ppc_defuse:insn_defs_all_gpr(I). + is_move(Instruction) -> case hipe_ppc:is_pseudo_move(Instruction) of true -> diff --git a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl index 6f00111777..4bf1385e0f 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl @@ -40,6 +40,7 @@ ,livein/2 ,uses/1 ,defines/1 + ,defines_all_alloc/1 ]). %% for hipe_graph_coloring_regalloc: @@ -120,6 +121,9 @@ uses(I) -> defines(I) -> hipe_ppc_defuse:insn_def_fpr(I). +defines_all_alloc(I) -> + hipe_ppc_defuse:insn_defs_all_fpr(I). + is_move(I) -> hipe_ppc:is_pseudo_fmove(I). diff --git a/lib/hipe/regalloc/hipe_regalloc_loop.erl b/lib/hipe/regalloc/hipe_regalloc_loop.erl index fa42cdd0fb..33a7130ff1 100644 --- a/lib/hipe/regalloc/hipe_regalloc_loop.erl +++ b/lib/hipe/regalloc/hipe_regalloc_loop.erl @@ -43,7 +43,8 @@ ra_common(Defun, SpillIndex, Options, RegAllocMod, TargetMod) -> alloc(Defun, CFG, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) -> ?inc_counter(ra_iteration_counter, 1), {Coloring, _NewSpillIndex, Liveness} = - RegAllocMod:regalloc(CFG, SpillIndex, SpillLimit,TargetMod, Options), + hipe_regalloc_prepass:regalloc( + RegAllocMod, CFG, SpillIndex, SpillLimit, TargetMod, Options), {NewDefun, DidSpill} = TargetMod:check_and_rewrite(Defun, Coloring), case DidSpill of false -> %% No new temps, we are done. diff --git a/lib/hipe/regalloc/hipe_regalloc_prepass.erl b/lib/hipe/regalloc/hipe_regalloc_prepass.erl new file mode 100644 index 0000000000..f9df3a68de --- /dev/null +++ b/lib/hipe/regalloc/hipe_regalloc_prepass.erl @@ -0,0 +1,400 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2016. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% PREPASS FOR ITERATED REGISTER ALLOCATORS +%% +%% Implements a trivial partial but optimal fast register allocator to be used +%% as the first pass of the register allocation loop. +%% +%% The idea is to drastically reduce the number of temporaries, so as to speed +%% up the real register allocators. +%% +%% * Spills trivially unallocatable temps +%% This relies on the fact that calls intentionally clobber all registers. +%% Since this is the case, any temp that is alive over a call can't possibly +%% be allocated to a +%% +%% TODO: Partition the IG (without constructing it) into connected components +%% and call the actual register allocator on them individually. + +-module(hipe_regalloc_prepass). +-export([regalloc/6]). + +%% -ifndef(DEBUG). +%% -compile(inline). +%% -endif. + +%%-define(DO_ASSERT, 1). +-include("../main/hipe.hrl"). + +%% We present a "pseudo-target" to the register allocator we wrap. +%% Note: all arities are +1 as we're currently using the parameterised module +%% facility to store context data. +-export([analyze/2, + all_precoloured/1, + allocatable/1, + args/2, + bb/3, + breadthorder/2, + def_use/2, + defines/2, + is_fixed/2, % used by hipe_graph_coloring_regalloc + is_global/2, + is_move/2, + is_precoloured/2, + labels/2, + livein/3, + liveout/3, + non_alloc/2, + number_of_temporaries/2, + physical_name/2, + postorder/2, + reg_nr/2, + uses/2, + var_range/2, + reverse_postorder/2]). + +%% Eww, parameterised module. Can we fix it without having to touch all the +%% register allocators? +-record(?MODULE, + {target :: module() + ,sub :: sub_map() % Translates temp numbers found in CFG and understood by + % Target to temp numbers passed to RegAllocMod. + ,inv :: inv_map() % Translates temp numbers passed to RegAllocMod + % to temp numbers found in CFG and understood by + % Target + ,max_phys :: temp() % Exclusive upper bound on physical registers + }). + +-record(cfg, + {cfg :: target_cfg() + ,bbs :: #{label() => hipe_bb:bb(instr())} + ,max_reg :: temp() % Exclusive upper bound on temp numbers + ,live :: target_liveness() + }). + +-record(instr, + {tgt_instr :: target_instr() % Not used, for debugging + ,defuse :: {[temp()], [temp()]} + ,is_move :: boolean() + }). +%% -record(label, +%% {no :: label() +%% }). +-type instr() :: #instr{} %% | #label{} + . + +-type target_cfg() :: any(). +-type target_instr() :: any(). +-type target_temp() :: any(). +-type target_reg() :: non_neg_integer(). +-type target_liveness() :: any(). +-type spillno() :: non_neg_integer(). +-type temp() :: non_neg_integer(). +-type label() :: non_neg_integer(). + +-spec regalloc(module(), target_cfg(), spillno(), spillno(), module(), + proplists:proplist()) + -> {hipe_map(), spillno(), target_liveness()}. +regalloc(RegAllocMod, CFG, SpillIndex0, SpillLimit, Target, Options) -> + Liveness = Target:analyze(CFG), + {BBs0, LivePseudos, _Unused, SpilledPseudos, SpillIndex} = + scan_cfg(CFG, Liveness, SpillIndex0, Target), + + {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit} = + number_and_map(Target:all_precoloured(), LivePseudos, SpillLimit), + BBs = transform_cfg(BBs0, SubMap), + SubMod = #cfg{cfg=CFG, bbs=BBs, max_reg=MaxR, live=Liveness}, + SubTarget = #?MODULE{target=Target, max_phys=MaxPhys, inv=InvMap, sub=SubMap}, + case + RegAllocMod:regalloc(SubMod, SpillIndex, SubSpillLimit, SubTarget, Options) + of + {SubColoring, NewSpillIndex} -> ok; + {SubColoring, NewSpillIndex, _SubLiveness} -> ok + end, + ?ASSERT(check_coloring(SubColoring, SubMod, SubTarget)), % blame the RA ;) + SpillColors = [{T, {spill, S}} || {T, S} <- maps:to_list(SpilledPseudos)], + Coloring = translate_coloring(SubColoring, InvMap) ++ SpillColors, + ?ASSERT(check_coloring(Coloring, CFG, Target)), % Sanity-check + ?ASSERT(just_as_good_as(RegAllocMod, CFG, SpillIndex0, SpillLimit, Target, + Options, Coloring, _Unused)), + {Coloring, NewSpillIndex, Liveness}. + +number_and_map(Phys, Pseud, SpillLimit) -> + MaxPhys = lists:max(Phys) + 1, + case Pseud of [] -> ok; _ -> + case lists:min(Pseud) of % Assertion + FirstPseudo when FirstPseudo >= MaxPhys -> ok + end end, + NrPseuds = length(Pseud), + MaxR = MaxPhys+NrPseuds, + PseudNrs = lists:zip(Pseud, lists:seq(MaxPhys, MaxR-1)), + MapList = lists:zip(Phys, Phys) % Physicals are identity-mapped + ++ PseudNrs, + SubMap = maps:from_list(MapList), + InvMap = maps:from_list([{Fake, Real} || {Real, Fake} <- MapList]), + LastPseudo = case Pseud of [] -> MaxPhys-1; _ -> lists:max(Pseud) end, + SubSpillLimit = if SpillLimit > LastPseudo -> MaxR; + true -> map_get(SpillLimit, SubMap) + end, + {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit}. + +-record(spill_state, + {map :: spill_map() + ,ix :: spillno() + }). +-type spill_state() :: #spill_state{}. +-type spill_map() :: #{target_reg() => spillno()}. + +-spec scan_cfg(target_cfg(), target_liveness(), spillno(), module()) + -> {#{label() => [instr()]} + ,ordsets:set(target_reg()) + ,ordsets:set(target_reg()) + ,spill_map() + ,spillno() + }. +scan_cfg(CFG, Liveness, SpillIndex0, Target) -> + State0 = #spill_state{map=#{}, ix=SpillIndex0}, + {BBs, Seen, #spill_state{map=Spill, ix=SpillIndex}} = + scan_bbs(Target:labels(CFG), CFG, Liveness, #{}, State0, #{}, Target), + SeenOSet = ordsets:from_list(maps:keys(Seen)), + SpillOSet = ordsets:from_list(maps:keys(Spill)), + PhysOSet = ordsets:from_list(Target:all_precoloured()), + Dead = ordsets:union(SpillOSet, PhysOSet), + LivePseudos = ordsets:subtract(SeenOSet, Dead), + {_TMin, _TMax} = Target:var_range(CFG), + _Unused = ordsets:subtract(lists:seq(_TMin, _TMax), + ordsets:union(SeenOSet, PhysOSet)), + {BBs, LivePseudos, _Unused, Spill, SpillIndex}. + +-type seen() :: #{target_reg() => []}. % set + +-spec scan_bbs([label()], target_cfg(), target_liveness(), seen(), + spill_state(), #{label() => [instr()]}, module()) + -> {#{label() => [instr()]}, seen(), spill_state()}. +scan_bbs([], _CFG, _Liveness, Seen, State, BBs, _Target) -> {BBs, Seen, State}; +scan_bbs([L|Ls], CFG, Liveness, Seen0, State0, BBs, Target) -> + {Code, _, Seen, State} = scan_bb(hipe_bb:code(Target:bb(CFG, L)), + t_liveout(Liveness, L, Target), + Seen0, State0, Target), + scan_bbs(Ls, CFG, Liveness, Seen, State, BBs#{L=>Code}, Target). + +t_liveout(Liveness, L, Target) -> + %% FIXME: unnecessary sort; liveout is sorted, reg_names(...) should be sorted + %% or consist of a few sorted subsequences (per type) + ordsets:from_list(reg_names(Target:liveout(Liveness, L), Target)). + +-spec reg_names([target_temp()], module()) -> [target_reg()]. +reg_names(Regs, Target) -> + [Target:reg_nr(X) || X <- Regs]. + +scan_bb([], Live, Seen, State, _Target) -> {[], Live, Seen, State}; +scan_bb([I|Is0], Live0, Seen0, State0, Target) -> + %% We could pass Target in the return tuple to make the stack frames + %% smaller, but without multireturn opt, it's probably not worth it. + {Is1, Live1, Seen1, State1} = scan_bb(Is0, Live0, Seen0, State0, Target), + {TDef, TUse} = Target:def_use(I), + Def = ordsets:from_list(reg_names(TDef, Target)), + Use = ordsets:from_list(reg_names(TUse, Target)), + Live = ordsets:union(Use, ToSpill = ordsets:subtract(Live1, Def)), + Seen = add_seen(Def, add_seen(Use, Seen1)), + State = + case Target:defines_all_alloc(I) of + false -> State1; + true -> spill_all(ToSpill, Target, State1) + end, + NewI = #instr{tgt_instr=I, defuse={Def, Use}, is_move=Target:is_move(I)}, + {[NewI|Is1], Live, Seen, State}. + +add_seen([], Seen) -> Seen; +add_seen([R|Rs], Seen) -> add_seen(Rs, Seen#{R=>[]}). + +spill_all([], _Target, State) -> State; +spill_all([R|Rs], Target, State=#spill_state{map=Map, ix=Ix}) -> + case Target:is_precoloured(R) or maps:is_key(R, Map) of + true -> spill_all(Rs, Target, State); + false -> spill_all(Rs, Target, State#spill_state{map=Map#{R=>Ix}, ix=Ix+1}) + end. + +transform_cfg(BBs0, SubMap) -> + maps:map(fun(_, BB) -> transform_bb(BB, SubMap) end, BBs0). + +transform_bb(BB, SubMap) -> + hipe_bb:mk_bb([I#instr{defuse={map_get_all_partial(Def, SubMap), + map_get_all_partial(Use, SubMap)}} + || I = #instr{defuse={Def,Use}} <- BB]). + +-spec translate_coloring(hipe_map(), _) -> hipe_map(). +translate_coloring(SubColoring, InvMap) -> + lists:map(fun({T, P}) -> {map_get(T, InvMap), P} end, SubColoring). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Interference graph partitioning +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% We partition the program + +%% The algorithm considers two kinds of components; those that are local to a +%% basic block, and those that are not. The key is that any basic block belongs +%% to at most two non-local components; one from the beginning to the first +%% split point, and one from the end to the last split point. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Temp map ADT +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-type sub_map() :: #{target_temp() => temp()}. +-type inv_map() :: #{temp() => target_temp()}. +map_get(Temp, Map) when is_integer(Temp) -> maps:get(Temp, Map). + +%% map_get_all(Ts, Map) -> [map_get(T, Map) || T <- Ts]. + +map_get_all_partial([], _) -> []; +map_get_all_partial([T|Ts], Map) when is_integer(T) -> + case Map of + #{T := R} -> [R|map_get_all_partial(Ts, Map)]; + #{} -> map_get_all_partial(Ts, Map) + end. + +-ifdef(DO_ASSERT). +%%%%%%%%%%%%%%%%%%%% +%% Check that the coloring is correct (if the IG is correct): +%% + +%% Define these as 'ok' or 'report(X,Y)' depending on how much output you want. +-define(report0(X,Y), ?IF_DEBUG_LEVEL(0,?msg(X, Y),ok)). +-define(report(X,Y), ?IF_DEBUG_LEVEL(1,?msg(X, Y),ok)). +-define(report2(X,Y), ?IF_DEBUG_LEVEL(2,?msg(X, Y),ok)). +-define(report3(X,Y), ?IF_DEBUG_LEVEL(3,?msg(X, Y),ok)). + +check_coloring(Coloring, CFG, Target) -> + ?report0("checking coloring ~p~n",[Coloring]), + IG = hipe_ig:build(CFG, Target), + check_cols(hipe_vectors:list(hipe_ig:adj_list(IG)), + init_coloring(Coloring, Target)). + +init_coloring(Xs, Target) -> + hipe_temp_map:cols2tuple(Xs, Target). + +check_color_of(X, Cols) -> +%% if +%% is_precoloured(X) -> +%% phys_reg_color(X,Cols); +%% true -> + case hipe_temp_map:find(X, Cols) of + unknown -> + ?WARNING_MSG("node ~p: color not found~n", [X]), + uncolored; + C -> + C + end. + +check_cols([], _Cols) -> + ?report("coloring valid~n",[]), + true; +check_cols([{X,Neighbours}|Xs], Cols) -> + Cs = [{N, check_color_of(N, Cols)} || N <- Neighbours], + C = check_color_of(X, Cols), + case valid_coloring(X, C, Cs) of + yes -> + check_cols(Xs, Cols); + {no,Invalids} -> + ?msg("node ~p has same color (~p) as ~p~n", [X,C,Invalids]), + check_cols(Xs, Cols) andalso false + end. + +valid_coloring(_X, _C, []) -> + yes; +valid_coloring(X, C, [{Y,C}|Ys]) -> + case valid_coloring(X, C, Ys) of + yes -> {no, [Y]}; + {no,Zs} -> {no, [Y|Zs]} + end; +valid_coloring(X, C, [_|Ys]) -> + valid_coloring(X, C, Ys). + +%%%%%%%%%%%%%%%%%%%% +%% Check that no register allocation opportinities were missed due to ?MODULE +%% +just_as_good_as(RegAllocMod, CFG, SpillIndex0, SpillLimit, Target, Options, + Coloring, Unused) -> + {CheckColoring, _} = RegAllocMod:regalloc(CFG, SpillIndex0, SpillLimit, + Target, Options), + Now = lists:sort([vt(C) || C <- Coloring]), + Check = lists:sort([vt(C) || C={R,_} <- CheckColoring, + not ordsets:is_element(R, Unused)]), + case Now of + Check -> true; + _ -> + io:fwrite(standard_error, "Colorings differ (~w, sub: ~w, full: ~w)!~n" + %% "Sub:~w~n" + "Unused: ~w~n" + "Now:~w~nCorrect:~w~n", + [Target, count(Coloring), count(CheckColoring), + %% SubColoring, + Unused, + Now -- Check, Check -- Now]), + false + end. + +count(C) -> {length([[] || {_, {reg, _}} <- C]), + length([[] || {_, {spill, _}} <- C])}. + +vt({R, {reg, _}}) -> {R, reg}; +vt({R, {spill, _}}) -> {R, spill}. +-endif. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Pseudo-target interface +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +analyze(Cfg, _ModRec) -> Cfg. +bb(#cfg{bbs=BBs}, Ix, _ModRec) -> maps:get(Ix, BBs). +args(Arity, #?MODULE{target=Target, sub=SubM}) -> + map_get(Target:args(Arity), SubM). +labels(#cfg{cfg=CFG}, #?MODULE{target=Target}) -> Target:labels(CFG). +livein(#cfg{live=Live}, Lb, #?MODULE{target=Target, sub=SubM}) -> + map_get_all_partial(reg_names(Target:livein(Live, Lb), Target), SubM). % Should we precompute? +liveout(#cfg{live=Live}, Lb, #?MODULE{target=Target, sub=SubM}) -> + map_get_all_partial(reg_names(Target:liveout(Live, Lb), Target), SubM). % dito +uses(I, MR) -> element(2, def_use(I, MR)). +defines(I, MR) -> element(1, def_use(I, MR)). +def_use(#instr{defuse=DefUse}, _ModRec) -> DefUse. +is_move(#instr{is_move=IM}, _ModRec) -> IM. +is_fixed(Reg, #?MODULE{target=Target,inv=InvM}) -> + Target:is_fixed(map_get(Reg, InvM)). % XXX: Is this hot? +is_global(Reg, #?MODULE{target=Target,max_phys=MaxPhys}) when Reg < MaxPhys -> + Target:is_global(Reg). % assume id-map +is_precoloured(Reg, #?MODULE{max_phys=MaxPhys}) -> Reg < MaxPhys. +reg_nr(Reg, _ModRec) -> Reg. % After mapping (naturally) +non_alloc(#cfg{cfg=CFG}, #?MODULE{target=Target,sub=SubM}) -> + map_get_all_partial(reg_names(Target:non_alloc(CFG), Target), SubM). +number_of_temporaries(#cfg{max_reg=MaxR}, _ModRec) -> MaxR. +allocatable(#?MODULE{target=Target}) -> Target:allocatable(). % assume id-map +physical_name(Reg, _ModRec) -> Reg. % XXX: is this correct? +all_precoloured(#?MODULE{target=Target}) -> Target:all_precoloured(). % dito +var_range(#cfg{cfg=CFG, max_reg=MaxReg}, #?MODULE{target=Target}) -> + {0, _} = Target:var_range(CFG), + {0, MaxReg-1}. % What is the correct answer? + +breadthorder(#cfg{cfg=CFG}, #?MODULE{target=Target}) -> + Target:breadthorder(CFG). +postorder(#cfg{cfg=CFG}, #?MODULE{target=Target}) -> Target:postorder(CFG). +reverse_postorder(#cfg{cfg=CFG}, #?MODULE{target=Target}) -> + Target:reverse_postorder(CFG). diff --git a/lib/hipe/regalloc/hipe_sparc_specific.erl b/lib/hipe/regalloc/hipe_sparc_specific.erl index 29d0908caf..7df1bbe113 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific.erl @@ -40,6 +40,7 @@ ,livein/2 ,uses/1 ,defines/1 + ,defines_all_alloc/1 ]). %% for hipe_graph_coloring_regalloc: @@ -129,6 +130,9 @@ defines(I) -> [X || X <- hipe_sparc_defuse:insn_def_gpr(I), hipe_sparc:temp_is_allocatable(X)]. +defines_all_alloc(I) -> + hipe_sparc_defuse:insn_defs_all_gpr(I). + is_move(Instruction) -> case hipe_sparc:is_pseudo_move(Instruction) of true -> diff --git a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl index 08c2541b41..fd80053708 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl @@ -40,6 +40,7 @@ ,livein/2 ,uses/1 ,defines/1 + ,defines_all_alloc/1 ]). %% for hipe_graph_coloring_regalloc: @@ -120,6 +121,9 @@ uses(I) -> defines(I) -> hipe_sparc_defuse:insn_def_fpr(I). +defines_all_alloc(I) -> + hipe_sparc_defuse:insn_defs_all_fpr(I). + is_move(I) -> hipe_sparc:is_pseudo_fmove(I). diff --git a/lib/hipe/regalloc/hipe_temp_map.erl b/lib/hipe/regalloc/hipe_temp_map.erl index 4085a0e1a7..171c2fb019 100644 --- a/lib/hipe/regalloc/hipe_temp_map.erl +++ b/lib/hipe/regalloc/hipe_temp_map.erl @@ -33,7 +33,7 @@ -module(hipe_temp_map). --export([cols2tuple/2, is_spilled/2, to_substlist/1]). +-export([cols2tuple/2, find/2, is_spilled/2, to_substlist/1]). -include("../main/hipe.hrl"). @@ -50,12 +50,10 @@ -spec cols2tuple(hipe_map(), atom()) -> hipe_temp_map(). cols2tuple(Map, Target) -> - ?ASSERT(check_list(Map)), - SortedMap = lists:keysort(1, Map), + SortedMap = lists:keysort(1, Map), cols2tuple(0, SortedMap, [], Target). %% sorted_cols2tuple(Map, Target) -> -%% ?ASSERT(check_list(Map)), %% ?ASSERT(Map =:= lists:keysort(1, Map)), %% cols2tuple(0, Map, [], Target). @@ -66,7 +64,7 @@ cols2tuple(_, [], Vs, _) -> cols2tuple(N, [{R, C}|Ms], Vs, Target) when N =:= R -> %% N makes sure the mapping is dense. N is he next key. cols2tuple(N+1, Ms, [C|Vs], Target); -cols2tuple(N, SourceMapping, Vs, Target) -> +cols2tuple(N, SourceMapping=[{R,_}|_], Vs, Target) when N < R -> %% The source was sparse, make up some placeholders... Val = case Target:is_precoloured(N) of @@ -82,7 +80,7 @@ cols2tuple(N, SourceMapping, Vs, Target) -> -spec is_spilled(non_neg_integer(), hipe_temp_map()) -> boolean(). is_spilled(Temp, Map) -> - case element(Temp+1, Map) of + case find(Temp, Map) of {reg, _R} -> false; {fp_reg, _R}-> false; {spill, _N} -> true; @@ -106,9 +104,10 @@ is_spilled(Temp, Map) -> %% {spill, _N} -> false; %% unknown -> false %% end. -%% -%% %% Returns the inf temp Temp is mapped to. -%% find(Temp, Map) -> element(Temp+1, Map). + +%% Returns the inf temp Temp is mapped to. +find(Temp, Map) when Temp < tuple_size(Map) -> element(Temp+1, Map); +find(_, Map) when is_tuple(Map) -> unknown. % consistency with cols2tuple/2 %% diff --git a/lib/hipe/regalloc/hipe_x86_specific.erl b/lib/hipe/regalloc/hipe_x86_specific.erl index f1007c95ed..2720af92c1 100644 --- a/lib/hipe/regalloc/hipe_x86_specific.erl +++ b/lib/hipe/regalloc/hipe_x86_specific.erl @@ -47,6 +47,7 @@ liveout/2, uses/1, defines/1, + defines_all_alloc/1, def_use/1, is_arg/1, % used by hipe_ls_regalloc is_move/1, @@ -176,6 +177,8 @@ defines(I) -> hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =/= 'double']. +defines_all_alloc(I) -> ?HIPE_X86_DEFUSE:insn_defs_all(I). + is_move(Instruction) -> case hipe_x86:is_move(Instruction) of true -> diff --git a/lib/hipe/regalloc/hipe_x86_specific_x87.erl b/lib/hipe/regalloc/hipe_x86_specific_x87.erl index 0c022d5a27..cd4d02491d 100644 --- a/lib/hipe/regalloc/hipe_x86_specific_x87.erl +++ b/lib/hipe/regalloc/hipe_x86_specific_x87.erl @@ -53,6 +53,7 @@ liveout/2, uses/1, defines/1, + defines_all_alloc/1, is_global/1, reg_nr/1, physical_name/1, @@ -162,6 +163,8 @@ defines(I) -> hipe_x86:temp_is_allocatable(X), temp_is_double(X)]. +defines_all_alloc(I) -> hipe_amd64_defuse:insn_defs_all(I). + temp_is_double(Temp) -> hipe_x86:temp_type(Temp) =:= 'double'. diff --git a/lib/hipe/sparc/hipe_sparc_defuse.erl b/lib/hipe/sparc/hipe_sparc_defuse.erl index 4f66299f1d..4b5a19a19d 100644 --- a/lib/hipe/sparc/hipe_sparc_defuse.erl +++ b/lib/hipe/sparc/hipe_sparc_defuse.erl @@ -23,6 +23,7 @@ -export([insn_def_all/1, insn_use_all/1]). -export([insn_def_gpr/1, insn_use_gpr/1]). -export([insn_def_fpr/1, insn_use_fpr/1]). +-export([insn_defs_all_gpr/1, insn_defs_all_fpr/1]). -include("hipe_sparc.hrl"). %%% @@ -51,6 +52,12 @@ insn_def_gpr(I) -> _ -> [] end. +insn_defs_all_gpr(I) -> + case I of + #pseudo_call{} -> true; + _ -> false + end. + call_clobbered_gpr() -> [hipe_sparc:mk_temp(R, T) || {R,T} <- hipe_sparc_registers:call_clobbered() ++ all_fp_pseudos()]. @@ -115,6 +122,12 @@ insn_def_fpr(I) -> _ -> [] end. +insn_defs_all_fpr(I) -> + case I of + #pseudo_call{} -> true; + _ -> false + end. + call_clobbered_fpr() -> [hipe_sparc:mk_temp(R, 'double') || R <- hipe_sparc_registers:allocatable_fpr()]. diff --git a/lib/hipe/sparc/hipe_sparc_registers.erl b/lib/hipe/sparc/hipe_sparc_registers.erl index 6681a10070..20138836dd 100644 --- a/lib/hipe/sparc/hipe_sparc_registers.erl +++ b/lib/hipe/sparc/hipe_sparc_registers.erl @@ -249,6 +249,8 @@ is_arg(R) -> _ -> false end. +%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is +%% hard-coded in hipe_sparc_defuse:insn_defs_all_gpr/1 call_clobbered() -> % does the RA strip the type or not? [%% ?G0 is the non-allocatable constant zero {?G1,tagged},{?G1,untagged}, diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl index 9cba6cbe4b..4455def74e 100644 --- a/lib/hipe/x86/hipe_x86_defuse.erl +++ b/lib/hipe/x86/hipe_x86_defuse.erl @@ -35,7 +35,7 @@ -endif. -module(?HIPE_X86_DEFUSE). --export([insn_def/1, insn_use/1]). %% src_use/1]). +-export([insn_def/1, insn_defs_all/1, insn_use/1]). %% src_use/1]). -include("../x86/hipe_x86.hrl"). %%% @@ -64,6 +64,16 @@ insn_def(I) -> _ -> [] end. + +%% @doc Answers whether instruction I defines all allocatable registers. Used by +%% hipe_regalloc_prepass. +-spec insn_defs_all(_) -> boolean(). +insn_defs_all(I) -> + case I of + #pseudo_call{} -> true; + _ -> false + end. + dst_def(Dst) -> case Dst of #x86_temp{} -> [Dst]; diff --git a/lib/hipe/x86/hipe_x86_registers.erl b/lib/hipe/x86/hipe_x86_registers.erl index 179d734501..f00bbfb280 100644 --- a/lib/hipe/x86/hipe_x86_registers.erl +++ b/lib/hipe/x86/hipe_x86_registers.erl @@ -224,6 +224,8 @@ ret(N) -> exit({?MODULE, ret, N}) end. +%% Note: the fact that (allocatable() UNION allocatable_x87()) is a subset of +%% call_clobbered() is hard-coded in hipe_x86_defuse:insn_defs_all/1 call_clobbered() -> [{?EAX,tagged},{?EAX,untagged}, % does the RA strip the type or not? {?EDX,tagged},{?EDX,untagged}, |