%% -*- erlang-indent-level: 2 -*- %% %% %CopyrightBegin% %% %% Copyright Ericsson AB 2016. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at %% %% http://www.apache.org/licenses/LICENSE-2.0 %% %% Unless required by applicable law or agreed to in writing, software %% distributed under the License is distributed on an "AS IS" BASIS, %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. %% %% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%@doc %% PREPASS FOR ITERATED REGISTER ALLOCATORS %% %% Implements a trivial partial but optimal fast register allocator to be used %% as the first pass of the register allocation loop. %% %% The idea is to drastically reduce the number of temporaries, so as to speed %% up the real register allocators. %% %% * Spills trivially unallocatable temps %% This relies on the fact that calls intentionally clobber all registers. %% Since this is the case, any temp that is alive over a call can't possibly %% be allocated to a %% %% TODO: Partition the IG (without constructing it) into connected components %% and call the actual register allocator on them individually. -module(hipe_regalloc_prepass). -export([regalloc/6]). %% -ifndef(DEBUG). %% -compile(inline). %% -endif. %%-define(DO_ASSERT, 1). -include("../main/hipe.hrl"). %% We present a "pseudo-target" to the register allocator we wrap. %% Note: all arities are +1 as we're currently using the parameterised module %% facility to store context data. -export([analyze/2, all_precoloured/1, allocatable/1, args/2, bb/3, breadthorder/2, def_use/2, defines/2, is_fixed/2, % used by hipe_graph_coloring_regalloc is_global/2, is_move/2, is_precoloured/2, labels/2, livein/3, liveout/3, non_alloc/2, number_of_temporaries/2, physical_name/2, postorder/2, reg_nr/2, uses/2, var_range/2, reverse_postorder/2]). %% Eww, parameterised module. Can we fix it without having to touch all the %% register allocators? -record(?MODULE, {target :: module() ,sub :: sub_map() % Translates temp numbers found in CFG and understood by % Target to temp numbers passed to RegAllocMod. ,inv :: inv_map() % Translates temp numbers passed to RegAllocMod % to temp numbers found in CFG and understood by % Target ,max_phys :: temp() % Exclusive upper bound on physical registers }). -record(cfg, {cfg :: target_cfg() ,bbs :: #{label() => hipe_bb:bb(instr())} ,max_reg :: temp() % Exclusive upper bound on temp numbers ,live :: target_liveness() }). -record(instr, {tgt_instr :: target_instr() % Not used, for debugging ,defuse :: {[temp()], [temp()]} ,is_move :: boolean() }). %% -record(label, %% {no :: label() %% }). -type instr() :: #instr{} %% | #label{} . -type target_cfg() :: any(). -type target_instr() :: any(). -type target_temp() :: any(). -type target_reg() :: non_neg_integer(). -type target_liveness() :: any(). -type spillno() :: non_neg_integer(). -type temp() :: non_neg_integer(). -type label() :: non_neg_integer(). -spec regalloc(module(), target_cfg(), spillno(), spillno(), module(), proplists:proplist()) -> {hipe_map(), spillno(), target_liveness()}. regalloc(RegAllocMod, CFG, SpillIndex0, SpillLimit, Target, Options) -> Liveness = Target:analyze(CFG), {BBs0, LivePseudos, _Unused, SpilledPseudos, SpillIndex} = scan_cfg(CFG, Liveness, SpillIndex0, Target), {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit} = number_and_map(Target:all_precoloured(), LivePseudos, SpillLimit), BBs = transform_cfg(BBs0, SubMap), SubMod = #cfg{cfg=CFG, bbs=BBs, max_reg=MaxR, live=Liveness}, SubTarget = #?MODULE{target=Target, max_phys=MaxPhys, inv=InvMap, sub=SubMap}, case RegAllocMod:regalloc(SubMod, SpillIndex, SubSpillLimit, SubTarget, Options) of {SubColoring, NewSpillIndex} -> ok; {SubColoring, NewSpillIndex, _SubLiveness} -> ok end, ?ASSERT(check_coloring(SubColoring, SubMod, SubTarget)), % blame the RA ;) SpillColors = [{T, {spill, S}} || {T, S} <- maps:to_list(SpilledPseudos)], Coloring = translate_coloring(SubColoring, InvMap) ++ SpillColors, ?ASSERT(check_coloring(Coloring, CFG, Target)), % Sanity-check ?ASSERT(just_as_good_as(RegAllocMod, CFG, SpillIndex0, SpillLimit, Target, Options, Coloring, _Unused)), {Coloring, NewSpillIndex, Liveness}. number_and_map(Phys, Pseud, SpillLimit) -> MaxPhys = lists:max(Phys) + 1, case Pseud of [] -> ok; _ -> case lists:min(Pseud) of % Assertion FirstPseudo when FirstPseudo >= MaxPhys -> ok end end, NrPseuds = length(Pseud), MaxR = MaxPhys+NrPseuds, PseudNrs = lists:zip(Pseud, lists:seq(MaxPhys, MaxR-1)), MapList = lists:zip(Phys, Phys) % Physicals are identity-mapped ++ PseudNrs, SubMap = maps:from_list(MapList), InvMap = maps:from_list([{Fake, Real} || {Real, Fake} <- MapList]), LastPseudo = case Pseud of [] -> MaxPhys-1; _ -> lists:max(Pseud) end, SubSpillLimit = if SpillLimit > LastPseudo -> MaxR; true -> map_get(SpillLimit, SubMap) end, {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit}. -record(spill_state, {map :: spill_map() ,ix :: spillno() }). -type spill_state() :: #spill_state{}. -type spill_map() :: #{target_reg() => spillno()}. -spec scan_cfg(target_cfg(), target_liveness(), spillno(), module()) -> {#{label() => [instr()]} ,ordsets:set(target_reg()) ,ordsets:set(target_reg()) ,spill_map() ,spillno() }. scan_cfg(CFG, Liveness, SpillIndex0, Target) -> State0 = #spill_state{map=#{}, ix=SpillIndex0}, {BBs, Seen, #spill_state{map=Spill, ix=SpillIndex}} = scan_bbs(Target:labels(CFG), CFG, Liveness, #{}, State0, #{}, Target), SeenOSet = ordsets:from_list(maps:keys(Seen)), SpillOSet = ordsets:from_list(maps:keys(Spill)), PhysOSet = ordsets:from_list(Target:all_precoloured()), Dead = ordsets:union(SpillOSet, PhysOSet), LivePseudos = ordsets:subtract(SeenOSet, Dead), {_TMin, _TMax} = Target:var_range(CFG), _Unused = ordsets:subtract(lists:seq(_TMin, _TMax), ordsets:union(SeenOSet, PhysOSet)), {BBs, LivePseudos, _Unused, Spill, SpillIndex}. -type seen() :: #{target_reg() => []}. % set -spec scan_bbs([label()], target_cfg(), target_liveness(), seen(), spill_state(), #{label() => [instr()]}, module()) -> {#{label() => [instr()]}, seen(), spill_state()}. scan_bbs([], _CFG, _Liveness, Seen, State, BBs, _Target) -> {BBs, Seen, State}; scan_bbs([L|Ls], CFG, Liveness, Seen0, State0, BBs, Target) -> {Code, _, Seen, State} = scan_bb(hipe_bb:code(Target:bb(CFG, L)), t_liveout(Liveness, L, Target), Seen0, State0, Target), scan_bbs(Ls, CFG, Liveness, Seen, State, BBs#{L=>Code}, Target). t_liveout(Liveness, L, Target) -> %% FIXME: unnecessary sort; liveout is sorted, reg_names(...) should be sorted %% or consist of a few sorted subsequences (per type) ordsets:from_list(reg_names(Target:liveout(Liveness, L), Target)). -spec reg_names([target_temp()], module()) -> [target_reg()]. reg_names(Regs, Target) -> [Target:reg_nr(X) || X <- Regs]. scan_bb([], Live, Seen, State, _Target) -> {[], Live, Seen, State}; scan_bb([I|Is0], Live0, Seen0, State0, Target) -> %% We could pass Target in the return tuple to make the stack frames %% smaller, but without multireturn opt, it's probably not worth it. {Is1, Live1, Seen1, State1} = scan_bb(Is0, Live0, Seen0, State0, Target), {TDef, TUse} = Target:def_use(I), Def = ordsets:from_list(reg_names(TDef, Target)), Use = ordsets:from_list(reg_names(TUse, Target)), Live = ordsets:union(Use, ToSpill = ordsets:subtract(Live1, Def)), Seen = add_seen(Def, add_seen(Use, Seen1)), State = case Target:defines_all_alloc(I) of false -> State1; true -> spill_all(ToSpill, Target, State1) end, NewI = #instr{tgt_instr=I, defuse={Def, Use}, is_move=Target:is_move(I)}, {[NewI|Is1], Live, Seen, State}. add_seen([], Seen) -> Seen; add_seen([R|Rs], Seen) -> add_seen(Rs, Seen#{R=>[]}). spill_all([], _Target, State) -> State; spill_all([R|Rs], Target, State=#spill_state{map=Map, ix=Ix}) -> case Target:is_precoloured(R) or maps:is_key(R, Map) of true -> spill_all(Rs, Target, State); false -> spill_all(Rs, Target, State#spill_state{map=Map#{R=>Ix}, ix=Ix+1}) end. transform_cfg(BBs0, SubMap) -> maps:map(fun(_, BB) -> transform_bb(BB, SubMap) end, BBs0). transform_bb(BB, SubMap) -> hipe_bb:mk_bb([I#instr{defuse={map_get_all_partial(Def, SubMap), map_get_all_partial(Use, SubMap)}} || I = #instr{defuse={Def,Use}} <- BB]). -spec translate_coloring(hipe_map(), _) -> hipe_map(). translate_coloring(SubColoring, InvMap) -> lists:map(fun({T, P}) -> {map_get(T, InvMap), P} end, SubColoring). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Interference graph partitioning %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% We partition the program %% The algorithm considers two kinds of components; those that are local to a %% basic block, and those that are not. The key is that any basic block belongs %% to at most two non-local components; one from the beginning to the first %% split point, and one from the end to the last split point. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Temp map ADT %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -type sub_map() :: #{target_temp() => temp()}. -type inv_map() :: #{temp() => target_temp()}. map_get(Temp, Map) when is_integer(Temp) -> maps:get(Temp, Map). %% map_get_all(Ts, Map) -> [map_get(T, Map) || T <- Ts]. map_get_all_partial([], _) -> []; map_get_all_partial([T|Ts], Map) when is_integer(T) -> case Map of #{T := R} -> [R|map_get_all_partial(Ts, Map)]; #{} -> map_get_all_partial(Ts, Map) end. -ifdef(DO_ASSERT). %%%%%%%%%%%%%%%%%%%% %% Check that the coloring is correct (if the IG is correct): %% %% Define these as 'ok' or 'report(X,Y)' depending on how much output you want. -define(report0(X,Y), ?IF_DEBUG_LEVEL(0,?msg(X, Y),ok)). -define(report(X,Y), ?IF_DEBUG_LEVEL(1,?msg(X, Y),ok)). -define(report2(X,Y), ?IF_DEBUG_LEVEL(2,?msg(X, Y),ok)). -define(report3(X,Y), ?IF_DEBUG_LEVEL(3,?msg(X, Y),ok)). check_coloring(Coloring, CFG, Target) -> ?report0("checking coloring ~p~n",[Coloring]), IG = hipe_ig:build(CFG, Target), check_cols(hipe_vectors:list(hipe_ig:adj_list(IG)), init_coloring(Coloring, Target)). init_coloring(Xs, Target) -> hipe_temp_map:cols2tuple(Xs, Target). check_color_of(X, Cols) -> %% if %% is_precoloured(X) -> %% phys_reg_color(X,Cols); %% true -> case hipe_temp_map:find(X, Cols) of unknown -> ?WARNING_MSG("node ~p: color not found~n", [X]), uncolored; C -> C end. check_cols([], _Cols) -> ?report("coloring valid~n",[]), true; check_cols([{X,Neighbours}|Xs], Cols) -> Cs = [{N, check_color_of(N, Cols)} || N <- Neighbours], C = check_color_of(X, Cols), case valid_coloring(X, C, Cs) of yes -> check_cols(Xs, Cols); {no,Invalids} -> ?msg("node ~p has same color (~p) as ~p~n", [X,C,Invalids]), check_cols(Xs, Cols) andalso false end. valid_coloring(_X, _C, []) -> yes; valid_coloring(X, C, [{Y,C}|Ys]) -> case valid_coloring(X, C, Ys) of yes -> {no, [Y]}; {no,Zs} -> {no, [Y|Zs]} end; valid_coloring(X, C, [_|Ys]) -> valid_coloring(X, C, Ys). %%%%%%%%%%%%%%%%%%%% %% Check that no register allocation opportinities were missed due to ?MODULE %% just_as_good_as(RegAllocMod, CFG, SpillIndex0, SpillLimit, Target, Options, Coloring, Unused) -> {CheckColoring, _} = RegAllocMod:regalloc(CFG, SpillIndex0, SpillLimit, Target, Options), Now = lists:sort([vt(C) || C <- Coloring]), Check = lists:sort([vt(C) || C={R,_} <- CheckColoring, not ordsets:is_element(R, Unused)]), case Now of Check -> true; _ -> io:fwrite(standard_error, "Colorings differ (~w, sub: ~w, full: ~w)!~n" %% "Sub:~w~n" "Unused: ~w~n" "Now:~w~nCorrect:~w~n", [Target, count(Coloring), count(CheckColoring), %% SubColoring, Unused, Now -- Check, Check -- Now]), false end. count(C) -> {length([[] || {_, {reg, _}} <- C]), length([[] || {_, {spill, _}} <- C])}. vt({R, {reg, _}}) -> {R, reg}; vt({R, {spill, _}}) -> {R, spill}. -endif. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Pseudo-target interface %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% analyze(Cfg, _ModRec) -> Cfg. bb(#cfg{bbs=BBs}, Ix, _ModRec) -> maps:get(Ix, BBs). args(Arity, #?MODULE{target=Target, sub=SubM}) -> map_get(Target:args(Arity), SubM). labels(#cfg{cfg=CFG}, #?MODULE{target=Target}) -> Target:labels(CFG). livein(#cfg{live=Live}, Lb, #?MODULE{target=Target, sub=SubM}) -> map_get_all_partial(reg_names(Target:livein(Live, Lb), Target), SubM). % Should we precompute? liveout(#cfg{live=Live}, Lb, #?MODULE{target=Target, sub=SubM}) -> map_get_all_partial(reg_names(Target:liveout(Live, Lb), Target), SubM). % dito uses(I, MR) -> element(2, def_use(I, MR)). defines(I, MR) -> element(1, def_use(I, MR)). def_use(#instr{defuse=DefUse}, _ModRec) -> DefUse. is_move(#instr{is_move=IM}, _ModRec) -> IM. is_fixed(Reg, #?MODULE{target=Target,inv=InvM}) -> Target:is_fixed(map_get(Reg, InvM)). % XXX: Is this hot? is_global(Reg, #?MODULE{target=Target,max_phys=MaxPhys}) when Reg < MaxPhys -> Target:is_global(Reg). % assume id-map is_precoloured(Reg, #?MODULE{max_phys=MaxPhys}) -> Reg < MaxPhys. reg_nr(Reg, _ModRec) -> Reg. % After mapping (naturally) non_alloc(#cfg{cfg=CFG}, #?MODULE{target=Target,sub=SubM}) -> map_get_all_partial(reg_names(Target:non_alloc(CFG), Target), SubM). number_of_temporaries(#cfg{max_reg=MaxR}, _ModRec) -> MaxR. allocatable(#?MODULE{target=Target}) -> Target:allocatable(). % assume id-map physical_name(Reg, _ModRec) -> Reg. % XXX: is this correct? all_precoloured(#?MODULE{target=Target}) -> Target:all_precoloured(). % dito var_range(#cfg{cfg=CFG, max_reg=MaxReg}, #?MODULE{target=Target}) -> {0, _} = Target:var_range(CFG), {0, MaxReg-1}. % What is the correct answer? breadthorder(#cfg{cfg=CFG}, #?MODULE{target=Target}) -> Target:breadthorder(CFG). postorder(#cfg{cfg=CFG}, #?MODULE{target=Target}) -> Target:postorder(CFG). reverse_postorder(#cfg{cfg=CFG}, #?MODULE{target=Target}) -> Target:reverse_postorder(CFG).