From 84adefa331c4159d432d22840663c38f155cd4c1 Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Fri, 20 Nov 2009 14:54:40 +0000 Subject: The R13B03 release. --- lib/hipe/opt/Makefile | 101 +++ lib/hipe/opt/hipe_schedule.erl | 1489 ++++++++++++++++++++++++++++++++++ lib/hipe/opt/hipe_schedule_prio.erl | 58 ++ lib/hipe/opt/hipe_spillmin.erl | 111 +++ lib/hipe/opt/hipe_spillmin_color.erl | 556 +++++++++++++ lib/hipe/opt/hipe_spillmin_scan.erl | 559 +++++++++++++ lib/hipe/opt/hipe_target_machine.erl | 93 +++ lib/hipe/opt/hipe_ultra_mod2.erl | 239 ++++++ lib/hipe/opt/hipe_ultra_prio.erl | 304 +++++++ 9 files changed, 3510 insertions(+) create mode 100644 lib/hipe/opt/Makefile create mode 100644 lib/hipe/opt/hipe_schedule.erl create mode 100644 lib/hipe/opt/hipe_schedule_prio.erl create mode 100644 lib/hipe/opt/hipe_spillmin.erl create mode 100644 lib/hipe/opt/hipe_spillmin_color.erl create mode 100644 lib/hipe/opt/hipe_spillmin_scan.erl create mode 100644 lib/hipe/opt/hipe_target_machine.erl create mode 100644 lib/hipe/opt/hipe_ultra_mod2.erl create mode 100644 lib/hipe/opt/hipe_ultra_prio.erl (limited to 'lib/hipe/opt') diff --git a/lib/hipe/opt/Makefile b/lib/hipe/opt/Makefile new file mode 100644 index 0000000000..972cf63944 --- /dev/null +++ b/lib/hipe/opt/Makefile @@ -0,0 +1,101 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +MODULES = hipe_spillmin hipe_spillmin_color hipe_spillmin_scan + +HRL_FILES= +ERL_FILES= $(MODULES:%=%.erl) +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# APP_FILE= +# APP_SRC= $(APP_FILE).src +# APP_TARGET= $(EBIN)/$(APP_FILE) +# +# APPUP_FILE= +# APPUP_SRC= $(APPUP_FILE).src +# APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_exported_vars +warn_missing_spec +warn_untyped_record + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +$(EBIN)/hipe_spillmin.beam: ../main/hipe.hrl ../flow/cfg.hrl +$(EBIN)/hipe_spillmin_color.beam: ../main/hipe.hrl ../flow/cfg.hrl +$(EBIN)/hipe_spillmin_scan.beam: ../main/hipe.hrl ../flow/cfg.hrl diff --git a/lib/hipe/opt/hipe_schedule.erl b/lib/hipe/opt/hipe_schedule.erl new file mode 100644 index 0000000000..4925b2927b --- /dev/null +++ b/lib/hipe/opt/hipe_schedule.erl @@ -0,0 +1,1489 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% INSTRUCTION SCHEDULER +%% +%% This is a basic ILP cycle scheduler: +%% * set cycle = 0 +%% * while ready[cycle] nonempty do +%% - take x with greatest priority from ready[cycle] +%% - try to schedule x; +%% * if scheduling x was possible, +%% - reserve resources +%% - add x to schedule and delete x from dag +%% - update earliest-time for all successor nodes +%% as max[earliest[y],cycle+latency[x]] +%% - if some node y now has no predecessors, +%% add y to ready[earliest[y]] +%% * if it was impossible, put x in ready[cycle+1] +%% (= try again) +%% +%% We use the following data structures: +%% 1. all nodes are numbered and indices used as array keys +%% 2. priority per node can be computed statically or dynamically +%% * statically: before scheduling, each node gets a priority value +%% * dynamically: at each cycle, compute priorities for all ready nodes +%% 3. earliest: earliest cycle of issue, starts at 0 +%% and is updated as predecessors issue +%% 4. predecessors: number of predecessors (0 = ready to issue) +%% 5. successors: list of {Latency,NodeID} +%% 6. ready: an array indexed by cycle-time (integer), where +%% ready nodes are kept. +%% 7. resources: a resource representation (ADT) that answers +%% certain queries, e.g., "can x be scheduled this cycle" +%% and "reserve resources for x". +%% 8. schedule: list of scheduled instructions {Instr,Cycle} +%% in the order of issue +%% 9. instructions: maps IDs back to instructions +%% +%% Inputs: +%% - a list of {ID,Node} pairs (where ID is a unique key) +%% - a dependence list {ID0,Latency,ID1}, which is used to +%% build the DAG. +%% +%% Note that there is some leeway in how things are represented +%% from here. +%% +%% MODIFICATIONS: +%% - Some basic blocks are not worth scheduling (e.g., GC save/restore code) +%% yet are pretty voluminous. How do we skip them? +%% - Scheduling should be done at finalization time: when basic block is +%% linearized and is definitely at Sparc assembly level, THEN reorder +%% stuff. + +-module(hipe_schedule). +-export([cfg/1, est_cfg/1, delete_node/5]). + +-include("../sparc/hipe_sparc.hrl"). + +%%-define(debug1,true). + +-define(debug2(Str,Args),ok). +%%-define(debug2(Str,Args),io:format(Str,Args)). + +-define(debug3(Str,Args),ok). +%%-define(debug3(Str,Args),io:format(Str,Args)). + +-define(debug4(Str,Args),ok). +%%-define(debug4(Str,Args),io:format(Str,Args)). + +-define(debug5(Str,Args),ok). +%%-define(debug5(Str,Args),io:format(Str,Args)). + +-define(debug(Str,Args),ok). +%%-define(debug(Str,Args),io:format(Str,Args)). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cfg +%% Argument : CFG - the control flow graph +%% Returns : CFG - A new cfg with scheduled blocks +%% Description : Takes each basic block and schedules them one by one. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cfg(CFG) -> + ?debug3("CFG: ~n~p", [CFG]), + update_all( [ {L, + hipe_bb:mk_bb( + block(L,hipe_bb:code(hipe_sparc_cfg:bb(CFG,L))) )} + || L <- hipe_sparc_cfg:labels(CFG) ], CFG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : update_all +%% Argument : Blocks - [{Label, Block}] , a list with labels and new code +%% used for updating the old CFG. +%% CFG - The old controlflow graph +%% Returns : An updated controlflow graph. +%% Description : Just swappes the basic blocks in the CFG to the scheduled one. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +update_all([],CFG) -> CFG; +update_all([{L,NewB}|Ls],CFG) -> + update_all(Ls,hipe_sparc_cfg:bb_add(CFG,L,NewB)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +est_cfg(CFG) -> + update_all([ {L, hipe_bb:mk_bb(est_block(hipe_bb:code(hipe_sparc_cfg:bb(CFG,L))))} + || L <- hipe_sparc_cfg:labels(CFG) ], CFG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Provides an estimation of how quickly a block will execute. +%% This is done by chaining all instructions in sequential order +%% by 0-cycle dependences (which means they will never be reordered), +%% then scheduling the mess. + +est_block([]) -> []; +est_block([I]) -> [I]; +est_block(Blk) -> + {IxBlk,DAG} = est_deps(Blk), + Sch = bb(IxBlk,DAG), + separate_block(Sch,IxBlk). + +est_deps(Blk) -> + IxBlk = indexed_bb(Blk), + DAG = deps(IxBlk), + {IxBlk, chain_instrs(IxBlk,DAG)}. + +chain_instrs([{N,_}|Xs],DAG) -> + chain_i(N,Xs,DAG). + +chain_i(_,[],DAG) -> DAG; +chain_i(N,[{M,_}|Xs],DAG) -> + NewDAG = dep_arc(N,zero_latency(),M,DAG), + chain_i(M,Xs,NewDAG). + +zero_latency() -> 0. + +lookup_instr([{N,I}|_], N) -> I; +lookup_instr([_|Xs], N) -> lookup_instr(Xs, N). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : block +%% Argument : Instrs - [Instr], list of all the instructions in a basic +%% block. +%% Returns : A new scheduled block +%% Description : Schedule a basic block +%% +%% Note: does not consider delay slots! +%% (another argument for using only annulled delay slots?) +%% * how do we add delay slots? somewhat tricky to +%% reconcile with the sort of scheduling we consider. +%% (as-early-as-possible) +%% => rewrite scheduler into as-late-as-possible? +%% (=> just reverse the dependence arcs??) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% Don't fire up the scheduler if there's no work to do. +block(_, []) -> + []; +block(_L, [I]) -> + case hipe_sparc:is_any_branch(I) of + true -> [hipe_sparc:nop_create(), I]; + false -> [I] + end; +block(_L, Blk) -> + IxBlk = indexed_bb(Blk), + case IxBlk of + [{_N, I}] -> % comments and nops may have been removed. + case hipe_sparc:is_any_branch(I) of + true -> [hipe_sparc:nop_create(), I]; + false -> [I] + end; + _ -> + Sch = bb(IxBlk, {DAG, _Preds} = deps(IxBlk)), + {NewSch, NewIxBlk} = fill_delays(Sch, IxBlk, DAG), + X = finalize_block(NewSch, NewIxBlk), + debug1_stuff(Blk, DAG, IxBlk, Sch, X), + X + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : fill_delays +%% Argument : Sch - List of {{cycle, C}, {node, N}} : C = current cycle +%% N = node index +%% IxBlk - Indexed block [{N, Instr}] +%% DAG - Dependence graph +%% Returns : {NewSch, NewIxBlk} - vector with new schedule and vector +%% with {N, Instr} +%% Description : Goes through the schedule from back to front looking for +%% branches/jumps. If one is found fill_del tries to find +%% an instr to fill the delayslot. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +fill_delays(Sch, IxBlk, DAG) -> + NewIxBlk = hipe_vectors:list_to_vector(IxBlk), + %% NewSch = hipe_vectors:list_to_vector(Sch), + NewSch = fill_del(length(Sch), hipe_vectors:list_to_vector(Sch), + NewIxBlk, DAG), + {NewSch, NewIxBlk}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : fill_del +%% Argument : N - current index in the schedule +%% Sch - schedule +%% IxBlk - indexed block +%% DAG - dependence graph +%% Returns : Sch - New schedule with possibly a delay instr in the last +%% position. +%% Description : If a call/jump is found fill_branch_delay/fill_call_delay +%% is called to find a delay-filler. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +fill_del(N, Sch, _IxBlk, _DAG) when N < 1 -> Sch; +fill_del(N, Sch, IxBlk, DAG) -> + Index = get_index(Sch, N), + ?debug2("Index for ~p: ~p~nInstr: ~p~n", + [N, Index, get_instr(IxBlk, Index)]), + NewSch = + case get_instr(IxBlk, Index) of + #call_link{} -> + fill_branch_delay(N - 1, N, Sch, IxBlk, DAG); + #jmp_link{} -> + fill_call_delay(N - 1, N, Sch, IxBlk, DAG); + #jmp{} -> + fill_call_delay(N - 1, N, Sch, IxBlk, DAG); + #b{} -> + fill_branch_delay(N - 1, N, Sch, IxBlk, DAG); + #br{} -> + fill_branch_delay(N - 1, N, Sch, IxBlk, DAG); + #goto{} -> + fill_branch_delay(N - 1, N, Sch, IxBlk, DAG); + _Other -> + Sch + end, + NewSch. + %% fill_del(N - 1, NewSch, IxBlk, DAG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : fill_call_delay +%% Argument : Cand - index in schedule of delay-candidate +%% Call - index in schedule of call +%% Sch - schedule vector: < {{cycle,Ci},{node,Nj}}, ... > +%% IxBlk - block vector: < {N, Instr1}, {N+1, Instr2} ... > +%% DAG - dependence graph +%% Returns : Sch - new updated schedule. +%% Description : Searches backwards through the schedule trying to find an +%% instr without conflicts with the Call-instr. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +fill_call_delay(Cand, _Call, Sch, _IxBlk, _DAG) when Cand < 1 -> Sch; +fill_call_delay(Cand, Call, Sch, IxBlk, DAG) -> + CandIndex = get_index(Sch, Cand), + CallIndex = get_index(Sch, Call), + CandI = get_instr(IxBlk, CandIndex), + case move_or_alu(CandI) of + true -> + case single_depend(CandIndex, CallIndex, DAG) of + false -> % Other instrs depends on Cand ... + fill_call_delay(Cand - 1, Call, Sch, IxBlk, DAG); + + true -> + CallI = get_instr(IxBlk, CallIndex), + + CandDefs = ordsets:from_list(hipe_sparc:defines(CandI)), + %% CandUses = ordsets:from_list(hipe_sparc:uses(CandI)), + %% CallDefs = ordsets:from_list(hipe_sparc:defines(CallI)), + CallUses = ordsets:from_list(hipe_sparc:uses(CallI)), + + Args = case CallI of + #jmp_link{} -> + ordsets:from_list( + hipe_sparc:jmp_link_args(CallI)); + #jmp{} -> + ordsets:from_list(hipe_sparc:jmp_args(CallI)); + #call_link{} -> + ordsets:from_list( + hipe_sparc:call_link_args(CallI)) + end, + CallUses2 = ordsets:subtract(CallUses, Args), + Conflict = ordsets:intersection(CandDefs, CallUses2), + %% io:format("single_depend -> true:~n ~p~n, ~p~n,~p~n",[CandI,CallI,DAG]), + %% io:format("Cand = ~p~nCall = ~p~n",[CandI,CallI]), + %% io:format("CandDefs = ~p~nCallDefs = ~p~n",[CandDefs,CallDefs]), + %% io:format("CandUses = ~p~nCallUses = ~p~n",[CandUses,CallUses]), + %% io:format("Args = ~p~nCallUses2 = ~p~n",[Args,CallUses2]), + %% io:format("Conflict = ~p~n",[Conflict]), + + case Conflict of + [] -> % No conflicts ==> Cand can fill delayslot after Call + update_schedule(Cand, Call, Sch); + _ -> % Conflict: try with preceeding instrs + fill_call_delay(Cand - 1, Call, Sch, IxBlk, DAG) + end + end; + false -> + fill_call_delay(Cand - 1, Call, Sch, IxBlk, DAG) + end. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : fill_branch_delay +%% Argument : Cand - index in schedule of delay-candidate +%% Branch - index in schedule of branch +%% Sch - schedule +%% IxBlk - indexed block +%% DAG - dependence graph +%% Returns : Sch - new updated schedule. +%% Description : Searches backwards through the schedule trying to find an +%% instr without conflicts with the Branch-instr. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +fill_branch_delay(Cand, _Br, Sch, _IxBlk, _DAG) when Cand < 1 -> Sch; +fill_branch_delay(Cand, Br, Sch, IxBlk, DAG) -> + CandIndex = get_index(Sch, Cand), + BrIndex = get_index(Sch, Br), + CandI = get_instr(IxBlk, CandIndex), + case move_or_alu(CandI) of + true -> + case single_depend(CandIndex, BrIndex, DAG) of + false -> % Other instrs depends on Cand ... + fill_branch_delay(Cand - 1, Br, Sch, IxBlk, DAG); + + true -> + BrI = get_instr(IxBlk, BrIndex), + CandDefs = ordsets:from_list(hipe_sparc:defines(CandI)), + %% CandUses = ordsets:from_list(hipe_sparc:uses(CandI)), + %% BrDefs = ordsets:from_list(hipe_sparc:defines(BrI)), + BrUses = ordsets:from_list(hipe_sparc:uses(BrI)), + + Conflict = ordsets:intersection(CandDefs, BrUses), + %% io:format("single_depend -> true: ~p~n, ~p~n,~p~n", [CandI, BrI, DAG]), + %% io:format("Cand = ~p~nBr = ~p~n",[CandI,BrI]), + %% io:format("CandDefs = ~p~nBrDefs = ~p~n",[CandDefs,BrDefs]), + %% io:format("CandUses = ~p~nBrUses = ~p~n",[CandUses,BrUses]), + %% io:format("Conflict = ~p~n",[Conflict]); + + case Conflict of + [] -> % No conflicts ==> + % Cand can fill delayslot after Branch + update_schedule(Cand, Br, Sch); + _ -> % Conflict: try with preceeding instrs + fill_branch_delay(Cand - 1, Br, Sch, IxBlk, DAG) + end + end; + false -> + fill_branch_delay(Cand - 1, Br, Sch, IxBlk, DAG) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : update_schedule +%% Argument : From - the position from where to switch indexes in Sch +%% To - the position to where to switch indexes in Sch +%% Sch - schedule +%% Returns : Sch - an updated schedule +%% Description : If From is the delay-filler and To is the Call/jump, the +%% schedule is updated so From gets index To, To gets index +%% To - 1, and the nodes between From and To gets old_index - 1. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +update_schedule(To, To, Sch) -> + {{cycle, C}, {node, _N} = Node} = hipe_vectors:get(Sch, To-1), + hipe_vectors:set(Sch, To-1, {{cycle, C+1}, Node}); +update_schedule(From, To, Sch) -> + Temp = hipe_vectors:get(Sch, From-1), + Sch1 = hipe_vectors:set(Sch, From-1, hipe_vectors:get(Sch, From)), + update_schedule(From + 1, To, hipe_vectors:set(Sch1, From, Temp)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : single_depend +%% Argument : N - Index of the delayslot candidate +%% M - Index of the node that N possibly has a single +%% depend to. +%% DAG - The dependence graph +%% Returns : true if no other nodes than N os depending on N +%% Description : Checks that no other nodes than M depends on N and that the +%% latency between them is zero or 1. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +single_depend(N, M, DAG) -> + Deps = hipe_vectors:get(DAG, N-1), + single_depend(M, Deps). + +single_depend(_N, []) -> true; +single_depend(N, [{0, N}]) -> true; +single_depend(N, [{1, N}]) -> true; +single_depend(_N, [{_Lat, _}|_]) -> false. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : get_index +%% Argument : Sch - schedule +%% N - index in schedule +%% Returns : Index - index of the node +%% Description : Returns the index of the node on position N in the schedule. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +get_index(Sch, N) -> + {{cycle, _C}, {node, Index}} = hipe_vectors:get(Sch,N-1), + Index. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : get_instr +%% Argument : IxBlk - indexed block +%% N - index in block +%% Returns : Instr +%% Description : Returns the instr on position N in the indexed block. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +get_instr(IxBlk, N) -> + {_, Instr} = hipe_vectors:get(IxBlk, N-1), + Instr. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : get_instr +%% Argument : Sch - schedule +%% IxBlk - indexed block +%% N - index in schedule +%% Returns : Instr +%% Description : Returns the instr on position N in the schedule. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +get_instr(Sch, IxBlk, N) -> + {{cycle, _C}, {node, Index}} = hipe_vectors:get(Sch, N-1), + {_, Instr} = hipe_vectors:get(IxBlk, Index-1), + Instr. + +separate_block(Sch,IxBlk) -> + sep_comments([{C,lookup_instr(IxBlk,N)} || {{cycle,C},{node,N}} <- Sch]). + +sep_comments([]) -> []; +sep_comments([{C,I}|Xs]) -> + [hipe_sparc:comment_create({cycle,C}), I | sep_comments(Xs,C)]. + +sep_comments([], _) -> []; +sep_comments([{C1,I}|Xs], C0) -> + if + C1 > C0 -> + [hipe_sparc:comment_create({cycle,C1}),I|sep_comments(Xs,C1)]; + true -> + [I|sep_comments(Xs, C0)] + end. + +finalize_block(Sch, IxBlk) -> + ?debug5("Sch: ~p~nIxBlk: ~p~n",[Sch,IxBlk]), + finalize_block(1, hipe_vectors:size(Sch), 1, Sch, IxBlk, []). + +finalize_block(N, End, _C, Sch, IxBlk, _Instrs) when N =:= End - 1 -> + NextLast = get_instr(Sch, IxBlk, N), + Last = get_instr(Sch, IxBlk, End), + ?debug5("NextLast: ~p~nLast: ~p~n",[NextLast,Last]), + case hipe_sparc:is_any_branch(Last) of + true -> % Couldn't fill delayslot ==> add NOP + [NextLast , hipe_sparc:nop_create(), Last]; + false -> % Last is a delayslot-filler ==> change order... + [Last, NextLast] + end; +finalize_block(N, End, C0, Sch, IxBlk, Instrs) -> + {{cycle, _C1}, {node, _M}} = hipe_vectors:get(Sch, N-1), + Instr = get_instr(Sch, IxBlk, N), + ?debug5("Instr: ~p~n~n",[Instr]), + [Instr | finalize_block(N + 1, End, C0, Sch, IxBlk, Instrs)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : bb +%% Argument : IxBlk - indexed block +%% DAG - {Dag, Preds} where Dag is dependence graph and +%% Preds is number of predecessors for each node. +%% Returns : Sch +%% Description : Initializes earliest-list, ready-list, priorities, resources +%% and so on, and calls the cycle_sched which does the scheduling +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +bb(IxBlk,DAG) -> + bb(length(IxBlk), IxBlk, DAG). + +bb(N,IxBlk,{DAG, Preds}) -> + Earliest = init_earliest(N), + BigArray = N*10, % "nothing" is this big :-) + Ready = hipe_schedule_prio:init_ready(BigArray,Preds), + I_res = init_instr_resources(N, IxBlk), + + Prio = hipe_schedule_prio:init_instr_prio(N,DAG), + Rsrc = init_resources(BigArray), + ?debug4("I_res: ~n~p~nPrio: ~n~p~nRsrc: ~n~p~n", [I_res,Prio,Rsrc]), + ?debug('cycle 1~n',[]), + Sch = empty_schedule(), + cycle_sched(1,Ready,DAG,Preds,Earliest,Rsrc,I_res,Prio,Sch,N,IxBlk). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cycle_sched +%% Argument : - C is current cycle, 1 or more. +%% - Ready is an array (Cycle -> [Node]) +%% yielding the collection of nodes ready to be +%% scheduled in a cycle. +%% - DAG is an array (Instr -> [{Latency,Instr}]) +%% represents the dependence DAG. +%% - Preds is an array (Instr -> NumPreds) +%% counts the number of predecessors +%% (0 preds = ready to be scheduled). +%% - Earl is an array (Instr -> EarliestCycle) +%% holds the earliest cycle an instruction can be scheduled. +%% - Rsrc is a 'resource ADT' that handles scheduler resource +%% management checks whether instruction can be scheduled +%% this cycle without a stall. +%% - I_res is an array (Instr -> Required_resources) +%% holds the resources required to schedule an instruction. +%% - Sch is the representation of the schedule current schedule. +%% - N is the number of nodes remaining to be scheduled +%% tells us when to stop the scheduler. +%% - IxBlk is the indexed block with instrs +%% Returns : present schedule +%% Description : Scheduler main loop. +%% Pick next ready node in priority order for cycle C until +%% none remain. +%% * check each node if it can be scheduled w/o stalling +%% * if so, schedule it +%% * otherwise, bump the node to the next cycle +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cycle_sched(C,Ready,DAG,Preds,Earl,Rsrc,I_res,Prio,Sch,N,IxBlk) -> + case hipe_schedule_prio:next_ready(C,Ready,Prio,IxBlk,DAG,Preds,Earl) of +% case hipe_schedule_prio:next_ready(C,Ready,Prio,IxBlk) of + {next,I,Ready1} -> + ?debug('try ~p~n==> ready = ~p~n',[I, Ready1]), + case resources_available(C,I,Rsrc,I_res) of + {yes,NewRsrc} -> + ?debug(' scheduled~n==> Rscrs = ~p~n',[NewRsrc]), + NewSch = add_to_schedule(I,C,Sch), + {ReadyNs,NewDAG,NewPreds,NewEarl} = + delete_node(C,I,DAG,Preds,Earl), + ?debug("NewPreds : ~p~n",[Preds]), + ?debug(' ReadyNs: ~p~n',[ReadyNs]), + NewReady = hipe_schedule_prio:add_ready_nodes(ReadyNs, + Ready1), + ?debug(' New ready: ~p~n',[NewReady]), + cycle_sched(C,NewReady,NewDAG,NewPreds,NewEarl, + NewRsrc,I_res,Prio,NewSch,N-1, IxBlk); + no -> + ?debug(' resource conflict~n',[]), + NewReady = hipe_schedule_prio:insert_node(C+1,I,Ready1), + cycle_sched(C,NewReady,DAG,Preds,Earl,Rsrc, + I_res,Prio,Sch,N,IxBlk) + end; + none -> % schedule next cycle if some node remains + if + N > 0 -> + ?debug('cycle ~p~n',[C+1]), + cycle_sched(C+1,Ready,DAG,Preds,Earl, + advance_cycle(Rsrc), + I_res,Prio,Sch,N, IxBlk); + true -> + present_schedule(Sch) + end + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : init_earliest +%% Argument : N - number of instrs +%% Returns : +%% Description : +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +init_earliest(N) -> + hipe_vectors:new(N,1). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Schedule is kept reversed until the end. + +-define(present_node(I,Cycle),{{cycle,Cycle},{node,I}}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : empty_schedule +%% Description : Returns an empty schedule. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +empty_schedule() -> []. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_to_schedule +%% Argument : I - instr +%% Cycle - cycle when I was placed +%% Sch - schedule +%% Description : Adds instr to schedule +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_to_schedule(I,Cycle,Sch) -> + [?present_node(I,Cycle)|Sch]. + +present_schedule(Sch) -> lists:reverse(Sch). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to resource manager: +%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : init_resources +%% Description : Yields a 'big enough' array mapping (Cycle -> Resources); +%% this array is called Rsrc below. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +init_resources(S) -> + hipe_target_machine:init_resources(S). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : init_instr_resources +%% Argument : Nodes - a list of the instructions +%% N - is the number of nodes +%% Description : return a vector (NodeID -> Resource_requirements) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +init_instr_resources(N,Nodes) -> + hipe_target_machine:init_instr_resources(N,Nodes). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : resources_available +%% Argument : Cycle - the current cycle +%% I - the current instruction (index = NodeID) +%% Rsrc - a map (Cycle -> Resources) +%% I_res - maps (NodeID -> Resource_requirements) +%% Description : returns {yes,NewResTab} | no +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +resources_available(Cycle,I,Rsrc,I_res) -> + hipe_target_machine:resources_available(Cycle,I,Rsrc,I_res). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : advance_cycle +%% Argument : Rsrc - resources +%% Description : Returns an empty resources-state +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +advance_cycle(Rsrc) -> + hipe_target_machine:advance_cycle(Rsrc). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : delete_node +%% Argument : Cycle - current cycle +%% I - index of instr +%% DAG - dependence dag +%% Preds - array with number of predecessors for nodes +%% Earl - array with earliest-times for nodes +%% Returns : {ReadyNs,NewDAG,NewPreds,NewEarl} +%% Description : Deletes node I and updates earliest times for the rest. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +delete_node(Cycle,I,DAG,Preds,Earl) -> + Succ = hipe_vectors:get(DAG,I-1), + NewDAG = hipe_vectors:set(DAG,I-1,scheduled), % provides debug 'support' + {ReadyNs,NewPreds,NewEarl} = update_earliest(Succ,Cycle,Preds,Earl,[]), + ?debug('earliest after ~p: ~p~n',[I,[{Ix+1,V} || {Ix,V} <- hipe_vectors:list(NewEarl)]]), + {ReadyNs,NewDAG,NewPreds,NewEarl}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : update_earliest +%% Argument : Succ - successor list +%% Cycle - current cycle +%% Preds - predecessors +%% Earl - earliest times for nodes +%% Ready - array with readynodes for cycles +%% Returns : {Ready,Preds,Earl} +%% Description : Updates the earliest times for nodes and updates number of +%% predecessors for nodes +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +update_earliest([],_Cycle,Preds,Earl,Ready) -> + {Ready,Preds,Earl}; +update_earliest([{Lat,N}|Xs],Cycle,Preds,Earl,Ready) -> + Old_earl = hipe_vectors:get(Earl,N-1), + New_earl = erlang:max(Old_earl,Cycle+Lat), + NewEarl = hipe_vectors:set(Earl,N-1,New_earl), + Num_preds = hipe_vectors:get(Preds,N-1), + NewPreds = hipe_vectors:set(Preds,N-1,Num_preds-1), + if + Num_preds =:= 0 -> + ?debug('inconsistent DAG~n',[]), + exit({update_earliest,N}); + Num_preds =:= 1 -> + NewReady = [{New_earl,N}|Ready], + NewPreds2 = hipe_vectors:set(NewPreds,N-1,0), + update_earliest(Xs,Cycle,NewPreds2,NewEarl,NewReady); + is_integer(Num_preds), Num_preds > 1 -> + update_earliest(Xs,Cycle,NewPreds,NewEarl,Ready) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Collect instruction dependences. +%% +%% Three forms: +%% - data/register +%% * insert RAW, WAR, WAW dependences +%% - memory +%% * stores serialize memory references +%% * alias analysis may allow loads to bypass stores +%% - control +%% * unsafe operations are 'trapped' between branches +%% * branches are ordered +%% +%% returns { [{Index,Instr}], DepDAG } +%% DepDAG is defined below. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : deps +%% Argument : BB - Basic block +%% Returns : {IxBB,DAG} - indexed block and dependence graph. DAG consists +%% of both Dag and Preds, where Preds is number +%% of predecessors for nodes. +%% Description : Collect instruction dependences. +%% +%% Three forms: +%% - data/register +%% * insert RAW, WAR, WAW dependences +%% - memory +%% * stores serialize memory references +%% * alias analysis may allow loads to bypass stores +%% - control +%% * unsafe operations are 'trapped' between branches +%% * branches are ordered +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +deps(IxBB) -> + N = length(IxBB), + DAG = empty_dag(N), % The DAG contains both dependence-arcs and + % number of predeccessors... + {_DepTab,DAG1} = dd(IxBB, DAG), + DAG2 = md(IxBB, DAG1), + cd(IxBB, DAG2). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : empty_dag +%% Argument : N - number of nodes +%% Returns : empty DAG +%% Description : DAG consists of dependence graph and predeccessors +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +empty_dag(N) -> + {hipe_vectors:new(N, []), hipe_vectors:new(N, 0)}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : indexed_bb +%% Argument : BB - basic block +%% Returns : [{N, Instr}] +%% Description : Puts indexes to all instrs of a block, removes comments. +%% NOP's are also removed because if both sparc_schedule and +%% sparc_post_schedule options are used, the first pass will +%% add nop's before the branch if necessary, and these are +%% removed before scheduling the second pass. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +indexed_bb(BB) -> + indexed_bb(BB,1). + +indexed_bb([],_N) -> []; +indexed_bb([X|Xs],N) -> + case X of + #comment{} -> + indexed_bb(Xs,N); + #nop{} -> + indexed_bb(Xs,N); + _Other -> + [{N,X}|indexed_bb(Xs,N+1)] + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : dep_arc +%% Argument : N - Current node +%% Lat - Latency from current node to M +%% M - The dependent node +%% DAG - The dependence graph. Consists of both DAG and +%% predeccessors +%% Returns : A new DAG with the arc added and number of predeccessors for +%% M increased. +%% Description : Adds a new arc to the graph, if an older arc goes from N to M +%% it will be replaced with a new arc {max(OldLat, NewLat), M}. +%% Number of predeccessors for node M is increased. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +dep_arc(N, Lat, M, {Dag,Preds}) -> + OldDeps = hipe_vectors:get(Dag, N-1), + %% io:format("{OldDeps} = {~p}~n",[OldDeps]), + {NewDeps, Status} = add_arc(Lat, M, OldDeps), + %% io:format("{NewDeps, Status} = {~p, ~p}~n",[NewDeps, Status]), + NewDag = hipe_vectors:set(Dag, N-1, NewDeps), + NewPreds = case Status of + added -> % just increase preds if new arc was added + OldPreds = hipe_vectors:get(Preds, M-1), + hipe_vectors:set(Preds, M-1, OldPreds + 1); + non_added -> + Preds + end, + {NewDag, NewPreds}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_arc +%% Argument : Lat - The latency from current node to To. +%% To - The instr-id of the node which the dependence goes to +%% Arcs - The dependecies that are already in the dep-graph +%% Returns : A dependence graph sorted by To. +%% Description : A new arc that is added is sorted in the right place, and if +%% there is already an arc between nodes A and B, the one with +%% the greatest latency is choosen. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_arc(Lat,To, []) -> {[{Lat, To}], added}; +add_arc(Lat1, To, [{Lat2, To} | Arcs]) -> + {[{erlang:max(Lat1, Lat2), To} | Arcs], non_added}; +add_arc(Lat1,To1, [{Lat2, To2} | Arcs]) when To1 < To2 -> + {[{Lat1, To1}, {Lat2, To2} | Arcs], added}; +add_arc(Lat1 ,To1, [{Lat2, To2} | Arcs]) -> + {Arcs1, Status} = add_arc(Lat1, To1, Arcs), + {[{Lat2, To2} | Arcs1], Status}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The register/data dependence DAG of a block is represented +%% as a mapping (Variable -> {NextWriter,NextReaders}) +%% where NextWriter is a pair {Ix,Type} +%% and NextReaders is a list of pairs {Ix,Type}. +%% +%% Type is used to determine latencies of operations; on the UltraSparc, +%% latencies of arcs (n -> m) are determined by both n and m. (E.g., if +%% n is an integer op and m is a store, then latency is 0; if m is an +%% integer op, it's 1.) + +dd([],DAG) -> { empty_deptab(), DAG }; +dd([{N,I}|Is],DAG0) -> + {DepTab,DAG1} = dd(Is,DAG0), + add_deps(N,I,DepTab,DAG1). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_deps +%% Argument : N - current node +%% Instr - current instr +%% DepTab - hashtable with {next-writer, next-readers} for reg +%% DAG - dependence graph +%% Returns : {DepTab, BlockInfo, DAG} - with new values +%% Description : Adds dependencies for node N to the graph. The registers that +%% node N defines and uses are used for computing the +%% dependencies to the following nodes. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_deps(N,Instr,DepTab,DAG) -> + {Ds,Us} = def_use(Instr), + Type = dd_type(Instr), + {DepTab1,DAG1} = add_write_deps(Ds,N,Type,DepTab,DAG), + add_read_deps(Us,N,Type,DepTab1,DAG1). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Instructions are classified into symbolic categories, +%% which are subsequently used to determine operation latencies +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +dd_type(Instr) -> + case Instr of + #b{} -> branch; + %% #br{} -> branch; + #call_link{} -> branch; + #jmp_link{} -> branch; + #jmp{} -> branch; + #goto{} -> branch; + #load{} -> load; + #store{} -> store; + #alu{} -> alu; + #move{} -> alu; + #multimove{} -> + Src = hipe_sparc:multimove_src(Instr), + Lat = round(length(Src)/2), + {mmove,Lat}; + #sethi{} -> alu; + #alu_cc{} -> alu_cc; + %% #cmov_cc{} -> cmov_cc; + %% #cmov_r{} -> alu; + #load_atom{} -> alu; + #load_address{} -> alu; + #pseudo_enter{} -> pseudo; + #pseudo_pop{} -> pseudo; + #pseudo_return{} -> pseudo; + #pseudo_spill{} -> pseudo; + #pseudo_unspill{} -> pseudo + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_write_deps +%% Argument : Defs - registers that node N defines. +%% N - current node +%% Ty - the type of current instr +%% DepTab - Dependence-table +%% DAG - The dependence graph. +%% Returns : {DepTab,DAG} - with new values +%% Description : Adds dependencies to the graph for nodes that depends on the +%% registers that N defines. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_write_deps([],_N,_Ty,DepTab,DAG) -> {DepTab,DAG}; +add_write_deps([D|Ds],N,Ty,DepTab,DAG) -> + {NewDepTab,NewDAG} = add_write_dep(D,N,Ty,DepTab,DAG), + add_write_deps(Ds,N,Ty,NewDepTab,NewDAG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_write_dep +%% Description : Updates the dependence table with N as next writer, and +%% updates the DAG with the dependencies from N to subsequent +%% nodes. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_write_dep(X,N,Ty,DepTab,DAG) -> + {NxtWriter,NxtReaders} = lookup(X,DepTab), + NewDepTab = writer(X,N,Ty,DepTab), + NewDAG = write_deps(N,Ty,NxtWriter,NxtReaders,DAG), + {NewDepTab, NewDAG}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : write_deps +%% Argument : Instr - Current instr +%% Ty - Type of current instr +%% NxtWriter - The node that is the next writer of the ragister +%% that Instr defines. +%% NxtReaders - The nodes that are subsequent readers of the +%% register that N defines. +%% DAG - The dependence graph +%% Returns : Calls raw_deps that finally returns a new DAG with the new +%% dependence arcs added. +%% Description : If a next writer exists a dependence arc for this node is +%% added, and after this raw_deps is called to compute the +%% arcs for read-after-write dependencies. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +write_deps(Instr,Ty,NxtWriter,NxtReaders,DAG) -> + DAG1 = case NxtWriter of + none -> + DAG; + {Instr,_} -> + DAG; + {Wr,WrTy} -> + dep_arc(Instr, + hipe_target_machine:waw_latency(Ty,WrTy), + Wr, DAG) + end, + raw_deps(Instr,Ty,NxtReaders,DAG1). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : raw_deps +%% Argument : Instr - current instr +%% Type - type of instr +%% Readers - subsequent readers +%% DAG - dependence graph +%% Returns : DAG - A new DAG with read-after-write dependencies added +%% Description : Updates the DAG with the dependence-arcs from Instr to the +%% subsequent readers, with the appropriate latencies. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +raw_deps(_Instr,_Type,[],DAG) -> DAG; +raw_deps(Instr,Ty,[{Rd,RdTy}|Xs],DAG) -> + raw_deps(Instr,Ty,Xs, + dep_arc(Instr,hipe_target_machine:raw_latency(Ty,RdTy), + Rd,DAG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_read_deps +%% Argument : Uses - The registers that node N uses. +%% N - Index of the current node. +%% Ty - Type of current node. +%% DepTab - Dependence table +%% DAG - Dependence graph +%% Returns : {DepTab, DAG} - with updated values. +%% Description : Adds the read dependencies from node N to subsequent ones, +%% according to the registers that N uses. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_read_deps([],_N,_Ty,DepTab,DAG) -> {DepTab,DAG}; +add_read_deps([U|Us],N,Ty,DepTab,DAG) -> + {NewDepTab,NewDAG} = add_read_dep(U,N,Ty,DepTab,DAG), + add_read_deps(Us,N,Ty,NewDepTab,NewDAG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_read_dep +%% Argument : X - Used register +%% N - Index of checked instr +%% Ty - Type of checked instr +%% DepTab - Hashtable with {next-writer, next-readers} +%% DAG - Dependence graph +%% Returns : {DepTab, DAG} - with updated values +%% Description : Looks up what the next-writer/next-readers are, and adjusts +%% the table with current node as new reader. Finally +%% read-dependencies are added to the DAG. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +add_read_dep(X,N,Ty,DepTab,DAG) -> + {NxtWriter,_NxtReaders} = lookup(X,DepTab), + NewDepTab = reader(X,N,Ty,DepTab), + NewDAG = read_deps(N,Ty,NxtWriter,DAG), + {NewDepTab, NewDAG}. + +% If NxtWriter is 'none', then this var is not written subsequently +% Add WAR from Instr to NxtWriter (if it exists) +% *** UNFINISHED *** +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : read_deps +%% Argument : N - Index of current node +%% Ty - Type of current node +%% Writer - tuple {NextWriter, WrType} where NextWriter is the +%% subsequent instr that writes this register next time, +%% and WrType is the type of that instr. +%% DAG - The dependence graph +%% Returns : DAG +%% Description : Returns a new DAG if a next-writer exists, otherwise the old +%% DAG is returned. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +read_deps(_Instr,_Ty,none,DAG) -> + DAG; +read_deps(_Instr,_Ty,{_Instr,_},DAG) -> + DAG; +read_deps(Instr,Ty,{NxtWr,NxtWrTy},DAG) -> + dep_arc(Instr,hipe_target_machine:war_latency(Ty,NxtWrTy),NxtWr, + DAG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : empty_deptab +%% Description : Creates an empty dependence table (hash-table) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +empty_deptab() -> + gb_trees:empty(). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : lookup +%% Argument : X - key (register) +%% DepTab - dependence table +%% Returns : {NextWriter, NextReaders} +%% Description : Returns next writer and a list of following readers on +%% register X. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +lookup(X, DepTab) -> + case gb_trees:lookup(X, DepTab) of + none -> + {none, []}; + {value, {W, Rs} = Val} -> + Val + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : writer +%% Argument : X - key (register) +%% N - index of writer +%% Ty - type of writer +%% DepTab - dependence table to be updated +%% Returns : DepTab - new dependence table +%% Description : Sets N tobe next writer on X +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +writer(X, N, Ty, DepTab) -> + gb_trees:enter(X, {{N, Ty}, []}, DepTab). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : reader +%% Argument : X - key (register) +%% N - index of reader +%% Ty - type of reader +%% DepTab - dependence table to be updated +%% Returns : DepTab - new dependence table +%% Description : Adds N to the dependence table as a reader. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +reader(X,N,Ty,DepTab) -> + {W,Rs} = lookup(X,DepTab), + gb_trees:enter(X,{W,[{N,Ty}|Rs]},DepTab). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The following version of md/2 separates heap- and stack operations, +%% which allows for greater reordering. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : md +%% Argument : IxBB - indexed block +%% DAG - dependence graph +%% Returns : DAG - new dependence graph +%% Description : Adds arcs for load/store dependencies to the DAG. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +md(IxBB, DAG) -> + md(IxBB,empty_md_state(),DAG). + +md([],_,DAG) -> DAG; +md([{N,I}|Is],St,DAG) -> + case md_type(I) of + other -> + md(Is,St,DAG); + {st,T} -> + { WAW_nodes, WAR_nodes, NewSt } = st_overlap(N,T,St), + md(Is,NewSt, + md_war_deps(WAR_nodes,N,md_waw_deps(WAW_nodes,N,DAG))); + {ld,T} -> + { RAW_nodes, NewSt } = ld_overlap(N,T,St), + md(Is,NewSt, + md_raw_deps(RAW_nodes,N,DAG)) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : md_war_deps +%% Argument : WAR_nodes - write-after-read nodes depending on N +%% N - index of current instr +%% DAG - dependence graph +%% Returns : DAG - updated DAG +%% Description : Adds arcs for write-after-read dependencies for N +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +md_war_deps([],_,DAG) -> DAG; +md_war_deps([M|Ms],N,DAG) -> + md_war_deps(Ms,N,dep_arc(M,hipe_target_machine:m_war_latency(),N,DAG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : md_waw_deps +%% Argument : WAW_nodes - write-after-write nodes depending on N +%% N - index of current instr +%% DAG - dependence graph +%% Returns : DAG - updated DAG +%% Description : Adds arcs for write-after-write dependencies for N +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +md_waw_deps([],_,DAG) -> DAG; +md_waw_deps([M|Ms],N,DAG) -> + md_waw_deps(Ms,N,dep_arc(M,hipe_target_machine:m_waw_latency(),N,DAG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : md_raw_deps +%% Argument : RAW_nodes - read-after-write nodes depending on N +%% N - index of current instr +%% DAG - dependence graph +%% Returns : DAG - updated DAG +%% Description : Adds arcs for read-after-write dependencies for N +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +md_raw_deps([],_,DAG) -> DAG; +md_raw_deps([M|Ms],N,DAG) -> + md_raw_deps(Ms,N,dep_arc(M,hipe_target_machine:m_raw_latency(),N,DAG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : empty_md_state +%% Description : Returns an empty memorydependence state, eg. 4 lists +%% representing {StackStores, HeapStores, StackLoads, HeapLoads} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +empty_md_state() -> {[], [], [], []}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : md_type +%% Argument : I - instr +%% Description : Maps the instr-type to a simplified type, telling if it's +%% store/load resp. heap or stack. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +md_type(I) -> + case I of + #load{} -> + Sp = hipe_sparc_registers:stack_pointer(), + Src = hipe_sparc:load_src(I), + N = hipe_sparc:reg_nr(Src), + Off = hipe_sparc:load_off(I), + if + N =:= Sp -> % operation on stack + {ld,{sp,Off}}; + true -> + {ld,{hp,Src,Off}} + end; + #store{} -> + Sp = hipe_sparc_registers:stack_pointer(), + Dst = hipe_sparc:store_dest(I), + N = hipe_sparc:reg_nr(Dst), + Off = hipe_sparc:store_off(I), + if + N =:= Sp -> + {st,{sp,Off}}; + true -> + {st,{hp,Dst,Off}} + end; + _ -> + other + end. + +%% Given a memory operation and a 'memory op state', +%% overlap(N,MemOp,State) returns { Preceding_Dependent_Ops, NewState }. +%% which are either a tuple { WAW_deps, WAR_deps } or a list RAW_deps. +%% +%% NOTES: +%% Note that Erlang's semantics ("heap stores never overwrite existing data") +%% means we can be quite free in reordering stores to the heap. +%% Ld/St to the stack are simply handled by their offsets; since we do not +%% rename the stack pointer, this is sufficient. +%% *** We assume all memory ops have uniform size = 4 *** +%% +%% NOTES: +%% The method mentioned above has now been changed because the assumption that +%% "heap stores never overwrite existing data" caused a bug when the +%% process-pointer was treated the same way as the heap. We were also told +%% that the semantics can possibly change in the future, so it would be more +%% safe to treat the heap store/loads as the stack. +%% A future improvement can be to do an alias analysis to give more freedom +%% in reordering stuff... +%% +%% Alias state: +%% { [StackOp], [HeapOp], [StackOp], [HeapOp] } +%% where StackOp = {InstrID, Offset} +%% HeapOp = {InstrID, Reg, Offset} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : st_overlap +%% Argument : N - Index of current node +%% Type - {sp,Off} or {hp,Dst,Off}, store on stack or heap +%% State - { [StackStrs], [HeapStrs], [StackLds], [HeapLds] } +%% where StackStrs/StackLds = {InstrID, Offset} +%% and HeapStrs/HeapLds = {InstrID, Reg, Offset} +%% Returns : { DepStrs, DepLds, State } - +%% where DepStrs/DepLds = [NodeId] +%% and State is the new state +%% Description : Adds dependencies for overlapping stores. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +st_overlap(N, {sp, Off}, {St_Sp, St_Hp, Ld_Sp, Ld_Hp}) -> + {DepSt, IndepSt_Sp} = st_sp_dep(St_Sp, Off), + {DepLd, IndepLd_Sp} = ld_sp_dep(Ld_Sp, Off), + {DepSt, DepLd, {[{N, Off}|IndepSt_Sp], St_Hp, IndepLd_Sp, Ld_Hp}}; +st_overlap(N, {hp, Dst, Off}, {St_Sp, St_Hp, Ld_Sp, Ld_Hp}) -> + DstOff = {Dst, Off}, + {DepSt,_IndepSt_Hp} = st_hp_dep(St_Hp, DstOff), + {DepLd, IndepLd_Hp} = ld_hp_dep(Ld_Hp, DstOff), + {DepSt, DepLd, {St_Sp, [{N, Dst, Off}|St_Hp], Ld_Sp, IndepLd_Hp}}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : ld_overlap +%% Argument : N - Index of current node +%% Type - {sp,Off} or {hp,Dst,Off}, store on stack or heap +%% State - { [StackStrs], [HeapStrs], [StackLds], [HeapLds] } +%% where StackStrs/StackLds = {InstrID, Offset} +%% and HeapStrs/HeapLds = {InstrID, Reg, Offset} +%% Returns : { DepStrs, State } +%% Description : Adds dependencies for overlapping laods +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +ld_overlap(N, {sp, Off}, {St_Sp, St_Hp, Ld_Sp, Ld_Hp}) -> + DepSt = sp_dep_only(St_Sp, Off), + {DepSt, {St_Sp, St_Hp, [{N, Off}|Ld_Sp], Ld_Hp}}; +ld_overlap(N, {hp, Src, Off}, {St_Sp, St_Hp, Ld_Sp, Ld_Hp}) -> + DepSt = hp_dep_only(St_Hp, Src, Off), + {DepSt, {St_Sp, St_Hp, Ld_Sp, [{N, Src, Off}|Ld_Hp]}}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : st_sp_dep +%% Description : Adds dependencies that are depending on a stack store +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +st_sp_dep(Stores, Off) -> + sp_dep(Stores, Off, [], []). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : ld_sp_dep +%% Description : Adds dependencies that are depending on a stack load +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +ld_sp_dep(Loads, Off) -> + sp_dep(Loads, Off, [], []). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : st_hp_dep +%% Description : Adds dependencies that are depending on a heap store +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +st_hp_dep(Stores, {_Reg, _Off} = RegOff) -> + hp_dep(Stores, RegOff, [], []). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : ld_hp_dep +%% Description : Adds dependencies that are depending on a heap load +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +ld_hp_dep(Loads, {_Reg, _Off} = RegOff) -> + hp_dep(Loads, RegOff, [], []). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : sp_dep +%% Description : Returns {Dependent, Independent} which are lists of nodes +%% that depends or not on a stack load/store +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +sp_dep([], _Off, Dep, Indep) -> {Dep, Indep}; +sp_dep([{N,Off}|Xs], Off, Dep, Indep) -> + sp_dep(Xs, Off, [N|Dep], Indep); +sp_dep([X|Xs], Off, Dep, Indep) -> + sp_dep(Xs, Off, Dep, [X|Indep]). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : hp_dep +%% Description : Returns {Dependent, Independent} which are lists of nodes +%% that depends or not on a heap load/store +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +hp_dep([], {_Reg,_Off}, Dep, Indep) -> {Dep,Indep}; +hp_dep([{N,Reg,Off1}|Xs], {Reg,Off}, Dep, Indep) when Off1 =/= Off -> + hp_dep(Xs, {Reg,Off}, Dep, [{N,Reg,Off1}|Indep]); +hp_dep([{N,_,_}|Xs], {Reg,Off}, Dep, Indep) -> + hp_dep(Xs, {Reg,Off}, [N|Dep], Indep). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : sp_dep_only +%% Description : Returns a list of nodes that are depending on a stack store +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +sp_dep_only(Stores, Off) -> + [N || {N,Off0} <- Stores, Off =:= Off0]. + +%% Dependences from heap stores to heap loads. +%% *** UNFINISHED *** +%% - but works +%% This is somewhat subtle: +%% - a heap load can only bypass a heap store if we KNOW it won't +%% load the stored value +%% - unfortunately, we do not know the relationships between registers +%% at this point, so we can't say that store(p+4) is independent of +%% load(q+0). +%% (OR CAN WE? A bit closer reasoning might show that it's possible?) +%% - We can ONLY say that st(p+c) and ld(p+c') are independent when c /= c' +%% +%% (As said before, it might be possible to lighten this restriction?) + +hp_dep_only([], _Reg, _Off) -> []; +hp_dep_only([{_N,Reg,Off_1}|Xs], Reg, Off) when Off_1 =/= Off -> + hp_dep_only(Xs, Reg, Off); +hp_dep_only([{N,_,_}|Xs], Reg, Off) -> + [N|hp_dep_only(Xs, Reg, Off)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Control dependences: +%% - add dependences so that +%% * branches are performed in order +%% * unsafe operations are 'fenced in' by surrounding branches +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cd +%% Argument : IxBB - indexed block +%% DAG - dependence graph +%% Returns : DAG - new dependence graph +%% Description : Adds conditional dependencies to the DAG +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cd(IxBB,DAG) -> + cd(IxBB, DAG, none, [], []). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cd +%% Argument : IxBB - indexed block +%% DAG - dependence graph +%% PrevBr - previous branch +%% PrevUnsafe - previous unsafe instr (mem-op) +%% PrevOthers - previous other instrs, used to "fix" preceeding +%% instrs so they don't bypass a branch. +%% Returns : DAG - new dependence graph +%% Description : Adds conditional dependencies to the graph. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cd([], DAG, _PrevBr, _PrevUnsafe, _PrevOthers) -> + DAG; +cd([{N,I}|Xs], DAG, PrevBr, PrevUnsafe, PrevOthers) -> + case cd_type(I) of + {branch,Ty} -> + DAG1 = cd_branch_to_other_deps(N, PrevOthers, DAG), + NewDAG = cd_branch_deps(PrevBr, PrevUnsafe, N, Ty, DAG1), + cd(Xs,NewDAG,{N,Ty},[],[]); + {unsafe,Ty} -> + NewDAG = cd_unsafe_deps(PrevBr,N,Ty,DAG), + cd(Xs, NewDAG, PrevBr, [{N,Ty}|PrevUnsafe], PrevOthers); + {other,_Ty} -> + cd(Xs, DAG, PrevBr, PrevUnsafe, [N|PrevOthers]) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cd_branch_to_other_deps +%% Argument : N - index of branch +%% Ms - list of indexes of "others" preceeding instrs +%% DAG - dependence graph +%% Returns : DAG - new graph +%% Description : Makes preceeding instrs fixed so they don't bypass a branch +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cd_branch_to_other_deps(_, [], DAG) -> + DAG; +cd_branch_to_other_deps(N, [M | Ms], DAG) -> + cd_branch_to_other_deps(N, Ms, dep_arc(M, zero_latency(), N, DAG)). + +%% Is the operation a branch, an unspeculable op or something else? + +%% Returns +%% {branch,BranchType} +%% {unsafe,OpType} +%% {other,OpType} + +%% *** UNFINISHED *** +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cd_type +%% Argument : I - instr +%% Description : Maps instrs to a simpler type. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cd_type(I) -> + case I of + #goto{} -> + {branch,uncond}; + #br{} -> + {branch,'cond'}; + #b{} -> + {branch,'cond'}; + #call_link{} -> + {branch,call}; + #jmp_link{} -> + {branch,call}; + #jmp{} -> + {branch,call}; + #load{} -> + {unsafe,load}; + #store{} -> + {unsafe,load}; + T -> + {other,T} + end. + +%% add dependences to keep order of branches + unspeculable ops: +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cd_branch_deps +%% Argument : PrevBr - preceeding branch +%% PrevUnsafe - preceeding unsafe ops, eg, mem-ops +%% N - current id. +%% Ty - type of current instr +%% DAG - dependence graph +%% Returns : DAG - new DAG +%% Description : Adds arcs between branches and calls deps_to_unsafe that adds +%% arcs between branches and unsafe ops. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cd_branch_deps(PrevBr, PrevUnsafe, N, Ty, DAG) -> + DAG1 = case PrevBr of + none -> + DAG; + {Br,BrTy} -> + dep_arc(Br, + hipe_target_machine:br_br_latency(BrTy,Ty), + N, DAG) + end, + deps_to_unsafe(PrevUnsafe, N, Ty, DAG1). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : deps_to_unsafe +%% Description : Adds dependencies between unsafe's and branches +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +deps_to_unsafe([], _, _, DAG) -> DAG; +deps_to_unsafe([{M,UTy}|Us], N, Ty, DAG) -> + deps_to_unsafe(Us,N,Ty, + dep_arc(M, hipe_target_machine:unsafe_to_br_latency(UTy,Ty), + N, DAG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cd_unsafe_deps +%% Description : Adds dependencies between branches and unsafe's +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +cd_unsafe_deps(none, _, _, DAG) -> + DAG; +cd_unsafe_deps({Br,BrTy}, N, Ty, DAG) -> + dep_arc(Br, hipe_target_machine:br_to_unsafe_latency(BrTy, Ty), N, DAG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : def_use +%% Argument : Instr +%% Description : Returns the registers that Instr defines resp. uses as 2 lists +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +def_use(Instr) -> + {hipe_sparc:defines(Instr), hipe_sparc:uses(Instr)}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : move_or_alu +%% Description : True if the instruction is a move or an alu; false otherwise +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +move_or_alu(#move{}) -> true; +move_or_alu(#alu{}) -> true; +move_or_alu(_) -> false. + +%% Debugging stuff below %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-ifdef(debug1). +debug1_stuff(Blk, DAG, IxBlk, Sch, X) -> + io:format("Blk: ~p~n",[Blk]), + io:format("DAG: ~n~p~n~p",[DAG,IxBlk]), + io:format("~n"), + print_instrs(IxBlk), + print_sch(Sch, IxBlk), + print_instrs2(X). + +print_instrs([]) -> + io:format("~n"); +print_instrs([{N,Instr} | Instrs]) -> + io:format("(~p): ",[N]), + hipe_sparc_pp:pp_instr(Instr), + io:format("~p~n",[element(1,Instr)]), + print_instrs(Instrs). + +print_instrs2([]) -> + io:format("~n"); +print_instrs2([Instr | Instrs]) -> + hipe_sparc_pp:pp_instr(Instr), + print_instrs2(Instrs). + +print_sch([],_) -> io:format("~n"); +print_sch([{{cycle,Cycle},{node,I}} | Rest], IxBlk) -> + io:format("{C~p, N~p} ",[Cycle,I]), + print_node(I, IxBlk), + print_sch(Rest, IxBlk). + +print_node(_, []) -> + io:format("~n"); +print_node(I, [{I, Instr} | _]) -> + hipe_sparc_pp:pp_instr(Instr); +print_node(I, [_ | IxBlk]) -> + print_node(I, IxBlk). +-else. +debug1_stuff(_Blk, _DAG, _IxBlk, _Sch, _X) -> + ok. +-endif. diff --git a/lib/hipe/opt/hipe_schedule_prio.erl b/lib/hipe/opt/hipe_schedule_prio.erl new file mode 100644 index 0000000000..4d078b007d --- /dev/null +++ b/lib/hipe/opt/hipe_schedule_prio.erl @@ -0,0 +1,58 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% PRIORITY HANDLING AND PRIORITY CALCULATION +%% +%% Handling of ready nodes and priorities. +%% - at present, all nodes have the same priority and so on. +%% +%% *** UNFINISHED *** +%% - should compute a static priority estimate +%% - should dynamically modify priorities + possibly insert NOPs +%% (e.g., to separate branches, etc.) +%% - thus, ought to be passed the current schedule and/or resources as well + +-module(hipe_schedule_prio). +-export([init_ready/2, + init_instr_prio/2, + %% initial_ready_set/4, + next_ready/7, + add_ready_nodes/2, + insert_node/3 + ]). + +init_ready(Size,Preds) -> + hipe_ultra_prio:init_ready(Size,Preds). + +init_instr_prio(N,DAG) -> + hipe_ultra_prio:init_instr_prio(N,DAG). + +%% initial_ready_set(M,N,Preds,Ready) -> +%% hipe_ultra_prio:initial_ready_set(M,N,Preds,Ready). + +next_ready(C,Ready,Prio,Nodes,DAG,Preds,Earl) -> + hipe_ultra_prio:next_ready(C,Ready,Prio,Nodes,DAG,Preds,Earl). + +add_ready_nodes(NodeLst,Ready) -> + hipe_ultra_prio:add_ready_nodes(NodeLst,Ready). + +insert_node(C,I,Ready) -> + hipe_ultra_prio:insert_node(C,I,Ready). diff --git a/lib/hipe/opt/hipe_spillmin.erl b/lib/hipe/opt/hipe_spillmin.erl new file mode 100644 index 0000000000..df885a7dff --- /dev/null +++ b/lib/hipe/opt/hipe_spillmin.erl @@ -0,0 +1,111 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ========================================================================== +%% Module : hipe_spillmin +%% Purpose : Driver module for minimizing the number of stack slots used +%% by a function. This is done using an algorithm for register +%% allocation. The implementation is target-independent and +%% requires a target-specific interface module as argument. +%% +%% $Id$ +%% ========================================================================== +%% Exported functions (short description): +%% +%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> +%% {Coloring, NumberOfSpills} +%% Takes a CFG and the TempMap from register allocation and returns +%% a coloring of stack slots. +%% StackSlots should be a list of used stack slots, usually empty at +%% first call to function. +%% SpillIndex is the the first position we will spill to, usually 0. +%% TempMap is the TempMap from the register allocation +%% +%% The Coloring will be in the form of the "allocation datastructure" +%% described below, that is, a list of tuples on the form +%% {Name, {spill, SpillIndex}} +%% The NumberOfSpills is either 0 indicating no spill or the +%% SpillIndex of the last spilled register. +%% +%% mapmerge(Map, SpillMap) -> NewMap +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_spillmin). +-export([stackalloc/6, mapmerge/2]). + +%%-define(DEBUG, 1). +-define(HIPE_INSTRUMENT_COMPILER, true). + +%%--------------------------------------------------------------------------- + +-include("../main/hipe.hrl"). +-include("../flow/cfg.hrl"). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) +%% Calculates an allocation of stack slots using either a linear scan +%% or a graph coloring allocation algorithm. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec stackalloc(#cfg{}, [_], non_neg_integer(), + comp_options(), module(), hipe_temp_map()) -> + {hipe_spill_map(), non_neg_integer()}. + +stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> + case proplists:get_bool(spillmin_color, Options) of + false -> + ?option_time(hipe_spillmin_scan:stackalloc(CFG, StackSlots, SpillIndex, + Options, Target, TempMap), + "Spill minimize, linear scan", Options); + true -> + ?option_time(hipe_spillmin_color:stackalloc(CFG, StackSlots, SpillIndex, + Options, Target, TempMap), + "Spill minimize, graph coloring", Options) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% mapmerge(Map, SpillMap) +%% +%% stackalloc/6 will only return the subset of the tempmap that contains +%% the spilled temporaries. This function is used to merge the old +%% complete tempmap with the new spill information. +%% Map is the old map (a list of [{R0, C1}, {R1, C2}, ...]). +%% SpillMap is the new "spill" map. +%% !! Warning, the function does not work with the maps in another order !! +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% Combines the map with allocated spills with a map from the register +%% allocator + +-spec mapmerge(hipe_map(), hipe_spill_map()) -> hipe_map(). + +mapmerge(TempMap, SpillMap) -> + mapmerge(TempMap, SpillMap, []). + +mapmerge([], _, Ack) -> + lists:reverse(Ack); +mapmerge([{T1, _}|T1s], [{T2, C}|T2s], Ack) when T1 =:= T2 -> + mapmerge(T1s, T2s, [{T1, C}|Ack]); +mapmerge([{_, unknown}|T1s], T2s, Ack) -> + mapmerge(T1s, T2s, Ack); +mapmerge([T1|T1s], T2s, Ack) -> + mapmerge(T1s, T2s, [T1|Ack]). diff --git a/lib/hipe/opt/hipe_spillmin_color.erl b/lib/hipe/opt/hipe_spillmin_color.erl new file mode 100644 index 0000000000..11a281100b --- /dev/null +++ b/lib/hipe/opt/hipe_spillmin_color.erl @@ -0,0 +1,556 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% =========================================================================== +%%@doc +%% GRAPH COLORING STACK SLOT SPILL MINIMIZER +%% +%% A simple pessimistic graph coloring stack slot spill minimizer +%% +%% - build interference graph +%% - estimate number of stack slots needed +%% - simplify graph (push on stack, abort and retry with more stack slots if spill) +%% - select colors +%% +%% Emits a coloring: a list of {TempName,Location} +%% where Location is {spill,M}. +%% {spill,M} denotes the Mth spilled node +%% +%% This version uses ETS tables +%% +%% Deficiencies: +%% - pessimistic coloring +%% + +-module(hipe_spillmin_color). + +-export([stackalloc/6]). + +%%-ifndef(DO_ASSERT). +%%-define(DO_ASSERT, true). +%%-endif. + +%%-ifndef(DEBUG). +%%-define(DEBUG,0). +%%-endif. + +%%--------------------------------------------------------------------------- + +-include("../main/hipe.hrl"). +-include("../flow/cfg.hrl"). + +%% Define these as 'ok' or 'report(X,Y)' depending on how much output you want. +-define(report0(X,Y), ?IF_DEBUG_LEVEL(0,?msg(X, Y),ok)). +-define(report(X,Y), ?IF_DEBUG_LEVEL(1,?msg(X, Y),ok)). +-define(report2(X,Y), ?IF_DEBUG_LEVEL(2,?msg(X, Y),ok)). +-define(report3(X,Y), ?IF_DEBUG_LEVEL(3,?msg(X, Y),ok)). + +%% Emits a coloring: a list of {TempName,Location} +%% where Location is {spill,M}. +%% {spill,M} denotes the Mth spilled node + +-spec stackalloc(#cfg{}, [_], non_neg_integer(), + comp_options(), module(), hipe_temp_map()) -> + {hipe_spill_map(), non_neg_integer()}. + +stackalloc(CFG, _StackSlots, SpillIndex, _Options, Target, TempMap) -> + ?report2("building IG~n", []), + {IG, NumNodes} = build_ig(CFG, Target, TempMap), + {Cols, MaxColors} = + color_heuristic(IG, 0, NumNodes, NumNodes, NumNodes, Target, 1), + SortedCols = lists:sort(Cols), + {remap_temp_map(SortedCols, TempMap, SpillIndex), SpillIndex+MaxColors}. + +%% Rounds a floating point value upwards +ceiling(X) -> + T = trunc(X), + case (X - T) of + Neg when Neg < 0.0 -> T; + Pos when Pos > 0.0 -> T + 1; + _ -> T + end. + +%% Emits a coloring: an unsorted list of {Temp,Location} +%% where Location is {spill,M}. +%% {spill,M} denotes the Mth spilled node +%% +%% Notes: +%% - Arguments: +%% IG: The interference graph +%% Min: The lower bound, the minimal number of colors tried. +%% Max: The upper bound, the maximal number of colors tried. +%% Safe: The number of colors that are guaranteed to work. This is +%% needed, because we reuse information from color() about how +%% many colors it used at the last try, but this is not guaranteed to +%% be a feasible solution because color might work differently using +%% more colors although it has successfully colored the graph with +%% fewer colors previously. Example: color(666) colors with 23 colors, +%% but color(23) fails. +%% We use Safe inefficently, because we run color 1 additional +%% time with the same argument if Safe is needed. +%% MaxNodes: The number of nodes in IG. +%% Target: Target specific information. +%% MaxDepth: The maximum recursion depth. +color_heuristic(IG, Min, Max, Safe, MaxNodes, Target, MaxDepth) -> + case MaxDepth of + 0 -> + case color(IG, ordsets:from_list(init_stackslots(Max)), + MaxNodes, Target) of + not_easily_colorable -> + color(IG, ordsets:from_list(init_stackslots(Safe)), + MaxNodes, Target); + Else -> + Else + end; + _ -> + %% This can be increased from 2, and by this the heuristic can be + %% exited earlier, but the same can be achived by decreasing the + %% recursion depth. This should not be decreased below 2. + case (Max - Min) < 2 of + true -> + case color(IG, ordsets:from_list(init_stackslots(Max)), + MaxNodes, Target) of + not_easily_colorable -> + color(IG, ordsets:from_list(init_stackslots(Safe)), + MaxNodes, Target); + Else -> + Else + end; + false -> + NumSlots = ceiling((Max - Min)/2) + Min, + case color(IG, ordsets:from_list(init_stackslots(NumSlots)), + MaxNodes, Target) of + not_easily_colorable -> + color_heuristic(IG, NumSlots, Max, + Safe, MaxNodes, Target, MaxDepth - 1); + {_TmpCols, TmpMaxColors} -> + color_heuristic(IG, Min, TmpMaxColors, + NumSlots, MaxNodes, Target, MaxDepth - 1) + end + end + end. + +%% Returns a new temp map with the spilled temporaries mapped to stack slots, +%% located after SpillIndex, according to Cols. +remap_temp_map(Cols, TempMap, SpillIndex) -> + remap_temp_map0(Cols, hipe_temp_map:to_substlist(TempMap), SpillIndex). + +remap_temp_map0([], _TempMap, _SpillIndex) -> + []; +remap_temp_map0([{_M, {spill, N}}|Xs], [{TempNr, {spill,_}}|Ys], SpillIndex) -> + [{TempNr, {spill, SpillIndex + N-1}}|remap_temp_map0(Xs, Ys, SpillIndex)]; +remap_temp_map0(Cols, [_Y|Ys], SpillIndex) -> + remap_temp_map0(Cols, Ys, SpillIndex). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% *** BUILD THE INTERFERENCE GRAPH *** +%% +%% Returns {Interference_graph, Number_Of_Nodes} +%% + +build_ig(CFG, Target, TempMap) -> + try build_ig0(CFG, Target, TempMap) + catch error:Rsn -> exit({regalloc, build_ig, Rsn}) + end. + +%% Creates an ETS table consisting of the keys given in List, with the values +%% being an integer which is the position of the key in List. +%% [1,5,7] -> {1,0} {5,1} {7,2} +%% etc. +setup_ets(List) -> + setup_ets0(List, ets:new(tempMappingTable, []), 0). + +setup_ets0([], Table, _N) -> + Table; +setup_ets0([X|Xs], Table, N) -> + ets:insert(Table, {X, N}), + setup_ets0(Xs, Table, N+1). + +build_ig0(CFG, Target, TempMap) -> + Live = Target:analyze(CFG), + TempMapping = map_spilled_temporaries(TempMap), + TempMappingTable = setup_ets(TempMapping), + NumSpilled = length(TempMapping), + IG = build_ig_bbs(Target:labels(CFG), CFG, Live, empty_ig(NumSpilled), + Target, TempMap, TempMappingTable), + ets:delete(TempMappingTable), + {normalize_ig(IG), NumSpilled}. + +build_ig_bbs([], _CFG, _Live, IG, _Target, _TempMap, _TempMapping) -> + IG; +build_ig_bbs([L|Ls], CFG, Live, IG, Target, TempMap, TempMapping) -> + Xs = bb(CFG, L, Target), + LiveOut = [X || X <- liveout(Live, L, Target), + hipe_temp_map:is_spilled(X, TempMap)], + LiveOutList = ordsets:to_list(LiveOut), + LiveOutListMapped = list_map(LiveOutList, TempMapping, []), + LiveOutSetMapped = ordsets:from_list(LiveOutListMapped), + {_, NewIG} = + build_ig_bb(Xs, LiveOutSetMapped, IG, Target, TempMap, TempMapping), + build_ig_bbs(Ls, CFG, Live, NewIG, Target, TempMap, TempMapping). + +build_ig_bb([], LiveOut, IG, _Target, _TempMap, _TempMapping) -> + {LiveOut, IG}; +build_ig_bb([X|Xs], LiveOut, IG, Target, TempMap, TempMapping) -> + {Live,NewIG} = + build_ig_bb(Xs, LiveOut, IG, Target, TempMap, TempMapping), + build_ig_instr(X, Live, NewIG, Target, TempMap, TempMapping). + +build_ig_instr(X, Live, IG, Target, TempMap, TempMapping) -> + {Def, Use} = def_use(X, Target, TempMap), + ?report3("Live ~w\n~w : Def: ~w Use ~w\n",[Live, X, Def,Use]), + DefListMapped = list_map(Def, TempMapping, []), + UseListMapped = list_map(Use, TempMapping, []), + DefSetMapped = ordsets:from_list(DefListMapped), + UseSetMapped = ordsets:from_list(UseListMapped), + NewIG = interference_arcs(DefListMapped, ordsets:to_list(Live), IG), + NewLive = ordsets:union(UseSetMapped, ordsets:subtract(Live, DefSetMapped)), + {NewLive, NewIG}. + +%% Given a list of Keys and an ets-table returns a list of the elements +%% in Mapping corresponding to the Keys and appends Acc to this list. +list_map([], _Mapping, Acc) -> + Acc; +list_map([X|Xs], Mapping, Acc) -> + {_Key, Val} = hd(ets:lookup(Mapping, X)), + list_map(Xs, Mapping, [Val | Acc]). + +%% Returns an ordered list of spilled temporaries in TempMap +map_spilled_temporaries(TempMap) -> + map_spilled_temporaries0(hipe_temp_map:to_substlist(TempMap)). + +map_spilled_temporaries0([]) -> + []; +map_spilled_temporaries0([{N, {spill, _}}|Xs]) -> + [N | map_spilled_temporaries0(Xs)]; +map_spilled_temporaries0([_X|Xs]) -> + map_spilled_temporaries0(Xs). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +interference_arcs([], _Live, IG) -> + IG; +interference_arcs([X|Xs], Live, IG) -> + interference_arcs(Xs, Live, i_arcs(X, Live, IG)). + +i_arcs(_X, [], IG) -> + IG; +i_arcs(X, [Y|Ys], IG) -> + i_arcs(X, Ys, add_edge(X, Y, IG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% *** COLORING *** +%% +%% Coloring is done straightforwardly: +%% - find the low-degree nodes, put them in low +%% - while low non-empty: +%% * remove x from low +%% * push x on stack +%% * decrement degree of neighbors of x +%% * for each neighbor y of low degree, put y on low +%% - when low empty: +%% - if graph empty, return stack +%% - otherwise +%% throw an exception (the caller should retry with more stack slots) + +color(IG, StackSlots, NumNodes, Target) -> + try + color_0(IG, StackSlots, NumNodes, Target) + catch + error:Rsn -> + ?error_msg("Coloring failed with ~p~n", [Rsn]), + ?EXIT(Rsn) + end. + +color_0(IG, StackSlots, NumNodes, Target) -> + ?report("simplification of IG~n", []), + K = ordsets:size(StackSlots), + Nodes = list_ig(IG), + Low = low_degree_nodes(Nodes, K), + ?report(" starting with low degree nodes ~p~n", [Low]), + EmptyStk = [], + case simplify(Low, NumNodes, IG, K, EmptyStk, Target) of + non_simplifiable -> not_easily_colorable; + Stk -> + ?report(" selecting colors~n", []), + select(Stk, IG, StackSlots, NumNodes) + end. + +%%%%%%%%%%%%%%%%%%%% +%% +%% Simplification: push all easily colored nodes on a stack; +%% when the list of easy nodes becomes empty, see if graph is +%% empty as well. If it is not, throw an exception and abort. +%% If it is empty, return the stack. +%% +%% Notes: +%% - Arguments: +%% Low: low-degree nodes (ready to color) +%% NumNodes: number of remaining nodes in graph +%% IG: interference graph +%% K: number of colors +%% Stk: stack of already simplified nodes +%% Target: Machine to compile for + +simplify(Low, NumNodes, IG, K, Stk, Target) -> + Vis = none_visited(NumNodes), + simplify_ig(Low, NumNodes, IG, K, Stk, Vis, Target). + +simplify_ig([], 0, _IG, _K, Stk, _Vis, _Target) -> + Stk; +simplify_ig([], N, _IG, _K, _Stk, _Vis, _Target) when N > 0 -> + ?report3("N: ~w Stk: ~w N+Stk ~w\n", [N,length(Stk),N+length(Stk)]), + non_simplifiable; +simplify_ig([X|Xs], N, IG, K, Stk, Vis, Target) -> + ?report3("N: ~w Stk: ~w N+Stk ~w\n", [N,length(Stk),N+length(Stk)]), + case is_visited(X, Vis) of + true -> + ?report(" node ~p already visited~n", [X]), + simplify_ig(Xs, N, IG, K, Stk, Vis, Target); + false -> + ?report("Stack ~w\n", [Stk]), + {NewLow, NewIG} = decrement_neighbors(X, Xs, IG, Vis, K), + ?report(" node ~w pushed\n(~w now ready)~n", [X, NewLow]), + NewStk = push_colored(X, Stk), + simplify_ig(NewLow, N-1, NewIG, K, NewStk, visit(X, Vis), Target) + end. + +decrement_neighbors(X, Xs, IG, Vis, K) -> + Ns = unvisited_neighbors(X, Vis, IG), + ?report(" node ~p has neighbors ~w\n(unvisited ~p)~n", + [X, neighbors(X, IG), Ns]), + decrement_each(Ns, Xs, IG, Vis, K). + +%% For each node, decrement its degree and check if it is now +%% a low-degree node. In that case, add it to the 'low list'. +decrement_each([], Low, IG, _Vis, _K) -> + {Low, IG}; +decrement_each([N|Ns], OldLow, IG, Vis, K) -> + {Low, CurrIG} = Res = decrement_each(Ns, OldLow, IG, Vis, K), + case is_visited(N, Vis) of + true -> + Res; + false -> + {D, NewIG} = decrement_degree(N, CurrIG), + if + D =:= K-1 -> + {[N|Low], NewIG}; + true -> + {Low, NewIG} + end + end. + +%%%%%%%%%%%%%%%%%%%% +%% +%% Returns a list of {Name,Location}, where Location is {spill,M} +%% +%% Note: we use pessimistic coloring here. +%% - we could use optimistic coloring: for spilled node, check if there is +%% an unused color among the neighbors and choose that. + +select(Stk, IG, PhysRegs, NumNodes) -> + select_colors(Stk, IG, none_colored(NumNodes), PhysRegs). + +select_colors([], _IG, _Cols, _PhysRegs) -> + ?report("all nodes colored~n", []), + {[], 0}; +select_colors([{X,colorable}|Xs], IG, Cols, PhysRegs) -> + ?report("color of ~p\n", [X]), + {Slot,NewCols} = select_color(X, IG, Cols, PhysRegs), + ?report("~p~n", [Slot]), + {Tail, MaxColor} = select_colors(Xs, IG, NewCols, PhysRegs), + NewMaxColor = erlang:max(Slot, MaxColor), + %% Since we are dealing with spills we label all our temporaries accordingly. + {[{X,{spill,Slot}} | Tail], NewMaxColor}. + +select_color(X, IG, Cols, PhysRegs) -> + UsedColors = get_colors(neighbors(X, IG), Cols), + Reg = select_unused_color(UsedColors, PhysRegs), + {Reg, set_color(X, Reg, Cols)}. + +%%%%%%%%%%%%%%%%%%%% + +get_colors([], _Cols) -> []; +get_colors([X|Xs], Cols) -> + case color_of(X, Cols) of + uncolored -> + get_colors(Xs, Cols); + {color, R} -> + [R|get_colors(Xs, Cols)] + end. + +select_unused_color(UsedColors, PhysRegs) -> + Summary = ordsets:from_list(UsedColors), + AvailRegs = ordsets:to_list(ordsets:subtract(PhysRegs, Summary)), + hd(AvailRegs). + +push_colored(X, Stk) -> + [{X, colorable} | Stk]. + +low_degree_nodes([], _K) -> []; +low_degree_nodes([{N,Info}|Xs], K) -> + ?report0("node ~p has degree ~p: ~w~n", [N, degree(Info), neighbors(Info)]), + Deg = degree(Info), + if + Deg < K -> + [N|low_degree_nodes(Xs, K)]; + true -> + low_degree_nodes(Xs, K) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +unvisited_neighbors(X, Vis, IG) -> + ordsets:from_list(unvisited(neighbors(X, IG), Vis)). + +unvisited([], _Vis) -> []; +unvisited([X|Xs], Vis) -> + case is_visited(X, Vis) of + true -> + unvisited(Xs, Vis); + false -> + [X|unvisited(Xs, Vis)] + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% *** ABSTRACT DATATYPES *** +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% +%% The stack slot datatype +%% + +init_stackslots(NumSlots) -> + init_stackslots(NumSlots, []). + +init_stackslots(0, Acc) -> + Acc; +init_stackslots(NumSlots, Acc) -> + init_stackslots(NumSlots - 1, [NumSlots|Acc]). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The ig datatype: +%% +%% Note: if we know the number of temps used, we can use a VECTOR +%% instead, which will speed up things. +%% +%% Note: later on, we may wish to add 'move-related' support. + +-record(ig_info, {neighbors = [] :: [_], degree = 0 :: non_neg_integer()}). + +empty_ig(NumNodes) -> + hipe_vectors:new(NumNodes, #ig_info{}). + +degree(Info) -> + Info#ig_info.degree. + +neighbors(Info) -> + Info#ig_info.neighbors. + +add_edge(X, X, IG) -> IG; +add_edge(X, Y, IG) -> + add_arc(X, Y, add_arc(Y, X, IG)). + +add_arc(X, Y, IG) -> + Info = hipe_vectors:get(IG, X), + Old = neighbors(Info), + New = Info#ig_info{neighbors = [Y|Old]}, + hipe_vectors:set(IG,X,New). + +normalize_ig(IG) -> + Size = hipe_vectors:size(IG), + normalize_ig(Size-1, IG). + +normalize_ig(-1, IG) -> + IG; +normalize_ig(I, IG) -> + Info = hipe_vectors:get(IG, I), + N = ordsets:from_list(neighbors(Info)), + NewInfo = Info#ig_info{neighbors = N, degree = length(N)}, + NewIG = hipe_vectors:set(IG, I, NewInfo), + normalize_ig(I-1, NewIG). + +neighbors(X, IG) -> + Info = hipe_vectors:get(IG, X), + Info#ig_info.neighbors. + +decrement_degree(X, IG) -> + Info = hipe_vectors:get(IG, X), + Degree = degree(Info), + NewDegree = Degree-1, + NewInfo = Info#ig_info{degree = NewDegree}, + {NewDegree, hipe_vectors:set(IG, X, NewInfo)}. + +list_ig(IG) -> + hipe_vectors:list(IG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The coloring datatype: + +none_colored(NumNodes) -> + hipe_vectors:new(NumNodes, uncolored). + +color_of(X, Cols) -> + hipe_vectors:get(Cols, X). + +set_color(X, R, Cols) -> + hipe_vectors:set(Cols, X, {color, R}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Note: there might be a slight gain in separating the two versions +%% of visit/2 and visited/2. (So that {var,X} selects X and calls +%% the integer version. + +none_visited(NumNodes) -> + hipe_vectors:new(NumNodes, false). + +visit(X, Vis) -> + hipe_vectors:set(Vis, X, true). + +is_visited(X, Vis) -> + hipe_vectors:get(Vis, X). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% *** INTERFACES TO OTHER MODULES *** +%% + +liveout(CFG, L, Target) -> + ordsets:from_list(reg_names(Target:liveout(CFG, L), Target)). + +bb(CFG, L, Target) -> + hipe_bb:code(Target:bb(CFG, L)). + +def_use(X, Target, TempMap) -> + Defines = [Y || Y <- reg_names(Target:defines(X), Target), + hipe_temp_map:is_spilled(Y, TempMap)], + Uses = [Z || Z <- reg_names(Target:uses(X), Target), + hipe_temp_map:is_spilled(Z, TempMap)], + {Defines, Uses}. + +reg_names(Regs, Target) -> + [Target:reg_nr(X) || X <- Regs]. diff --git a/lib/hipe/opt/hipe_spillmin_scan.erl b/lib/hipe/opt/hipe_spillmin_scan.erl new file mode 100644 index 0000000000..c58906c389 --- /dev/null +++ b/lib/hipe/opt/hipe_spillmin_scan.erl @@ -0,0 +1,559 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% =========================================================================== +%% Copyright (c) 2002 by Niklas Andersson, Andreas Lundin, and Erik Johansson. +%% =========================================================================== +%% Module : hipe_spillmin_scan +%% Purpose : Optimizes the number of stack slots used by using a +%% "linear-scan algorithm" to allocate stack slots. +%% Notes : * This is a simplified implementation of +%% "Linear Scan Register Allocation" by +%% Massimiliano Poletto & Vivek Sarkar described in +%% ACM TOPLAS Vol 21, No 5, September 1999. +%% +%% * This implementation is target-independent and +%% requires a target specific interface module +%% as argument. +%% +%% * Based on the hipe_ls_regalloc module by Erik Johansson +%% +%% History : * 2002-04-01, NA & AL: Created +%% * 2002-10-08, Happi: Cleanup and speedup +%% ============================================================================ +%% Exported functions (short description): +%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> +%% {Coloring, NumberOfSpills} +%% Takes a CFG and the TempMap from register allocation and returns +%% a coloring of stack slots. +%% StackSlots should be a list of used stack slots, usually empty at +%% first call to function. +%% SpillIndex is the the first position we will spill to, usually 0. +%% TempMap is the TempMap from the register allocation +%% +%% The Coloring will be in the form of the "allocation datastructure" +%% described below, that is, a list of tuples on the form +%% {Name, {spill, SpillIndex}} +%% The NumberOfSpills is either 0 indicating no spill or the +%% SpillIndex of the last spilled register. +%% +%% mapmerge +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_spillmin_scan). + +-export([stackalloc/6]). + +%%-define(DEBUG, 1). +-define(HIPE_INSTRUMENT_COMPILER, true). + +%%---------------------------------------------------------------------------- + +-include("../main/hipe.hrl"). +-include("../flow/cfg.hrl"). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) +%% Calculates an allocation of stack slots using a linear_scan algorithm. +%% There are three steps in the algorithm: +%% 1. Calculate live-ranges for all spilled temporaries. +%% 2. Calculate live-intervals for each temporary. +%% The live interval consists of a start position and a end position +%% these are the first definition and last use of the temporary +%% given as instruction numbers in a breadth-first traversal of the +%% control-flow-graph. +%% 3. Do a linear scan allocation over the live intervals. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-spec stackalloc(#cfg{}, [_], non_neg_integer(), + comp_options(), module(), hipe_temp_map()) -> + {hipe_spill_map(), non_neg_integer()}. + +stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> + ?debug_msg("LinearScan: ~w\n", [erlang:statistics(runtime)]), + %% Step 1: Calculate liveness (Call external implementation.) + Liveness = liveness(CFG, Target), + ?debug_msg("liveness (done)~w\n", [erlang:statistics(runtime)]), + USIntervals = calculate_intervals(CFG, Liveness, Options, + Target, TempMap), + %% ?debug_msg("intervals (done) ~w\n", [erlang:statistics(runtime)]), + Intervals = sort_on_start(USIntervals), + ?debug_msg("sort intervals (done) ~w\n", [erlang:statistics(runtime)]), + ?debug_msg("Intervals ~w\n", [Intervals]), + ?debug_msg("No intervals: ~w\n", [length(Intervals)]), + ?debug_msg("count intervals (done) ~w\n", [erlang:statistics(runtime)]), + Allocation = allocate(Intervals, StackSlots, SpillIndex, Target), + ?debug_msg("allocation (done) ~w\n", [erlang:statistics(runtime)]), + Allocation. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% %% +%% Step 2: Calculate live-intervals for each temporary. %% +%% %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%- - - - - - - - - - - - - - - - - - - - - - - - +%% calculate_intervals(CFG, Liveness, Options, Target, TempMap) +%% CFG: The Control-Flow Graph. +%% Liveness: A map of live-in and live-out sets for each Basic-Block. +%% TempMap: The TempMap from the register allocation +%% +%% This function will only consider the intervals of the temporaries +%% that have been spilled during register allocation, and will ignore +%% all other. +%%- - - - - - - - - - - - - - - - - - - - - - - - +calculate_intervals(CFG, Liveness, _Options, Target, TempMap) -> + Interval = empty_interval(Target:number_of_temporaries(CFG)), + Worklist = Target:reverse_postorder(CFG), + intervals(Worklist, Interval, 1, CFG, Liveness, Target, TempMap). + +%%- - - - - - - - - - - - - - - - - - - - - - - - +%% intervals(WorkList, Intervals, InstructionNr, +%% CFG, Liveness, Target, TempMap) +%% WorkList: List of BB-names to handle. +%% Intervals: Intervals seen so far (sorted on register names). +%% InstructionNr: The number of examined instructions. +%% CFG: The Control-Flow Graph. +%% Liveness: A map of live-in and live-out sets for each Basic-Block. +%% Target: The backend for which we generate native code. +%% TempMap: The TempMap from the register allocation +%% +%% This function will only consider the intervals of the temporaries +%% that have been spilled during register allocation, and will ignore +%% all other. +%%- - - - - - - - - - - - - - - - - - - - - - - - +intervals([L|ToDO], Intervals, InstructionNr, CFG, Liveness, Target, + TempMap) -> + ?debug_msg("Block ~w\n", [L]), + %% Add all variables that are live at the entry of this block + %% to the interval data structure. + + %% Only consider spilled temporaries in LiveIn + LiveIn = [X || X <- livein(Liveness, L, Target), + hipe_temp_map:is_spilled(X, TempMap)], + Intervals2 = add_def_point(LiveIn, InstructionNr, Intervals), + + %% Only consider spilled temporaries in LiveOut + LiveOut = [X2 || X2 <- liveout(Liveness, L, Target), + hipe_temp_map:is_spilled(X2, TempMap)], + ?debug_msg("In ~w -> Out ~w\n", [LiveIn, LiveOut]), + + %% Traverse this block instruction by instruction and add all + %% uses and defines to the intervals. + Code = hipe_bb:code(bb(CFG, L, Target)), + {Intervals3, NewINr} = traverse_block(Code, InstructionNr+1, + Intervals2, Target, TempMap), + + %% Add end points for the temporaries that are in the live-out set. + Intervals4 = add_use_point(LiveOut, NewINr+1, Intervals3), + + intervals(ToDO, Intervals4, NewINr+1, CFG, Liveness, Target, TempMap); +intervals([], Intervals, _, _, _, _, _) -> + %% Return the calculated intervals + interval_to_list(Intervals). + %% Intervals. + +%%- - - - - - - - - - - - - - - - - - - - - - - - +%% traverse_block(Code, InstructionNo, Intervals, Unchanged) +%% Examine each instruction in the Code: +%% For each temporary T used or defined by instruction number N: +%% extend the interval of T to include N. +%% TempMap: The TempMap from the register allocation +%% +%% This function will only consider the the instruction that have temporaries +%% that have been spilled during register allocation, and will ignore +%% all other. +%%- - - - - - - - - - - - - - - - - - - - - - - - + +traverse_block([Instruction|Is], InstrNo, Intervals, Target, TempMap) -> + %% Get used temps. + %% Only consider spilled temporaries in the Use set. + UsesSet = [X || X <- uses(Instruction, Target), + hipe_temp_map:is_spilled(X, TempMap)], + %% Get defined temps. + %% Only consider spilled temporaries in the Def set. + DefsSet = [X2 || X2 <- defines(Instruction, Target), + hipe_temp_map:is_spilled(X2, TempMap)], + %% Only consider those temps that starts or ends their lifetime + %% within the basic block (that is remove all Unchanged temps). + Intervals1 = add_def_point( DefsSet, InstrNo, Intervals), + %% Extend the intervals for these temporaries to include InstrNo. + Intervals2 = add_use_point(UsesSet, InstrNo, Intervals1), + %% Handle the next instruction. + traverse_block(Is, InstrNo+1, Intervals2, Target, TempMap); +traverse_block([], InstrNo, Intervals, _, _) -> + %% Return the new intervals and the number of the next instruction. + {Intervals,InstrNo}. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% %% +%% Step 3. Do a linear scan allocation over the live intervals. %% +%% %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% allocate(Intervals, PhysicalRegisters, Target) +%% +%% This function performs the linear scan algorithm. +%% Intervals contains the start and stop position of each spilled temporary, +%% sorted on increasing startpositions +%% StackSlots is a list of available Stack slots to use. If they run out a +%% new stack slot is allocated from an (in theory) infinite domain. +%% +%%- - - - - - - - - - - - - - - - - - - - - - - - +allocate(Intervals, StackSlots, SpillIndex, Target) -> + AllocatedSlots = empty_allocation(), + allocate(Intervals, StackSlots, [], AllocatedSlots, SpillIndex, Target). + +%%- - - - - - - - - - - - - - - - - - - - - - - - +%% allocate(Intervals, Free, Active, Allocated, SpillIndex, Target) +%% Iterates on each temporary interval. +%% Intervals: The list of temporary intervals. +%% Free: Currently available stack slots. +%% Active: Currently used stack slots (sorted on increasing +%% interval enpoints) +%% Allocated: The mapping of register names to spill positions. +%% SpillIndex: The number of spilled registers. +%%- - - - - - - - - - - - - - - - - - - - - - - - +allocate([TempInt|TIS], Free, Active, Alloc, SpillIndex, Target) -> + %% Remove from the active list those temporaries whose interval + %% ends before the start of the current interval. + {NewActive, NewFree} = + expire_old_intervals(Active, startpoint(TempInt), Free, Target), + %% Get the name of the temp in the current interval. + Temp = reg(TempInt), + case NewFree of + [] -> + %% There are no free spill slots, so we allocate a new one + NewSpillIndex = SpillIndex+1, + NewAlloc = spillalloc(Temp, SpillIndex, Alloc), + NewActive2 = add_active(endpoint(TempInt), SpillIndex, NewActive), + allocate(TIS, NewFree, NewActive2, NewAlloc, NewSpillIndex, Target); + [FreeSpillslot | Spillslots] -> + %% The spill slot FreeSpillSlot is available, let's use it. + allocate(TIS, Spillslots, + add_active(endpoint(TempInt), FreeSpillslot, NewActive), + spillalloc(Temp, FreeSpillslot, Alloc), + SpillIndex, Target) + end; +allocate([], _, _, Alloc, SpillIndex, _) -> + %% No more register intervals to handle; + %% return the result sorted on regnames. + {lists:sort(Alloc), SpillIndex}. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% expire_old_intervals(ActiveTemps, CurrentPos, FreeRegisters) +%% Remove all temporaries that have live-ranges that ends before the +%% current position from the active list and put them into the free +%% list instead. +%% +%% --------------------------------------------------------------------- +expire_old_intervals([Act|Acts] = AllActives, CurrentPos, Free, Target) -> + %% Does the live-range of the first active register end before + %% the current position? + + %% We expand multimove before regalloc, ignore the next 2 lines. + %% %% We don't free registers that end at the current position, + %% %% since a multimove can decide to do the moves in another order... + case active_endpoint(Act) =< CurrentPos of + true -> %% Yes -> Then we can free that register. + Spillslot = active_spillslot(Act), + %% Add the spillslot to the free pool. + NewFree = [Spillslot|Free], + %% Here we could try appending the register to get a more + %% widespread use of registers. + %% Free ++ [active_spillslot(Act)]); + expire_old_intervals(Acts, CurrentPos, NewFree, Target); + false -> + %% No -> Then we cannot free any more temporaries. + %% (Since they are sorted on endpoints...) + {AllActives, Free} + end; +expire_old_intervals([], _, Free, _) -> + {[], Free}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% %% +%% D A T A S T R U C T U R E S %% +%% & %% +%% A U X I L I A R Y F U N C T I O N S %% +%% %% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The "allocation datastructure" +%% +%% This is an order list of register names paired with their allocations. +%% {Name, Allocation} +%% Since we are only dealing with spills, the allocation will look like: +%% {spill, SpillIndex} +%% +%% --------------------------------------------------------------------- + +empty_allocation() -> []. + +spillalloc(Name, N, Allocation) -> [{Name,{spill,N}}|Allocation]. + +%% spillalloc(Name,N,[{Name,_}|A]) -> +%% ?debug_msg("Spilled ~w\n",[Name]), +%% [{Name,{spill,N}}|A]; +%% spillalloc(Name,N,[{Name2,Binding}|Bindings]) when Name > Name2 -> +%% [{Name2,Binding}|spillalloc(Name,N,Bindings)]; +%% spillalloc(Name,N,Bindings) -> +%% [{Name,{spill,N}}|Bindings]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% The active datastructure. +%% Keeps tracks of currently active (allocated) spill slots. +%% It is sorted on end points in the intervals +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +add_active(Endpoint, SpillSlot, [A1={P1,_}|Active]) when P1 < Endpoint -> + [A1|add_active(Endpoint, SpillSlot, Active)]; +add_active(Endpoint, SpillSlot, Active) -> + [{Endpoint, SpillSlot}|Active]. + +active_spillslot({_,SpillSlot}) -> + SpillSlot. + +active_endpoint({EndPoint,_}) -> + EndPoint. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The Interval data structure. +%% +%%- - - - - - - - - - - - - - - - - - - - - - - - + +%% mk_interval(Name, Start, End) -> +%% {Name, Start, End}. + +endpoint({_R,_S,Endpoint}) -> + Endpoint. + +startpoint({_R,Startpoint,_E}) -> + Startpoint. + +reg({RegName,_S,_E}) -> + RegName. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The Intervals data structure. + +sort_on_start(I) -> + lists:keysort(2, I). + +-ifdef(gb_intervals). +empty_interval(_) -> + gb_trees:empty(). + +interval_to_list(Intervals) -> + lists:flatten( + lists:map( + fun({T, I}) when is_list(I) -> + lists:map( + fun ({none, End}) -> + {T,End,End}; + ({Beg, none}) -> + {T,Beg, Beg} + end, + I); + ({T,{B,E}}) -> {T, B, E} + end, + gb_trees:to_list(Intervals))). + +add_use_point([Temp|Temps], Pos, Intervals) -> + %% Extend the old interval... + NewInterval = + case gb_trees:lookup(Temp, Intervals) of + %% This temp has an old interval... + {value, Value} -> + %% ... extend it. + extend_interval(Pos, Value); + %% This is the first time we see this temp... + none -> + %% ... create a new interval + {Pos, Pos} + end, + %% Add or update the extended interval. + Intervals2 = gb_trees:enter(Temp, NewInterval, Intervals), + %% Add the rest of the temporaries. + add_use_point(Temps, Pos, Intervals2); +add_use_point([], _, I) -> + %% No more to add return the interval. + I. + +add_def_point([Temp|Temps], Pos, Intervals) -> + %% Extend the old interval... + NewInterval = + case gb_trees:lookup(Temp, Intervals) of + %% This temp has an old interval... + {value, Value} -> + %% ... extend it. + extend_interval(Pos, Value); + %% This is the first time we see this temp... + none -> + %% ... create a new interval + {Pos, Pos} + end, + %% Add or update the extended interval. + Intervals2 = gb_trees:enter(Temp, NewInterval, Intervals), + %% Add the rest of the temporaries. + add_def_point(Temps, Pos, Intervals2); +add_def_point([], _, I) -> + %% No more to add return the interval. + I. + +extend_interval(Pos, {Beginning, End}) -> + %% If this position occurs before the beginning of the interval, + %% then extend the beginning to this position. + NewBeginning = erlang:min(Pos, Beginning), + %% If this position occurs after the end of the interval, then + %% extend the end to this position. + NewEnd = erlang:max(Pos, End), + {NewBeginning, NewEnd}. + +extend_def_interval(Pos, {Beginning, End}) -> + %% If this position occurs before the beginning of the interval, + %% then extend the beginning to this position. + NewBeginning = erlang:min(Pos, Beginning), + %% If this position occurs after the end of the interval, then + %% extend the end to this position. + NewEnd = erlang:max(Pos, End), + {NewBeginning, NewEnd}; +extend_def_interval(Pos, [{Beginning, none}|More]) -> + [{Pos,none}, {Beginning, none}|More]; +extend_def_interval(Pos, Intervals) -> + {Pos, Pos}. + +-else. %% ifdef gb_intervals + +empty_interval(N) -> + hipe_vectors:new(N, none). + +interval_to_list(Intervals) -> + add_indices(hipe_vectors:vector_to_list(Intervals), 0). + +add_indices([{B, E}|Xs], N) -> + [{N, B, E}|add_indices(Xs, N+1)]; +add_indices([List|Xs], N) when is_list(List) -> + flatten(List, N, Xs); +add_indices([none|Xs], N) -> + add_indices(Xs, N+1); +add_indices([], _N) -> []. + +flatten([{none, End}|Rest], N, More) -> + [{N,End,End} | flatten(Rest, N, More)]; +flatten([{Beg, none}|Rest], N ,More) -> + [{N,Beg,Beg} | flatten(Rest, N, More)]; +flatten([], N, More) -> + add_indices(More, N+1). + +add_use_point([Temp|Temps], Pos, Intervals) -> + %% Extend the old interval... + NewInterval = + case hipe_vectors:get(Intervals, Temp) of + %% This is the first time we see this temp... + none -> + %% ... create a new interval + {Pos, Pos}; + %% This temp has an old interval... + Value -> + %% ... extend it. + extend_interval(Pos, Value) + end, + %% Add or update the extended interval. + Intervals2 = hipe_vectors:set(Intervals, Temp, NewInterval), + %% Add the rest of the temporaries. + add_use_point(Temps, Pos, Intervals2); +add_use_point([], _, I) -> + %% No more to add return the interval. + I. + +add_def_point([Temp|Temps], Pos, Intervals) -> + %% Extend the old interval... + NewInterval = + case hipe_vectors:get(Intervals, Temp) of + %% This is the first time we see this temp... + none -> + %% ... create a new interval + {Pos, Pos}; + %% This temp has an old interval... + Value -> + %% ... extend it. + extend_interval(Pos, Value) + end, + %% Add or update the extended interval. + Intervals2 = hipe_vectors:set(Intervals, Temp, NewInterval), + %% Add the rest of the temporaries. + add_def_point(Temps, Pos, Intervals2); +add_def_point([], _, I) -> + %% No more to add return the interval. + I. + +extend_interval(Pos, {Beginning, End}) + when is_integer(Beginning), is_integer(End) -> + %% If this position occurs before the beginning of the interval, + %% then extend the beginning to this position. + NewBeginning = erlang:min(Pos, Beginning), + %% If this position occurs after the end of the interval, then + %% extend the end to this position. + NewEnd = erlang:max(Pos, End), + {NewBeginning, NewEnd}. + +-endif. %% gb_intervals + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to external functions. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +liveness(CFG, Target) -> + Target:analyze(CFG). + +bb(CFG, L, Target) -> + Target:bb(CFG, L). + +livein(Liveness, L, Target) -> + regnames(Target:livein(Liveness, L), Target). + +liveout(Liveness, L, Target) -> + regnames(Target:liveout(Liveness, L), Target). + +uses(I, Target) -> + regnames(Target:uses(I), Target). + +defines(I, Target) -> + regnames(Target:defines(I), Target). + +regnames(Regs, Target) -> + [Target:reg_nr(X) || X <- Regs]. diff --git a/lib/hipe/opt/hipe_target_machine.erl b/lib/hipe/opt/hipe_target_machine.erl new file mode 100644 index 0000000000..be9f095429 --- /dev/null +++ b/lib/hipe/opt/hipe_target_machine.erl @@ -0,0 +1,93 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% INTERFACE TO TARGET MACHINE MODEL +%% +%% Interfaces the instruction scheduler to the (resource) machine model. + +-module(hipe_target_machine). +-export([init_resources/1, + init_instr_resources/2, + resources_available/4, + advance_cycle/1 + ]). +-export([raw_latency/2, + war_latency/2, + waw_latency/2, + %% m_raw_latency/2, + %% m_war_latency/2, + %% m_waw_latency/2, + m_raw_latency/0, + m_war_latency/0, + m_waw_latency/0, + br_to_unsafe_latency/2, + unsafe_to_br_latency/2, + br_br_latency/2 + ]). + +-define(target,hipe_ultra_mod2). + +init_resources(X) -> + ?target:init_resources(X). + +init_instr_resources(X,Y) -> + ?target:init_instr_resources(X,Y). + +resources_available(X,Y,Z,W) -> + ?target:resources_available(X,Y,Z,W). + +advance_cycle(X) -> + ?target:advance_cycle(X). + +raw_latency(From,To) -> + ?target:raw_latency(From,To). + +war_latency(From,To) -> + ?target:war_latency(From,To). + +waw_latency(From,To) -> + ?target:waw_latency(From,To). + +%% m_raw_latency(From,To) -> +%% ?target:m_raw_latency(From,To). + +%% m_war_latency(From,To) -> +%% ?target:m_war_latency(From,To). + +%% m_waw_latency(From,To) -> +%% ?target:m_waw_latency(From,To). + +m_raw_latency() -> + ?target:m_raw_latency(). + +m_war_latency() -> + ?target:m_war_latency(). + +m_waw_latency() -> + ?target:m_waw_latency(). + +br_to_unsafe_latency(Br,U) -> + ?target:br_to_unsafe_latency(Br,U). + +unsafe_to_br_latency(U,Br) -> + ?target:unsafe_to_br_latency(U,Br). + +br_br_latency(Br1,Br2) -> + ?target:br_br_latency(Br1,Br2). diff --git a/lib/hipe/opt/hipe_ultra_mod2.erl b/lib/hipe/opt/hipe_ultra_mod2.erl new file mode 100644 index 0000000000..b039eaee80 --- /dev/null +++ b/lib/hipe/opt/hipe_ultra_mod2.erl @@ -0,0 +1,239 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% ULTRASPARC MACHINE MODEL +%% +%% This module is used by the scheduler. +%% The following interface is used: +%% ... +%% +%% NOTES: +%% - the machine model is simple (on the verge of simplistic) +%% * all FUs are pipelined => model only one cycle at a time +%% * instruction latencies are mostly 1 +%% * floating point is left for later (I _think_ it works, but ...) +%% - conservative: instructions that require multiple resources are +%% modelled as 'single'; instead, they could reserve IEU+BR or whatever +%% - possibly inefficient: I think machine state model could be turned into +%% a bitvector. + +-module(hipe_ultra_mod2). +-export([init_resources/1, + init_instr_resources/2, + resources_available/4, + advance_cycle/1 + ]). +-export([raw_latency/2, + war_latency/2, + waw_latency/2, + %% m_raw_latency/2, + %% m_war_latency/2, + %% m_waw_latency/2, + m_raw_latency/0, + m_war_latency/0, + m_waw_latency/0, + br_to_unsafe_latency/2, + unsafe_to_br_latency/2, + br_br_latency/2 + ]). + +-include("../sparc/hipe_sparc.hrl"). + +-define(debug(Str,Args),ok). +%-define(debug(Str,Args),io:format(Str,Args)). + +-define(debug_ultra(Str,Args),ok). +%-define(debug_ultra(Str,Args),io:format(Str,Args)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Straightforward and somewhat simplistic model for UltraSparc: +%% - only one cycle at a time is modelled +%% - resources are simplified: +%% * ieu0, ieu1, ieu, mem, br, single +%% * per-cycle state = done | { I0, I1, NumI, X, Mem, Br } +%% * unoptimized representation (could be bit vector) + +init_resources(_Size) -> + ?debug_ultra('init res ~p~n',[_Size]), + empty_state(). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +init_instr_resources(N,Nodes) -> + ultra_instr_rsrcs(Nodes,hipe_vectors:new(N, '')). + +ultra_instr_rsrcs([],I_res) -> I_res; +ultra_instr_rsrcs([N|Ns],I_res) -> + ultra_instr_rsrcs(Ns,ultra_instr_type(N,I_res)). + +ultra_instr_type({N,I},I_res) -> + hipe_vectors:set(I_res,N-1,instr_type(I)). + +instr_type(I) -> + case I of + #move{} -> + ieu; + #multimove{} -> %% TODO: expand multimoves before scheduling + ieu; + #alu{} -> + case hipe_sparc:alu_operator(I) of + '>>' -> ieu0; + '<<' -> ieu0; + _ -> ieu + end; + #alu_cc{} -> + ieu1; + #sethi{} -> + ieu; + #load{} -> + mem; + #store{} -> + mem; + #b{} -> + br; + #br{} -> + br; + #goto{} -> + br; + #jmp_link{} -> % imprecise; should be mem+br? + single; + #jmp{} -> % imprecise + br; + #call_link{} -> % imprecise; should be mem+br? + single; + #cmov_cc{} -> % imprecise + single; + #cmov_r{} -> % imprecise + single; + #load_atom{} -> % should be resolved to sethi/or + single; + #load_address{} -> % should be resolved to sethi/or + single; + #load_word_index{} -> % should be resolved to sethi/or + single; + %% uncommon types: + #label{} -> + none; + #nop{} -> + none; + #comment{} -> + none; + _ -> + exit({ultrasparc_instr_type,{cant_schedule,I}}) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +resources_available(_Cycle, I, Rsrc, I_res) -> + res_avail(instruction_resource(I_res, I), Rsrc). + +instruction_resource(I_res, I) -> + hipe_vectors:get(I_res, I-1). + +%% The following function checks resource availability. +%% * all function units are assumed to be fully pipelined, so only +%% one cycle at a time is modelled. +%% * for IEU0 and IEU1, these must precede all generic IEU instructions +%% (handled by X bit) +%% * at most 2 integer instructions can issue in a cycle +%% * mem is straightforward +%% * br closes the cycle (= returns done). +%% * single requires an entirely empty state and closes the cycle + +res_avail(ieu0, { free, I1, NumI, free, Mem, Br }) + when is_integer(NumI), NumI < 2 -> + { yes, { occ, I1, NumI+1, free, Mem, Br }}; +res_avail(ieu1, { _I0, free, NumI, free, Mem, Br }) + when is_integer(NumI), NumI < 2 -> + { yes, { free, occ, NumI+1, free, Mem, Br }}; +res_avail(ieu, { I0, I1, NumI, _X, Mem, Br }) + when is_integer(NumI), NumI < 2 -> + { yes, { I0, I1, NumI+1, occ, Mem, Br }}; +res_avail(mem, { I0, I1, NumI, X, free, Br }) -> + { yes, { I0, I1, NumI, X, occ, Br }}; +res_avail(br, { _I0, _I1, _NumI, _X, _Mem, free }) -> + { yes, done }; +res_avail(single, { free, free, 0, free, free, free }) -> + { yes, done }; +res_avail(_, _) -> + no. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +advance_cycle(_Rsrc) -> + empty_state(). + +empty_state() -> { free, free, 0, free, free, free }. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Latencies are taken from UltraSparc hardware manual +%% +%% *** UNFINISHED *** +%% more precisely, they are taken from my memory of the US-manual +%% at the moment. +%% +%% Note: all ld/st are assumed to hit in the L1 cache (D-cache), +%% which is sort of imprecise. + +raw_latency(alu, store) -> 0; +raw_latency(load, _) -> 2; % only if load is L1 hit +raw_latency(alu_cc, b) -> 0; +raw_latency(_I0, _I1) -> + 1. + +war_latency(_I0, _I1) -> + 0. + +waw_latency(_I0, _I1) -> + 1. + +%% *** UNFINISHED *** +%% At present, all load/stores are assumed to hit in the L1 cache, +%% which isn't really satisfying. + +%% m_raw_latency(_St, _Ld) -> +%% 1. +%% +%% m_war_latency(_Ld, _St) -> +%% 1. +%% +%% m_waw_latency(_St1, _St2) -> +%% 1. + +%% Use these for 'default latencies' = do not permit reordering. + +m_raw_latency() -> + 1. + +m_war_latency() -> + 1. + +m_waw_latency() -> + 1. + +br_to_unsafe_latency(_BrTy, _UTy) -> + 0. + +unsafe_to_br_latency(_UTy, _BrTy) -> + 0. + +br_br_latency(_BrTy1, _BrTy2) -> + 0. diff --git a/lib/hipe/opt/hipe_ultra_prio.erl b/lib/hipe/opt/hipe_ultra_prio.erl new file mode 100644 index 0000000000..9e2c1a0489 --- /dev/null +++ b/lib/hipe/opt/hipe_ultra_prio.erl @@ -0,0 +1,304 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% PRIORITY HANDLING AND PRIORITY CALCULATION +%% +%% Handling of ready nodes and priorities. +%% Priorities are mainly from the critical path. More priorities are added. +%% * One version is adding priorities just depending on the instr, so +%% for example loads get higher priority than stores, and ordered +%% after reg's and offset for better cache performance. +%% * The other version gives higher priority to a node that adds more new +%% nodes to the ready list. This one is maybe not so effectively +%% implemented, but was added too late for smarter solutions. +%% One version is commented away + +-module(hipe_ultra_prio). +-export([init_ready/2, + init_instr_prio/2, + %% initial_ready_set/4, + next_ready/7, + add_ready_nodes/2, + insert_node/3 + ]). + +-include("../sparc/hipe_sparc.hrl"). + +% At first, only nodes with no predecessors are selected. +% - if R is empty, there is an error (unless BB itself is empty) + +%% Arguments : Size - size of ready-array +%% Preds - array with number of predecessors for each node +%% Returns : An array with list of ready-nodes for each cycle. + +init_ready(Size, Preds) -> + P = hipe_vectors:size(Preds), + Ready = hipe_vectors:new(Size, []), + R = initial_ready_set(1, P, Preds, []), + hipe_vectors:set(Ready, 0, R). + +init_instr_prio(N, DAG) -> + critical_path(N, DAG). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : initial_ready_set +%% Argument : M - current node-index +%% N - where to stop +%% Preds - array with number of predecessors for each node +%% Ready - list with ready-nodes +%% Returns : Ready - list with ready-nodes +%% Description : Finds all nodes with no predecessors and adds them to ready. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +initial_ready_set(M, N, Preds, Ready) -> + if + M > N -> + Ready; + true -> + case hipe_vectors:get(Preds, M-1) of + 0 -> + initial_ready_set(M+1, N, Preds, [M|Ready]); + V when is_integer(V), V > 0 -> + initial_ready_set(M+1, N, Preds, Ready) + end + end. + +%% The following handles the nodes ready to schedule: +%% 1. select the ready queue of given cycle +%% 2. if queue empty, return none +%% 3. otherwise, remove entry with highest priority +%% and return {next,Highest_Prio,NewReady} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : next_ready +%% Argument : C - current cycle +%% Ready - array with ready nodes +%% Prio - array with cpath-priorities for all nodes +%% Nodes - indexed list [{N, Instr}] +%% Returns : none / {next,Highest_Prio,NewReady} +%% Description : 1. select the ready queue of given cycle +%% 2. if queue empty, return none +%% 3. otherwise, remove entry with highest priority +%% and return {next,Highest_Prio,NewReady} where Highest_Prio +%% = Id of instr and NewReady = updated ready-array. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +next_ready(C, Ready, Prio, Nodes, DAG, Preds, Earl) -> + Curr = hipe_vectors:get(Ready, C-1), + case Curr of + [] -> + none; + Instrs -> + {BestI,RestIs} = + get_best_instr(Instrs, Prio, Nodes, DAG, Preds, Earl, C), + {next,BestI,hipe_vectors:set(Ready,C-1,RestIs)} + end. + +% next_ready(C,Ready,Prio,Nodes) -> +% Curr = hipe_vectors:get(Ready,C-1), +% case Curr of +% [] -> +% none; +% Instrs -> +% {BestInstr,RestInstrs} = get_best_instr(Instrs, Prio, Nodes), +% {next,BestInstr,hipe_vectors:set(Ready,C-1,RestInstrs)} +% end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : get_best_instr +%% Argument : Instrs - list of node-id's +%% Prio - array with cpath-priorities for the nodes +%% Nodes - indexed list [{Id, Instr}] +%% Returns : {BestSoFar, Rest} - Id of best instr and the rest of id's +%% Description : Returns the id of the instr that is the best choice. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +get_best_instr([Instr|Instrs], Prio, Nodes, DAG, Preds, Earl, C) -> + get_best_instr(Instrs, [], Instr, Prio, Nodes, DAG, Preds, Earl, C). + +get_best_instr([], Rest, BestSoFar, _Prio, _Nodes, _DAG, _Preds, _Earl, _C) -> + {BestSoFar, Rest}; +get_best_instr([Instr|Instrs], PassedInstrs, BestSoFar, Prio, Nodes, + DAG, Preds, Earl, C) -> + case better(Instr, BestSoFar, Prio, Nodes, DAG, Preds, Earl, C) of + true -> + get_best_instr(Instrs, [BestSoFar|PassedInstrs], + Instr, Prio, Nodes, DAG, Preds, Earl, C); + false -> + get_best_instr(Instrs, [Instr|PassedInstrs], BestSoFar, Prio, + Nodes, DAG, Preds, Earl, C) + end. + +% get_best_instr([Instr|Instrs], Prio, Nodes) -> +% get_best_instr(Instrs, [], Instr, Prio, Nodes). + +% get_best_instr([], Rest, BestSoFar, Prio, Nodes) -> {BestSoFar, Rest}; +% get_best_instr([Instr|Instrs], PassedInstrs, BestSoFar, Prio, Nodes) -> +% case better(Instr, BestSoFar, Prio, Nodes) of +% true -> +% get_best_instr(Instrs, [BestSoFar|PassedInstrs], +% Instr, Prio, Nodes); +% false -> +% get_best_instr(Instrs, [Instr|PassedInstrs],BestSoFar, Prio, Nodes) +% end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : better +%% Argument : Instr1 - Id of instr 1 +%% Instr2 - Id of instr 2 +%% Prio - array with cpath-priorities for the nodes +%% Nodes - indexed list [{Id, Instr}] +%% Returns : true if Instr1 has higher priority than Instr2 +%% Description : Checks if Instr1 is a better choice than Instr2 for scheduling +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +better(Instr1, Instr2, Prio, Nodes, DAG, Preds, Earl, C) -> + better_hlp(priority(Instr1, Prio, Nodes, DAG, Preds, Earl, C), + priority(Instr2, Prio, Nodes, DAG, Preds, Earl, C)). + +better_hlp([], []) -> false; +better_hlp([], [_|_]) -> false; +better_hlp([_|_], []) -> true; +better_hlp([X|Xs], [Y|Ys]) -> (X > Y) or ((X =:= Y) and better_hlp(Xs,Ys)). + +%% +%% Returns the instr corresponding to id +%% +get_instr(InstrId, [{InstrId,Instr}|_]) -> Instr; +get_instr(InstrId, [_|Xs]) -> get_instr(InstrId, Xs). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : priority +%% Argument : InstrId - Id +%% Prio - array with cpath-priorities for the nodes +%% Nodes - indexed list [{Id, Instr}] +%% Returns : PrioList - list of priorities [MostSignificant, LessSign, ...] +%% Description : Returns a list of priorities where the first element is the +%% cpath-priority and the rest are added depending on what kind +%% of instr it is. Used to order loads/stores sequentially and +%% there is possibility to add whatever stuff... +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +priority(InstrId, Prio, Nodes, DAG, Preds, Earl, C) -> + {ReadyNodes,_,_,_} = hipe_schedule:delete_node(C,InstrId,DAG,Preds,Earl), + Instr = get_instr(InstrId, Nodes), + Prio1 = hipe_vectors:get(Prio, InstrId-1), + Prio2 = length(ReadyNodes), + PrioRest = + case Instr of + #load_atom{} -> + [3]; + #move{} -> + [3]; + #load{} -> + Src = hipe_sparc:load_src(Instr), + Off = hipe_sparc:load_off(Instr), + case hipe_sparc:is_reg(Off) of + false -> [3, + -(hipe_sparc:reg_nr(Src)), + -(hipe_sparc:imm_value(Off))]; + true -> [1] + end; + #store{} -> + Src = hipe_sparc:store_dest(Instr), + Off = hipe_sparc:store_off(Instr), + case hipe_sparc:is_reg(Off) of + false -> [2, + -(hipe_sparc:reg_nr(Src)), + -(hipe_sparc:imm_value(Off))]; + true -> [1] + end; + _ -> [0] + end, + [Prio1,Prio2|PrioRest]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : add_ready_nodes +%% Argument : Nodes - list of [{Cycle,Id}] +%% Ready - array of ready nodes for all cycles +%% Returns : NewReady - updated ready-array +%% Description : Gets a list of instrs and adds them to the ready-array +%% to the corresponding cycle. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +add_ready_nodes([], Ready) -> Ready; +add_ready_nodes([{C,I}|Xs], Ready) -> + add_ready_nodes(Xs, insert_node(C, I, Ready)). + +insert_node(C, I, Ready) -> + Old = hipe_vectors:get(Ready, C-1), + hipe_vectors:set(Ready, C-1, [I|Old]). + +%% +%% Computes the latency for the "most expensive" way through the graph +%% for all nodes. Returns an array of priorities for all nodes. +%% +critical_path(N, DAG) -> + critical_path(1, N, DAG, hipe_vectors:new(N, -1)). + +critical_path(M, N, DAG, Prio) -> + if + M > N -> + Prio; + true -> + critical_path(M+1, N, DAG, cpath(M, DAG, Prio)) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Function : cpath +%% Argument : M - current node id +%% DAG - the dependence graph +%% Prio - array of priorities for all nodes +%% Returns : Prio - updated prio array +%% Description : If node has prio -1, it has not been visited +%% - otherwise, compute priority as max of priorities of +%% successors (+ latency) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +cpath(M, DAG, Prio) -> + InitPrio = hipe_vectors:get(Prio, M-1), + if + InitPrio =:= -1 -> + cpath_node(M, DAG, Prio); + true -> + Prio + end. + +cpath_node(N, DAG, Prio) -> + SuccL = dag_succ(DAG, N), + {Max, NewPrio} = cpath_succ(SuccL, DAG, Prio), + hipe_vectors:set(NewPrio, N-1, Max). + +cpath_succ(SuccL, DAG, Prio) -> + cpath_succ(SuccL, DAG, Prio, 0). + +%% performs an unnecessary lookup of priority of Succ, but that might +%% not be such a big deal + +cpath_succ([], _DAG, Prio, NodePrio) -> {NodePrio,Prio}; +cpath_succ([{Lat,Succ}|Xs], DAG, Prio, NodePrio) -> + NewPrio = cpath(Succ, DAG, Prio), + NewNodePrio = erlang:max(hipe_vectors:get(NewPrio, Succ - 1) + Lat, NodePrio), + cpath_succ(Xs, DAG, NewPrio, NewNodePrio). + +dag_succ(DAG, N) when is_integer(N) -> + hipe_vectors:get(DAG, N-1). + -- cgit v1.2.3