aboutsummaryrefslogtreecommitdiffstats
path: root/lib/hipe/opt
diff options
context:
space:
mode:
authorErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
committerErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
commit84adefa331c4159d432d22840663c38f155cd4c1 (patch)
treebff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/hipe/opt
downloadotp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
The R13B03 release.OTP_R13B03
Diffstat (limited to 'lib/hipe/opt')
-rw-r--r--lib/hipe/opt/Makefile101
-rw-r--r--lib/hipe/opt/hipe_schedule.erl1489
-rw-r--r--lib/hipe/opt/hipe_schedule_prio.erl58
-rw-r--r--lib/hipe/opt/hipe_spillmin.erl111
-rw-r--r--lib/hipe/opt/hipe_spillmin_color.erl556
-rw-r--r--lib/hipe/opt/hipe_spillmin_scan.erl559
-rw-r--r--lib/hipe/opt/hipe_target_machine.erl93
-rw-r--r--lib/hipe/opt/hipe_ultra_mod2.erl239
-rw-r--r--lib/hipe/opt/hipe_ultra_prio.erl304
9 files changed, 3510 insertions, 0 deletions
diff --git a/lib/hipe/opt/Makefile b/lib/hipe/opt/Makefile
new file mode 100644
index 0000000000..972cf63944
--- /dev/null
+++ b/lib/hipe/opt/Makefile
@@ -0,0 +1,101 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 2001-2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+
+ifndef EBIN
+EBIN = ../ebin
+endif
+
+ifndef DOCS
+DOCS = ../doc
+endif
+
+include $(ERL_TOP)/make/target.mk
+include $(ERL_TOP)/make/$(TARGET)/otp.mk
+
+# ----------------------------------------------------
+# Application version
+# ----------------------------------------------------
+include ../vsn.mk
+VSN=$(HIPE_VSN)
+
+# ----------------------------------------------------
+# Release directory specification
+# ----------------------------------------------------
+RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN)
+
+# ----------------------------------------------------
+# Target Specs
+# ----------------------------------------------------
+MODULES = hipe_spillmin hipe_spillmin_color hipe_spillmin_scan
+
+HRL_FILES=
+ERL_FILES= $(MODULES:%=%.erl)
+TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR))
+DOC_FILES= $(MODULES:%=$(DOCS)/%.html)
+
+# APP_FILE=
+# APP_SRC= $(APP_FILE).src
+# APP_TARGET= $(EBIN)/$(APP_FILE)
+#
+# APPUP_FILE=
+# APPUP_SRC= $(APPUP_FILE).src
+# APPUP_TARGET= $(EBIN)/$(APPUP_FILE)
+
+# ----------------------------------------------------
+# FLAGS
+# ----------------------------------------------------
+
+include ../native.mk
+
+ERL_COMPILE_FLAGS += +warn_exported_vars +warn_missing_spec +warn_untyped_record
+
+# ----------------------------------------------------
+# Targets
+# ----------------------------------------------------
+
+debug opt: $(TARGET_FILES)
+
+docs: $(DOC_FILES)
+
+clean:
+ rm -f $(TARGET_FILES)
+ rm -f core
+
+$(DOCS)/%.html:%.erl
+ erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop
+
+# ----------------------------------------------------
+# Special Build Targets
+# ----------------------------------------------------
+
+
+# ----------------------------------------------------
+# Release Target
+# ----------------------------------------------------
+include $(ERL_TOP)/make/otp_release_targets.mk
+
+release_spec: opt
+ $(INSTALL_DIR) $(RELSYSDIR)/ebin
+ $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin
+
+release_docs_spec:
+
+$(EBIN)/hipe_spillmin.beam: ../main/hipe.hrl ../flow/cfg.hrl
+$(EBIN)/hipe_spillmin_color.beam: ../main/hipe.hrl ../flow/cfg.hrl
+$(EBIN)/hipe_spillmin_scan.beam: ../main/hipe.hrl ../flow/cfg.hrl
diff --git a/lib/hipe/opt/hipe_schedule.erl b/lib/hipe/opt/hipe_schedule.erl
new file mode 100644
index 0000000000..4925b2927b
--- /dev/null
+++ b/lib/hipe/opt/hipe_schedule.erl
@@ -0,0 +1,1489 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% INSTRUCTION SCHEDULER
+%%
+%% This is a basic ILP cycle scheduler:
+%% * set cycle = 0
+%% * while ready[cycle] nonempty do
+%% - take x with greatest priority from ready[cycle]
+%% - try to schedule x;
+%% * if scheduling x was possible,
+%% - reserve resources
+%% - add x to schedule and delete x from dag
+%% - update earliest-time for all successor nodes
+%% as max[earliest[y],cycle+latency[x]]
+%% - if some node y now has no predecessors,
+%% add y to ready[earliest[y]]
+%% * if it was impossible, put x in ready[cycle+1]
+%% (= try again)
+%%
+%% We use the following data structures:
+%% 1. all nodes are numbered and indices used as array keys
+%% 2. priority per node can be computed statically or dynamically
+%% * statically: before scheduling, each node gets a priority value
+%% * dynamically: at each cycle, compute priorities for all ready nodes
+%% 3. earliest: earliest cycle of issue, starts at 0
+%% and is updated as predecessors issue
+%% 4. predecessors: number of predecessors (0 = ready to issue)
+%% 5. successors: list of {Latency,NodeID}
+%% 6. ready: an array indexed by cycle-time (integer), where
+%% ready nodes are kept.
+%% 7. resources: a resource representation (ADT) that answers
+%% certain queries, e.g., "can x be scheduled this cycle"
+%% and "reserve resources for x".
+%% 8. schedule: list of scheduled instructions {Instr,Cycle}
+%% in the order of issue
+%% 9. instructions: maps IDs back to instructions
+%%
+%% Inputs:
+%% - a list of {ID,Node} pairs (where ID is a unique key)
+%% - a dependence list {ID0,Latency,ID1}, which is used to
+%% build the DAG.
+%%
+%% Note that there is some leeway in how things are represented
+%% from here.
+%%
+%% MODIFICATIONS:
+%% - Some basic blocks are not worth scheduling (e.g., GC save/restore code)
+%% yet are pretty voluminous. How do we skip them?
+%% - Scheduling should be done at finalization time: when basic block is
+%% linearized and is definitely at Sparc assembly level, THEN reorder
+%% stuff.
+
+-module(hipe_schedule).
+-export([cfg/1, est_cfg/1, delete_node/5]).
+
+-include("../sparc/hipe_sparc.hrl").
+
+%%-define(debug1,true).
+
+-define(debug2(Str,Args),ok).
+%%-define(debug2(Str,Args),io:format(Str,Args)).
+
+-define(debug3(Str,Args),ok).
+%%-define(debug3(Str,Args),io:format(Str,Args)).
+
+-define(debug4(Str,Args),ok).
+%%-define(debug4(Str,Args),io:format(Str,Args)).
+
+-define(debug5(Str,Args),ok).
+%%-define(debug5(Str,Args),io:format(Str,Args)).
+
+-define(debug(Str,Args),ok).
+%%-define(debug(Str,Args),io:format(Str,Args)).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : cfg
+%% Argument : CFG - the control flow graph
+%% Returns : CFG - A new cfg with scheduled blocks
+%% Description : Takes each basic block and schedules them one by one.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+cfg(CFG) ->
+ ?debug3("CFG: ~n~p", [CFG]),
+ update_all( [ {L,
+ hipe_bb:mk_bb(
+ block(L,hipe_bb:code(hipe_sparc_cfg:bb(CFG,L))) )}
+ || L <- hipe_sparc_cfg:labels(CFG) ], CFG).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : update_all
+%% Argument : Blocks - [{Label, Block}] , a list with labels and new code
+%% used for updating the old CFG.
+%% CFG - The old controlflow graph
+%% Returns : An updated controlflow graph.
+%% Description : Just swappes the basic blocks in the CFG to the scheduled one.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+update_all([],CFG) -> CFG;
+update_all([{L,NewB}|Ls],CFG) ->
+ update_all(Ls,hipe_sparc_cfg:bb_add(CFG,L,NewB)).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+est_cfg(CFG) ->
+ update_all([ {L, hipe_bb:mk_bb(est_block(hipe_bb:code(hipe_sparc_cfg:bb(CFG,L))))}
+ || L <- hipe_sparc_cfg:labels(CFG) ], CFG).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% Provides an estimation of how quickly a block will execute.
+%% This is done by chaining all instructions in sequential order
+%% by 0-cycle dependences (which means they will never be reordered),
+%% then scheduling the mess.
+
+est_block([]) -> [];
+est_block([I]) -> [I];
+est_block(Blk) ->
+ {IxBlk,DAG} = est_deps(Blk),
+ Sch = bb(IxBlk,DAG),
+ separate_block(Sch,IxBlk).
+
+est_deps(Blk) ->
+ IxBlk = indexed_bb(Blk),
+ DAG = deps(IxBlk),
+ {IxBlk, chain_instrs(IxBlk,DAG)}.
+
+chain_instrs([{N,_}|Xs],DAG) ->
+ chain_i(N,Xs,DAG).
+
+chain_i(_,[],DAG) -> DAG;
+chain_i(N,[{M,_}|Xs],DAG) ->
+ NewDAG = dep_arc(N,zero_latency(),M,DAG),
+ chain_i(M,Xs,NewDAG).
+
+zero_latency() -> 0.
+
+lookup_instr([{N,I}|_], N) -> I;
+lookup_instr([_|Xs], N) -> lookup_instr(Xs, N).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : block
+%% Argument : Instrs - [Instr], list of all the instructions in a basic
+%% block.
+%% Returns : A new scheduled block
+%% Description : Schedule a basic block
+%%
+%% Note: does not consider delay slots!
+%% (another argument for using only annulled delay slots?)
+%% * how do we add delay slots? somewhat tricky to
+%% reconcile with the sort of scheduling we consider.
+%% (as-early-as-possible)
+%% => rewrite scheduler into as-late-as-possible?
+%% (=> just reverse the dependence arcs??)
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Don't fire up the scheduler if there's no work to do.
+block(_, []) ->
+ [];
+block(_L, [I]) ->
+ case hipe_sparc:is_any_branch(I) of
+ true -> [hipe_sparc:nop_create(), I];
+ false -> [I]
+ end;
+block(_L, Blk) ->
+ IxBlk = indexed_bb(Blk),
+ case IxBlk of
+ [{_N, I}] -> % comments and nops may have been removed.
+ case hipe_sparc:is_any_branch(I) of
+ true -> [hipe_sparc:nop_create(), I];
+ false -> [I]
+ end;
+ _ ->
+ Sch = bb(IxBlk, {DAG, _Preds} = deps(IxBlk)),
+ {NewSch, NewIxBlk} = fill_delays(Sch, IxBlk, DAG),
+ X = finalize_block(NewSch, NewIxBlk),
+ debug1_stuff(Blk, DAG, IxBlk, Sch, X),
+ X
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : fill_delays
+%% Argument : Sch - List of {{cycle, C}, {node, N}} : C = current cycle
+%% N = node index
+%% IxBlk - Indexed block [{N, Instr}]
+%% DAG - Dependence graph
+%% Returns : {NewSch, NewIxBlk} - vector with new schedule and vector
+%% with {N, Instr}
+%% Description : Goes through the schedule from back to front looking for
+%% branches/jumps. If one is found fill_del tries to find
+%% an instr to fill the delayslot.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+fill_delays(Sch, IxBlk, DAG) ->
+ NewIxBlk = hipe_vectors:list_to_vector(IxBlk),
+ %% NewSch = hipe_vectors:list_to_vector(Sch),
+ NewSch = fill_del(length(Sch), hipe_vectors:list_to_vector(Sch),
+ NewIxBlk, DAG),
+ {NewSch, NewIxBlk}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : fill_del
+%% Argument : N - current index in the schedule
+%% Sch - schedule
+%% IxBlk - indexed block
+%% DAG - dependence graph
+%% Returns : Sch - New schedule with possibly a delay instr in the last
+%% position.
+%% Description : If a call/jump is found fill_branch_delay/fill_call_delay
+%% is called to find a delay-filler.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+fill_del(N, Sch, _IxBlk, _DAG) when N < 1 -> Sch;
+fill_del(N, Sch, IxBlk, DAG) ->
+ Index = get_index(Sch, N),
+ ?debug2("Index for ~p: ~p~nInstr: ~p~n",
+ [N, Index, get_instr(IxBlk, Index)]),
+ NewSch =
+ case get_instr(IxBlk, Index) of
+ #call_link{} ->
+ fill_branch_delay(N - 1, N, Sch, IxBlk, DAG);
+ #jmp_link{} ->
+ fill_call_delay(N - 1, N, Sch, IxBlk, DAG);
+ #jmp{} ->
+ fill_call_delay(N - 1, N, Sch, IxBlk, DAG);
+ #b{} ->
+ fill_branch_delay(N - 1, N, Sch, IxBlk, DAG);
+ #br{} ->
+ fill_branch_delay(N - 1, N, Sch, IxBlk, DAG);
+ #goto{} ->
+ fill_branch_delay(N - 1, N, Sch, IxBlk, DAG);
+ _Other ->
+ Sch
+ end,
+ NewSch.
+ %% fill_del(N - 1, NewSch, IxBlk, DAG).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : fill_call_delay
+%% Argument : Cand - index in schedule of delay-candidate
+%% Call - index in schedule of call
+%% Sch - schedule vector: < {{cycle,Ci},{node,Nj}}, ... >
+%% IxBlk - block vector: < {N, Instr1}, {N+1, Instr2} ... >
+%% DAG - dependence graph
+%% Returns : Sch - new updated schedule.
+%% Description : Searches backwards through the schedule trying to find an
+%% instr without conflicts with the Call-instr.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+fill_call_delay(Cand, _Call, Sch, _IxBlk, _DAG) when Cand < 1 -> Sch;
+fill_call_delay(Cand, Call, Sch, IxBlk, DAG) ->
+ CandIndex = get_index(Sch, Cand),
+ CallIndex = get_index(Sch, Call),
+ CandI = get_instr(IxBlk, CandIndex),
+ case move_or_alu(CandI) of
+ true ->
+ case single_depend(CandIndex, CallIndex, DAG) of
+ false -> % Other instrs depends on Cand ...
+ fill_call_delay(Cand - 1, Call, Sch, IxBlk, DAG);
+
+ true ->
+ CallI = get_instr(IxBlk, CallIndex),
+
+ CandDefs = ordsets:from_list(hipe_sparc:defines(CandI)),
+ %% CandUses = ordsets:from_list(hipe_sparc:uses(CandI)),
+ %% CallDefs = ordsets:from_list(hipe_sparc:defines(CallI)),
+ CallUses = ordsets:from_list(hipe_sparc:uses(CallI)),
+
+ Args = case CallI of
+ #jmp_link{} ->
+ ordsets:from_list(
+ hipe_sparc:jmp_link_args(CallI));
+ #jmp{} ->
+ ordsets:from_list(hipe_sparc:jmp_args(CallI));
+ #call_link{} ->
+ ordsets:from_list(
+ hipe_sparc:call_link_args(CallI))
+ end,
+ CallUses2 = ordsets:subtract(CallUses, Args),
+ Conflict = ordsets:intersection(CandDefs, CallUses2),
+ %% io:format("single_depend -> true:~n ~p~n, ~p~n,~p~n",[CandI,CallI,DAG]),
+ %% io:format("Cand = ~p~nCall = ~p~n",[CandI,CallI]),
+ %% io:format("CandDefs = ~p~nCallDefs = ~p~n",[CandDefs,CallDefs]),
+ %% io:format("CandUses = ~p~nCallUses = ~p~n",[CandUses,CallUses]),
+ %% io:format("Args = ~p~nCallUses2 = ~p~n",[Args,CallUses2]),
+ %% io:format("Conflict = ~p~n",[Conflict]),
+
+ case Conflict of
+ [] -> % No conflicts ==> Cand can fill delayslot after Call
+ update_schedule(Cand, Call, Sch);
+ _ -> % Conflict: try with preceeding instrs
+ fill_call_delay(Cand - 1, Call, Sch, IxBlk, DAG)
+ end
+ end;
+ false ->
+ fill_call_delay(Cand - 1, Call, Sch, IxBlk, DAG)
+ end.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : fill_branch_delay
+%% Argument : Cand - index in schedule of delay-candidate
+%% Branch - index in schedule of branch
+%% Sch - schedule
+%% IxBlk - indexed block
+%% DAG - dependence graph
+%% Returns : Sch - new updated schedule.
+%% Description : Searches backwards through the schedule trying to find an
+%% instr without conflicts with the Branch-instr.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+fill_branch_delay(Cand, _Br, Sch, _IxBlk, _DAG) when Cand < 1 -> Sch;
+fill_branch_delay(Cand, Br, Sch, IxBlk, DAG) ->
+ CandIndex = get_index(Sch, Cand),
+ BrIndex = get_index(Sch, Br),
+ CandI = get_instr(IxBlk, CandIndex),
+ case move_or_alu(CandI) of
+ true ->
+ case single_depend(CandIndex, BrIndex, DAG) of
+ false -> % Other instrs depends on Cand ...
+ fill_branch_delay(Cand - 1, Br, Sch, IxBlk, DAG);
+
+ true ->
+ BrI = get_instr(IxBlk, BrIndex),
+ CandDefs = ordsets:from_list(hipe_sparc:defines(CandI)),
+ %% CandUses = ordsets:from_list(hipe_sparc:uses(CandI)),
+ %% BrDefs = ordsets:from_list(hipe_sparc:defines(BrI)),
+ BrUses = ordsets:from_list(hipe_sparc:uses(BrI)),
+
+ Conflict = ordsets:intersection(CandDefs, BrUses),
+ %% io:format("single_depend -> true: ~p~n, ~p~n,~p~n", [CandI, BrI, DAG]),
+ %% io:format("Cand = ~p~nBr = ~p~n",[CandI,BrI]),
+ %% io:format("CandDefs = ~p~nBrDefs = ~p~n",[CandDefs,BrDefs]),
+ %% io:format("CandUses = ~p~nBrUses = ~p~n",[CandUses,BrUses]),
+ %% io:format("Conflict = ~p~n",[Conflict]);
+
+ case Conflict of
+ [] -> % No conflicts ==>
+ % Cand can fill delayslot after Branch
+ update_schedule(Cand, Br, Sch);
+ _ -> % Conflict: try with preceeding instrs
+ fill_branch_delay(Cand - 1, Br, Sch, IxBlk, DAG)
+ end
+ end;
+ false ->
+ fill_branch_delay(Cand - 1, Br, Sch, IxBlk, DAG)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : update_schedule
+%% Argument : From - the position from where to switch indexes in Sch
+%% To - the position to where to switch indexes in Sch
+%% Sch - schedule
+%% Returns : Sch - an updated schedule
+%% Description : If From is the delay-filler and To is the Call/jump, the
+%% schedule is updated so From gets index To, To gets index
+%% To - 1, and the nodes between From and To gets old_index - 1.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+update_schedule(To, To, Sch) ->
+ {{cycle, C}, {node, _N} = Node} = hipe_vectors:get(Sch, To-1),
+ hipe_vectors:set(Sch, To-1, {{cycle, C+1}, Node});
+update_schedule(From, To, Sch) ->
+ Temp = hipe_vectors:get(Sch, From-1),
+ Sch1 = hipe_vectors:set(Sch, From-1, hipe_vectors:get(Sch, From)),
+ update_schedule(From + 1, To, hipe_vectors:set(Sch1, From, Temp)).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : single_depend
+%% Argument : N - Index of the delayslot candidate
+%% M - Index of the node that N possibly has a single
+%% depend to.
+%% DAG - The dependence graph
+%% Returns : true if no other nodes than N os depending on N
+%% Description : Checks that no other nodes than M depends on N and that the
+%% latency between them is zero or 1.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+single_depend(N, M, DAG) ->
+ Deps = hipe_vectors:get(DAG, N-1),
+ single_depend(M, Deps).
+
+single_depend(_N, []) -> true;
+single_depend(N, [{0, N}]) -> true;
+single_depend(N, [{1, N}]) -> true;
+single_depend(_N, [{_Lat, _}|_]) -> false.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : get_index
+%% Argument : Sch - schedule
+%% N - index in schedule
+%% Returns : Index - index of the node
+%% Description : Returns the index of the node on position N in the schedule.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+get_index(Sch, N) ->
+ {{cycle, _C}, {node, Index}} = hipe_vectors:get(Sch,N-1),
+ Index.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : get_instr
+%% Argument : IxBlk - indexed block
+%% N - index in block
+%% Returns : Instr
+%% Description : Returns the instr on position N in the indexed block.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+get_instr(IxBlk, N) ->
+ {_, Instr} = hipe_vectors:get(IxBlk, N-1),
+ Instr.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : get_instr
+%% Argument : Sch - schedule
+%% IxBlk - indexed block
+%% N - index in schedule
+%% Returns : Instr
+%% Description : Returns the instr on position N in the schedule.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+get_instr(Sch, IxBlk, N) ->
+ {{cycle, _C}, {node, Index}} = hipe_vectors:get(Sch, N-1),
+ {_, Instr} = hipe_vectors:get(IxBlk, Index-1),
+ Instr.
+
+separate_block(Sch,IxBlk) ->
+ sep_comments([{C,lookup_instr(IxBlk,N)} || {{cycle,C},{node,N}} <- Sch]).
+
+sep_comments([]) -> [];
+sep_comments([{C,I}|Xs]) ->
+ [hipe_sparc:comment_create({cycle,C}), I | sep_comments(Xs,C)].
+
+sep_comments([], _) -> [];
+sep_comments([{C1,I}|Xs], C0) ->
+ if
+ C1 > C0 ->
+ [hipe_sparc:comment_create({cycle,C1}),I|sep_comments(Xs,C1)];
+ true ->
+ [I|sep_comments(Xs, C0)]
+ end.
+
+finalize_block(Sch, IxBlk) ->
+ ?debug5("Sch: ~p~nIxBlk: ~p~n",[Sch,IxBlk]),
+ finalize_block(1, hipe_vectors:size(Sch), 1, Sch, IxBlk, []).
+
+finalize_block(N, End, _C, Sch, IxBlk, _Instrs) when N =:= End - 1 ->
+ NextLast = get_instr(Sch, IxBlk, N),
+ Last = get_instr(Sch, IxBlk, End),
+ ?debug5("NextLast: ~p~nLast: ~p~n",[NextLast,Last]),
+ case hipe_sparc:is_any_branch(Last) of
+ true -> % Couldn't fill delayslot ==> add NOP
+ [NextLast , hipe_sparc:nop_create(), Last];
+ false -> % Last is a delayslot-filler ==> change order...
+ [Last, NextLast]
+ end;
+finalize_block(N, End, C0, Sch, IxBlk, Instrs) ->
+ {{cycle, _C1}, {node, _M}} = hipe_vectors:get(Sch, N-1),
+ Instr = get_instr(Sch, IxBlk, N),
+ ?debug5("Instr: ~p~n~n",[Instr]),
+ [Instr | finalize_block(N + 1, End, C0, Sch, IxBlk, Instrs)].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : bb
+%% Argument : IxBlk - indexed block
+%% DAG - {Dag, Preds} where Dag is dependence graph and
+%% Preds is number of predecessors for each node.
+%% Returns : Sch
+%% Description : Initializes earliest-list, ready-list, priorities, resources
+%% and so on, and calls the cycle_sched which does the scheduling
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+bb(IxBlk,DAG) ->
+ bb(length(IxBlk), IxBlk, DAG).
+
+bb(N,IxBlk,{DAG, Preds}) ->
+ Earliest = init_earliest(N),
+ BigArray = N*10, % "nothing" is this big :-)
+ Ready = hipe_schedule_prio:init_ready(BigArray,Preds),
+ I_res = init_instr_resources(N, IxBlk),
+
+ Prio = hipe_schedule_prio:init_instr_prio(N,DAG),
+ Rsrc = init_resources(BigArray),
+ ?debug4("I_res: ~n~p~nPrio: ~n~p~nRsrc: ~n~p~n", [I_res,Prio,Rsrc]),
+ ?debug('cycle 1~n',[]),
+ Sch = empty_schedule(),
+ cycle_sched(1,Ready,DAG,Preds,Earliest,Rsrc,I_res,Prio,Sch,N,IxBlk).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : cycle_sched
+%% Argument : - C is current cycle, 1 or more.
+%% - Ready is an array (Cycle -> [Node])
+%% yielding the collection of nodes ready to be
+%% scheduled in a cycle.
+%% - DAG is an array (Instr -> [{Latency,Instr}])
+%% represents the dependence DAG.
+%% - Preds is an array (Instr -> NumPreds)
+%% counts the number of predecessors
+%% (0 preds = ready to be scheduled).
+%% - Earl is an array (Instr -> EarliestCycle)
+%% holds the earliest cycle an instruction can be scheduled.
+%% - Rsrc is a 'resource ADT' that handles scheduler resource
+%% management checks whether instruction can be scheduled
+%% this cycle without a stall.
+%% - I_res is an array (Instr -> Required_resources)
+%% holds the resources required to schedule an instruction.
+%% - Sch is the representation of the schedule current schedule.
+%% - N is the number of nodes remaining to be scheduled
+%% tells us when to stop the scheduler.
+%% - IxBlk is the indexed block with instrs
+%% Returns : present schedule
+%% Description : Scheduler main loop.
+%% Pick next ready node in priority order for cycle C until
+%% none remain.
+%% * check each node if it can be scheduled w/o stalling
+%% * if so, schedule it
+%% * otherwise, bump the node to the next cycle
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+cycle_sched(C,Ready,DAG,Preds,Earl,Rsrc,I_res,Prio,Sch,N,IxBlk) ->
+ case hipe_schedule_prio:next_ready(C,Ready,Prio,IxBlk,DAG,Preds,Earl) of
+% case hipe_schedule_prio:next_ready(C,Ready,Prio,IxBlk) of
+ {next,I,Ready1} ->
+ ?debug('try ~p~n==> ready = ~p~n',[I, Ready1]),
+ case resources_available(C,I,Rsrc,I_res) of
+ {yes,NewRsrc} ->
+ ?debug(' scheduled~n==> Rscrs = ~p~n',[NewRsrc]),
+ NewSch = add_to_schedule(I,C,Sch),
+ {ReadyNs,NewDAG,NewPreds,NewEarl} =
+ delete_node(C,I,DAG,Preds,Earl),
+ ?debug("NewPreds : ~p~n",[Preds]),
+ ?debug(' ReadyNs: ~p~n',[ReadyNs]),
+ NewReady = hipe_schedule_prio:add_ready_nodes(ReadyNs,
+ Ready1),
+ ?debug(' New ready: ~p~n',[NewReady]),
+ cycle_sched(C,NewReady,NewDAG,NewPreds,NewEarl,
+ NewRsrc,I_res,Prio,NewSch,N-1, IxBlk);
+ no ->
+ ?debug(' resource conflict~n',[]),
+ NewReady = hipe_schedule_prio:insert_node(C+1,I,Ready1),
+ cycle_sched(C,NewReady,DAG,Preds,Earl,Rsrc,
+ I_res,Prio,Sch,N,IxBlk)
+ end;
+ none -> % schedule next cycle if some node remains
+ if
+ N > 0 ->
+ ?debug('cycle ~p~n',[C+1]),
+ cycle_sched(C+1,Ready,DAG,Preds,Earl,
+ advance_cycle(Rsrc),
+ I_res,Prio,Sch,N, IxBlk);
+ true ->
+ present_schedule(Sch)
+ end
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : init_earliest
+%% Argument : N - number of instrs
+%% Returns :
+%% Description :
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+init_earliest(N) ->
+ hipe_vectors:new(N,1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% Schedule is kept reversed until the end.
+
+-define(present_node(I,Cycle),{{cycle,Cycle},{node,I}}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : empty_schedule
+%% Description : Returns an empty schedule.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+empty_schedule() -> [].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : add_to_schedule
+%% Argument : I - instr
+%% Cycle - cycle when I was placed
+%% Sch - schedule
+%% Description : Adds instr to schedule
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+add_to_schedule(I,Cycle,Sch) ->
+ [?present_node(I,Cycle)|Sch].
+
+present_schedule(Sch) -> lists:reverse(Sch).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% Interface to resource manager:
+%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : init_resources
+%% Description : Yields a 'big enough' array mapping (Cycle -> Resources);
+%% this array is called Rsrc below.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+init_resources(S) ->
+ hipe_target_machine:init_resources(S).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : init_instr_resources
+%% Argument : Nodes - a list of the instructions
+%% N - is the number of nodes
+%% Description : return a vector (NodeID -> Resource_requirements)
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+init_instr_resources(N,Nodes) ->
+ hipe_target_machine:init_instr_resources(N,Nodes).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : resources_available
+%% Argument : Cycle - the current cycle
+%% I - the current instruction (index = NodeID)
+%% Rsrc - a map (Cycle -> Resources)
+%% I_res - maps (NodeID -> Resource_requirements)
+%% Description : returns {yes,NewResTab} | no
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+resources_available(Cycle,I,Rsrc,I_res) ->
+ hipe_target_machine:resources_available(Cycle,I,Rsrc,I_res).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : advance_cycle
+%% Argument : Rsrc - resources
+%% Description : Returns an empty resources-state
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+advance_cycle(Rsrc) ->
+ hipe_target_machine:advance_cycle(Rsrc).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : delete_node
+%% Argument : Cycle - current cycle
+%% I - index of instr
+%% DAG - dependence dag
+%% Preds - array with number of predecessors for nodes
+%% Earl - array with earliest-times for nodes
+%% Returns : {ReadyNs,NewDAG,NewPreds,NewEarl}
+%% Description : Deletes node I and updates earliest times for the rest.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+delete_node(Cycle,I,DAG,Preds,Earl) ->
+ Succ = hipe_vectors:get(DAG,I-1),
+ NewDAG = hipe_vectors:set(DAG,I-1,scheduled), % provides debug 'support'
+ {ReadyNs,NewPreds,NewEarl} = update_earliest(Succ,Cycle,Preds,Earl,[]),
+ ?debug('earliest after ~p: ~p~n',[I,[{Ix+1,V} || {Ix,V} <- hipe_vectors:list(NewEarl)]]),
+ {ReadyNs,NewDAG,NewPreds,NewEarl}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : update_earliest
+%% Argument : Succ - successor list
+%% Cycle - current cycle
+%% Preds - predecessors
+%% Earl - earliest times for nodes
+%% Ready - array with readynodes for cycles
+%% Returns : {Ready,Preds,Earl}
+%% Description : Updates the earliest times for nodes and updates number of
+%% predecessors for nodes
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+update_earliest([],_Cycle,Preds,Earl,Ready) ->
+ {Ready,Preds,Earl};
+update_earliest([{Lat,N}|Xs],Cycle,Preds,Earl,Ready) ->
+ Old_earl = hipe_vectors:get(Earl,N-1),
+ New_earl = erlang:max(Old_earl,Cycle+Lat),
+ NewEarl = hipe_vectors:set(Earl,N-1,New_earl),
+ Num_preds = hipe_vectors:get(Preds,N-1),
+ NewPreds = hipe_vectors:set(Preds,N-1,Num_preds-1),
+ if
+ Num_preds =:= 0 ->
+ ?debug('inconsistent DAG~n',[]),
+ exit({update_earliest,N});
+ Num_preds =:= 1 ->
+ NewReady = [{New_earl,N}|Ready],
+ NewPreds2 = hipe_vectors:set(NewPreds,N-1,0),
+ update_earliest(Xs,Cycle,NewPreds2,NewEarl,NewReady);
+ is_integer(Num_preds), Num_preds > 1 ->
+ update_earliest(Xs,Cycle,NewPreds,NewEarl,Ready)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% Collect instruction dependences.
+%%
+%% Three forms:
+%% - data/register
+%% * insert RAW, WAR, WAW dependences
+%% - memory
+%% * stores serialize memory references
+%% * alias analysis may allow loads to bypass stores
+%% - control
+%% * unsafe operations are 'trapped' between branches
+%% * branches are ordered
+%%
+%% returns { [{Index,Instr}], DepDAG }
+%% DepDAG is defined below.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : deps
+%% Argument : BB - Basic block
+%% Returns : {IxBB,DAG} - indexed block and dependence graph. DAG consists
+%% of both Dag and Preds, where Preds is number
+%% of predecessors for nodes.
+%% Description : Collect instruction dependences.
+%%
+%% Three forms:
+%% - data/register
+%% * insert RAW, WAR, WAW dependences
+%% - memory
+%% * stores serialize memory references
+%% * alias analysis may allow loads to bypass stores
+%% - control
+%% * unsafe operations are 'trapped' between branches
+%% * branches are ordered
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+deps(IxBB) ->
+ N = length(IxBB),
+ DAG = empty_dag(N), % The DAG contains both dependence-arcs and
+ % number of predeccessors...
+ {_DepTab,DAG1} = dd(IxBB, DAG),
+ DAG2 = md(IxBB, DAG1),
+ cd(IxBB, DAG2).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : empty_dag
+%% Argument : N - number of nodes
+%% Returns : empty DAG
+%% Description : DAG consists of dependence graph and predeccessors
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+empty_dag(N) ->
+ {hipe_vectors:new(N, []), hipe_vectors:new(N, 0)}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : indexed_bb
+%% Argument : BB - basic block
+%% Returns : [{N, Instr}]
+%% Description : Puts indexes to all instrs of a block, removes comments.
+%% NOP's are also removed because if both sparc_schedule and
+%% sparc_post_schedule options are used, the first pass will
+%% add nop's before the branch if necessary, and these are
+%% removed before scheduling the second pass.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+indexed_bb(BB) ->
+ indexed_bb(BB,1).
+
+indexed_bb([],_N) -> [];
+indexed_bb([X|Xs],N) ->
+ case X of
+ #comment{} ->
+ indexed_bb(Xs,N);
+ #nop{} ->
+ indexed_bb(Xs,N);
+ _Other ->
+ [{N,X}|indexed_bb(Xs,N+1)]
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : dep_arc
+%% Argument : N - Current node
+%% Lat - Latency from current node to M
+%% M - The dependent node
+%% DAG - The dependence graph. Consists of both DAG and
+%% predeccessors
+%% Returns : A new DAG with the arc added and number of predeccessors for
+%% M increased.
+%% Description : Adds a new arc to the graph, if an older arc goes from N to M
+%% it will be replaced with a new arc {max(OldLat, NewLat), M}.
+%% Number of predeccessors for node M is increased.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+dep_arc(N, Lat, M, {Dag,Preds}) ->
+ OldDeps = hipe_vectors:get(Dag, N-1),
+ %% io:format("{OldDeps} = {~p}~n",[OldDeps]),
+ {NewDeps, Status} = add_arc(Lat, M, OldDeps),
+ %% io:format("{NewDeps, Status} = {~p, ~p}~n",[NewDeps, Status]),
+ NewDag = hipe_vectors:set(Dag, N-1, NewDeps),
+ NewPreds = case Status of
+ added -> % just increase preds if new arc was added
+ OldPreds = hipe_vectors:get(Preds, M-1),
+ hipe_vectors:set(Preds, M-1, OldPreds + 1);
+ non_added ->
+ Preds
+ end,
+ {NewDag, NewPreds}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : add_arc
+%% Argument : Lat - The latency from current node to To.
+%% To - The instr-id of the node which the dependence goes to
+%% Arcs - The dependecies that are already in the dep-graph
+%% Returns : A dependence graph sorted by To.
+%% Description : A new arc that is added is sorted in the right place, and if
+%% there is already an arc between nodes A and B, the one with
+%% the greatest latency is choosen.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+add_arc(Lat,To, []) -> {[{Lat, To}], added};
+add_arc(Lat1, To, [{Lat2, To} | Arcs]) ->
+ {[{erlang:max(Lat1, Lat2), To} | Arcs], non_added};
+add_arc(Lat1,To1, [{Lat2, To2} | Arcs]) when To1 < To2 ->
+ {[{Lat1, To1}, {Lat2, To2} | Arcs], added};
+add_arc(Lat1 ,To1, [{Lat2, To2} | Arcs]) ->
+ {Arcs1, Status} = add_arc(Lat1, To1, Arcs),
+ {[{Lat2, To2} | Arcs1], Status}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% The register/data dependence DAG of a block is represented
+%% as a mapping (Variable -> {NextWriter,NextReaders})
+%% where NextWriter is a pair {Ix,Type}
+%% and NextReaders is a list of pairs {Ix,Type}.
+%%
+%% Type is used to determine latencies of operations; on the UltraSparc,
+%% latencies of arcs (n -> m) are determined by both n and m. (E.g., if
+%% n is an integer op and m is a store, then latency is 0; if m is an
+%% integer op, it's 1.)
+
+dd([],DAG) -> { empty_deptab(), DAG };
+dd([{N,I}|Is],DAG0) ->
+ {DepTab,DAG1} = dd(Is,DAG0),
+ add_deps(N,I,DepTab,DAG1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : add_deps
+%% Argument : N - current node
+%% Instr - current instr
+%% DepTab - hashtable with {next-writer, next-readers} for reg
+%% DAG - dependence graph
+%% Returns : {DepTab, BlockInfo, DAG} - with new values
+%% Description : Adds dependencies for node N to the graph. The registers that
+%% node N defines and uses are used for computing the
+%% dependencies to the following nodes.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+add_deps(N,Instr,DepTab,DAG) ->
+ {Ds,Us} = def_use(Instr),
+ Type = dd_type(Instr),
+ {DepTab1,DAG1} = add_write_deps(Ds,N,Type,DepTab,DAG),
+ add_read_deps(Us,N,Type,DepTab1,DAG1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Instructions are classified into symbolic categories,
+%% which are subsequently used to determine operation latencies
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+dd_type(Instr) ->
+ case Instr of
+ #b{} -> branch;
+ %% #br{} -> branch;
+ #call_link{} -> branch;
+ #jmp_link{} -> branch;
+ #jmp{} -> branch;
+ #goto{} -> branch;
+ #load{} -> load;
+ #store{} -> store;
+ #alu{} -> alu;
+ #move{} -> alu;
+ #multimove{} ->
+ Src = hipe_sparc:multimove_src(Instr),
+ Lat = round(length(Src)/2),
+ {mmove,Lat};
+ #sethi{} -> alu;
+ #alu_cc{} -> alu_cc;
+ %% #cmov_cc{} -> cmov_cc;
+ %% #cmov_r{} -> alu;
+ #load_atom{} -> alu;
+ #load_address{} -> alu;
+ #pseudo_enter{} -> pseudo;
+ #pseudo_pop{} -> pseudo;
+ #pseudo_return{} -> pseudo;
+ #pseudo_spill{} -> pseudo;
+ #pseudo_unspill{} -> pseudo
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : add_write_deps
+%% Argument : Defs - registers that node N defines.
+%% N - current node
+%% Ty - the type of current instr
+%% DepTab - Dependence-table
+%% DAG - The dependence graph.
+%% Returns : {DepTab,DAG} - with new values
+%% Description : Adds dependencies to the graph for nodes that depends on the
+%% registers that N defines.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+add_write_deps([],_N,_Ty,DepTab,DAG) -> {DepTab,DAG};
+add_write_deps([D|Ds],N,Ty,DepTab,DAG) ->
+ {NewDepTab,NewDAG} = add_write_dep(D,N,Ty,DepTab,DAG),
+ add_write_deps(Ds,N,Ty,NewDepTab,NewDAG).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : add_write_dep
+%% Description : Updates the dependence table with N as next writer, and
+%% updates the DAG with the dependencies from N to subsequent
+%% nodes.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+add_write_dep(X,N,Ty,DepTab,DAG) ->
+ {NxtWriter,NxtReaders} = lookup(X,DepTab),
+ NewDepTab = writer(X,N,Ty,DepTab),
+ NewDAG = write_deps(N,Ty,NxtWriter,NxtReaders,DAG),
+ {NewDepTab, NewDAG}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : write_deps
+%% Argument : Instr - Current instr
+%% Ty - Type of current instr
+%% NxtWriter - The node that is the next writer of the ragister
+%% that Instr defines.
+%% NxtReaders - The nodes that are subsequent readers of the
+%% register that N defines.
+%% DAG - The dependence graph
+%% Returns : Calls raw_deps that finally returns a new DAG with the new
+%% dependence arcs added.
+%% Description : If a next writer exists a dependence arc for this node is
+%% added, and after this raw_deps is called to compute the
+%% arcs for read-after-write dependencies.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+write_deps(Instr,Ty,NxtWriter,NxtReaders,DAG) ->
+ DAG1 = case NxtWriter of
+ none ->
+ DAG;
+ {Instr,_} ->
+ DAG;
+ {Wr,WrTy} ->
+ dep_arc(Instr,
+ hipe_target_machine:waw_latency(Ty,WrTy),
+ Wr, DAG)
+ end,
+ raw_deps(Instr,Ty,NxtReaders,DAG1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : raw_deps
+%% Argument : Instr - current instr
+%% Type - type of instr
+%% Readers - subsequent readers
+%% DAG - dependence graph
+%% Returns : DAG - A new DAG with read-after-write dependencies added
+%% Description : Updates the DAG with the dependence-arcs from Instr to the
+%% subsequent readers, with the appropriate latencies.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+raw_deps(_Instr,_Type,[],DAG) -> DAG;
+raw_deps(Instr,Ty,[{Rd,RdTy}|Xs],DAG) ->
+ raw_deps(Instr,Ty,Xs,
+ dep_arc(Instr,hipe_target_machine:raw_latency(Ty,RdTy),
+ Rd,DAG)).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : add_read_deps
+%% Argument : Uses - The registers that node N uses.
+%% N - Index of the current node.
+%% Ty - Type of current node.
+%% DepTab - Dependence table
+%% DAG - Dependence graph
+%% Returns : {DepTab, DAG} - with updated values.
+%% Description : Adds the read dependencies from node N to subsequent ones,
+%% according to the registers that N uses.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+add_read_deps([],_N,_Ty,DepTab,DAG) -> {DepTab,DAG};
+add_read_deps([U|Us],N,Ty,DepTab,DAG) ->
+ {NewDepTab,NewDAG} = add_read_dep(U,N,Ty,DepTab,DAG),
+ add_read_deps(Us,N,Ty,NewDepTab,NewDAG).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : add_read_dep
+%% Argument : X - Used register
+%% N - Index of checked instr
+%% Ty - Type of checked instr
+%% DepTab - Hashtable with {next-writer, next-readers}
+%% DAG - Dependence graph
+%% Returns : {DepTab, DAG} - with updated values
+%% Description : Looks up what the next-writer/next-readers are, and adjusts
+%% the table with current node as new reader. Finally
+%% read-dependencies are added to the DAG.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+add_read_dep(X,N,Ty,DepTab,DAG) ->
+ {NxtWriter,_NxtReaders} = lookup(X,DepTab),
+ NewDepTab = reader(X,N,Ty,DepTab),
+ NewDAG = read_deps(N,Ty,NxtWriter,DAG),
+ {NewDepTab, NewDAG}.
+
+% If NxtWriter is 'none', then this var is not written subsequently
+% Add WAR from Instr to NxtWriter (if it exists)
+% *** UNFINISHED ***
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : read_deps
+%% Argument : N - Index of current node
+%% Ty - Type of current node
+%% Writer - tuple {NextWriter, WrType} where NextWriter is the
+%% subsequent instr that writes this register next time,
+%% and WrType is the type of that instr.
+%% DAG - The dependence graph
+%% Returns : DAG
+%% Description : Returns a new DAG if a next-writer exists, otherwise the old
+%% DAG is returned.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+read_deps(_Instr,_Ty,none,DAG) ->
+ DAG;
+read_deps(_Instr,_Ty,{_Instr,_},DAG) ->
+ DAG;
+read_deps(Instr,Ty,{NxtWr,NxtWrTy},DAG) ->
+ dep_arc(Instr,hipe_target_machine:war_latency(Ty,NxtWrTy),NxtWr,
+ DAG).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : empty_deptab
+%% Description : Creates an empty dependence table (hash-table)
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+empty_deptab() ->
+ gb_trees:empty().
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : lookup
+%% Argument : X - key (register)
+%% DepTab - dependence table
+%% Returns : {NextWriter, NextReaders}
+%% Description : Returns next writer and a list of following readers on
+%% register X.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+lookup(X, DepTab) ->
+ case gb_trees:lookup(X, DepTab) of
+ none ->
+ {none, []};
+ {value, {W, Rs} = Val} ->
+ Val
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : writer
+%% Argument : X - key (register)
+%% N - index of writer
+%% Ty - type of writer
+%% DepTab - dependence table to be updated
+%% Returns : DepTab - new dependence table
+%% Description : Sets N tobe next writer on X
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+writer(X, N, Ty, DepTab) ->
+ gb_trees:enter(X, {{N, Ty}, []}, DepTab).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : reader
+%% Argument : X - key (register)
+%% N - index of reader
+%% Ty - type of reader
+%% DepTab - dependence table to be updated
+%% Returns : DepTab - new dependence table
+%% Description : Adds N to the dependence table as a reader.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+reader(X,N,Ty,DepTab) ->
+ {W,Rs} = lookup(X,DepTab),
+ gb_trees:enter(X,{W,[{N,Ty}|Rs]},DepTab).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% The following version of md/2 separates heap- and stack operations,
+%% which allows for greater reordering.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : md
+%% Argument : IxBB - indexed block
+%% DAG - dependence graph
+%% Returns : DAG - new dependence graph
+%% Description : Adds arcs for load/store dependencies to the DAG.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+md(IxBB, DAG) ->
+ md(IxBB,empty_md_state(),DAG).
+
+md([],_,DAG) -> DAG;
+md([{N,I}|Is],St,DAG) ->
+ case md_type(I) of
+ other ->
+ md(Is,St,DAG);
+ {st,T} ->
+ { WAW_nodes, WAR_nodes, NewSt } = st_overlap(N,T,St),
+ md(Is,NewSt,
+ md_war_deps(WAR_nodes,N,md_waw_deps(WAW_nodes,N,DAG)));
+ {ld,T} ->
+ { RAW_nodes, NewSt } = ld_overlap(N,T,St),
+ md(Is,NewSt,
+ md_raw_deps(RAW_nodes,N,DAG))
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : md_war_deps
+%% Argument : WAR_nodes - write-after-read nodes depending on N
+%% N - index of current instr
+%% DAG - dependence graph
+%% Returns : DAG - updated DAG
+%% Description : Adds arcs for write-after-read dependencies for N
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+md_war_deps([],_,DAG) -> DAG;
+md_war_deps([M|Ms],N,DAG) ->
+ md_war_deps(Ms,N,dep_arc(M,hipe_target_machine:m_war_latency(),N,DAG)).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : md_waw_deps
+%% Argument : WAW_nodes - write-after-write nodes depending on N
+%% N - index of current instr
+%% DAG - dependence graph
+%% Returns : DAG - updated DAG
+%% Description : Adds arcs for write-after-write dependencies for N
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+md_waw_deps([],_,DAG) -> DAG;
+md_waw_deps([M|Ms],N,DAG) ->
+ md_waw_deps(Ms,N,dep_arc(M,hipe_target_machine:m_waw_latency(),N,DAG)).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : md_raw_deps
+%% Argument : RAW_nodes - read-after-write nodes depending on N
+%% N - index of current instr
+%% DAG - dependence graph
+%% Returns : DAG - updated DAG
+%% Description : Adds arcs for read-after-write dependencies for N
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+md_raw_deps([],_,DAG) -> DAG;
+md_raw_deps([M|Ms],N,DAG) ->
+ md_raw_deps(Ms,N,dep_arc(M,hipe_target_machine:m_raw_latency(),N,DAG)).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : empty_md_state
+%% Description : Returns an empty memorydependence state, eg. 4 lists
+%% representing {StackStores, HeapStores, StackLoads, HeapLoads}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+empty_md_state() -> {[], [], [], []}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : md_type
+%% Argument : I - instr
+%% Description : Maps the instr-type to a simplified type, telling if it's
+%% store/load resp. heap or stack.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+md_type(I) ->
+ case I of
+ #load{} ->
+ Sp = hipe_sparc_registers:stack_pointer(),
+ Src = hipe_sparc:load_src(I),
+ N = hipe_sparc:reg_nr(Src),
+ Off = hipe_sparc:load_off(I),
+ if
+ N =:= Sp -> % operation on stack
+ {ld,{sp,Off}};
+ true ->
+ {ld,{hp,Src,Off}}
+ end;
+ #store{} ->
+ Sp = hipe_sparc_registers:stack_pointer(),
+ Dst = hipe_sparc:store_dest(I),
+ N = hipe_sparc:reg_nr(Dst),
+ Off = hipe_sparc:store_off(I),
+ if
+ N =:= Sp ->
+ {st,{sp,Off}};
+ true ->
+ {st,{hp,Dst,Off}}
+ end;
+ _ ->
+ other
+ end.
+
+%% Given a memory operation and a 'memory op state',
+%% overlap(N,MemOp,State) returns { Preceding_Dependent_Ops, NewState }.
+%% which are either a tuple { WAW_deps, WAR_deps } or a list RAW_deps.
+%%
+%% NOTES:
+%% Note that Erlang's semantics ("heap stores never overwrite existing data")
+%% means we can be quite free in reordering stores to the heap.
+%% Ld/St to the stack are simply handled by their offsets; since we do not
+%% rename the stack pointer, this is sufficient.
+%% *** We assume all memory ops have uniform size = 4 ***
+%%
+%% NOTES:
+%% The method mentioned above has now been changed because the assumption that
+%% "heap stores never overwrite existing data" caused a bug when the
+%% process-pointer was treated the same way as the heap. We were also told
+%% that the semantics can possibly change in the future, so it would be more
+%% safe to treat the heap store/loads as the stack.
+%% A future improvement can be to do an alias analysis to give more freedom
+%% in reordering stuff...
+%%
+%% Alias state:
+%% { [StackOp], [HeapOp], [StackOp], [HeapOp] }
+%% where StackOp = {InstrID, Offset}
+%% HeapOp = {InstrID, Reg, Offset}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : st_overlap
+%% Argument : N - Index of current node
+%% Type - {sp,Off} or {hp,Dst,Off}, store on stack or heap
+%% State - { [StackStrs], [HeapStrs], [StackLds], [HeapLds] }
+%% where StackStrs/StackLds = {InstrID, Offset}
+%% and HeapStrs/HeapLds = {InstrID, Reg, Offset}
+%% Returns : { DepStrs, DepLds, State } -
+%% where DepStrs/DepLds = [NodeId]
+%% and State is the new state
+%% Description : Adds dependencies for overlapping stores.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+st_overlap(N, {sp, Off}, {St_Sp, St_Hp, Ld_Sp, Ld_Hp}) ->
+ {DepSt, IndepSt_Sp} = st_sp_dep(St_Sp, Off),
+ {DepLd, IndepLd_Sp} = ld_sp_dep(Ld_Sp, Off),
+ {DepSt, DepLd, {[{N, Off}|IndepSt_Sp], St_Hp, IndepLd_Sp, Ld_Hp}};
+st_overlap(N, {hp, Dst, Off}, {St_Sp, St_Hp, Ld_Sp, Ld_Hp}) ->
+ DstOff = {Dst, Off},
+ {DepSt,_IndepSt_Hp} = st_hp_dep(St_Hp, DstOff),
+ {DepLd, IndepLd_Hp} = ld_hp_dep(Ld_Hp, DstOff),
+ {DepSt, DepLd, {St_Sp, [{N, Dst, Off}|St_Hp], Ld_Sp, IndepLd_Hp}}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : ld_overlap
+%% Argument : N - Index of current node
+%% Type - {sp,Off} or {hp,Dst,Off}, store on stack or heap
+%% State - { [StackStrs], [HeapStrs], [StackLds], [HeapLds] }
+%% where StackStrs/StackLds = {InstrID, Offset}
+%% and HeapStrs/HeapLds = {InstrID, Reg, Offset}
+%% Returns : { DepStrs, State }
+%% Description : Adds dependencies for overlapping laods
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ld_overlap(N, {sp, Off}, {St_Sp, St_Hp, Ld_Sp, Ld_Hp}) ->
+ DepSt = sp_dep_only(St_Sp, Off),
+ {DepSt, {St_Sp, St_Hp, [{N, Off}|Ld_Sp], Ld_Hp}};
+ld_overlap(N, {hp, Src, Off}, {St_Sp, St_Hp, Ld_Sp, Ld_Hp}) ->
+ DepSt = hp_dep_only(St_Hp, Src, Off),
+ {DepSt, {St_Sp, St_Hp, Ld_Sp, [{N, Src, Off}|Ld_Hp]}}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : st_sp_dep
+%% Description : Adds dependencies that are depending on a stack store
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+st_sp_dep(Stores, Off) ->
+ sp_dep(Stores, Off, [], []).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : ld_sp_dep
+%% Description : Adds dependencies that are depending on a stack load
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ld_sp_dep(Loads, Off) ->
+ sp_dep(Loads, Off, [], []).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : st_hp_dep
+%% Description : Adds dependencies that are depending on a heap store
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+st_hp_dep(Stores, {_Reg, _Off} = RegOff) ->
+ hp_dep(Stores, RegOff, [], []).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : ld_hp_dep
+%% Description : Adds dependencies that are depending on a heap load
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ld_hp_dep(Loads, {_Reg, _Off} = RegOff) ->
+ hp_dep(Loads, RegOff, [], []).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : sp_dep
+%% Description : Returns {Dependent, Independent} which are lists of nodes
+%% that depends or not on a stack load/store
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+sp_dep([], _Off, Dep, Indep) -> {Dep, Indep};
+sp_dep([{N,Off}|Xs], Off, Dep, Indep) ->
+ sp_dep(Xs, Off, [N|Dep], Indep);
+sp_dep([X|Xs], Off, Dep, Indep) ->
+ sp_dep(Xs, Off, Dep, [X|Indep]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : hp_dep
+%% Description : Returns {Dependent, Independent} which are lists of nodes
+%% that depends or not on a heap load/store
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+hp_dep([], {_Reg,_Off}, Dep, Indep) -> {Dep,Indep};
+hp_dep([{N,Reg,Off1}|Xs], {Reg,Off}, Dep, Indep) when Off1 =/= Off ->
+ hp_dep(Xs, {Reg,Off}, Dep, [{N,Reg,Off1}|Indep]);
+hp_dep([{N,_,_}|Xs], {Reg,Off}, Dep, Indep) ->
+ hp_dep(Xs, {Reg,Off}, [N|Dep], Indep).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : sp_dep_only
+%% Description : Returns a list of nodes that are depending on a stack store
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+sp_dep_only(Stores, Off) ->
+ [N || {N,Off0} <- Stores, Off =:= Off0].
+
+%% Dependences from heap stores to heap loads.
+%% *** UNFINISHED ***
+%% - but works
+%% This is somewhat subtle:
+%% - a heap load can only bypass a heap store if we KNOW it won't
+%% load the stored value
+%% - unfortunately, we do not know the relationships between registers
+%% at this point, so we can't say that store(p+4) is independent of
+%% load(q+0).
+%% (OR CAN WE? A bit closer reasoning might show that it's possible?)
+%% - We can ONLY say that st(p+c) and ld(p+c') are independent when c /= c'
+%%
+%% (As said before, it might be possible to lighten this restriction?)
+
+hp_dep_only([], _Reg, _Off) -> [];
+hp_dep_only([{_N,Reg,Off_1}|Xs], Reg, Off) when Off_1 =/= Off ->
+ hp_dep_only(Xs, Reg, Off);
+hp_dep_only([{N,_,_}|Xs], Reg, Off) ->
+ [N|hp_dep_only(Xs, Reg, Off)].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Control dependences:
+%% - add dependences so that
+%% * branches are performed in order
+%% * unsafe operations are 'fenced in' by surrounding branches
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : cd
+%% Argument : IxBB - indexed block
+%% DAG - dependence graph
+%% Returns : DAG - new dependence graph
+%% Description : Adds conditional dependencies to the DAG
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+cd(IxBB,DAG) ->
+ cd(IxBB, DAG, none, [], []).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : cd
+%% Argument : IxBB - indexed block
+%% DAG - dependence graph
+%% PrevBr - previous branch
+%% PrevUnsafe - previous unsafe instr (mem-op)
+%% PrevOthers - previous other instrs, used to "fix" preceeding
+%% instrs so they don't bypass a branch.
+%% Returns : DAG - new dependence graph
+%% Description : Adds conditional dependencies to the graph.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+cd([], DAG, _PrevBr, _PrevUnsafe, _PrevOthers) ->
+ DAG;
+cd([{N,I}|Xs], DAG, PrevBr, PrevUnsafe, PrevOthers) ->
+ case cd_type(I) of
+ {branch,Ty} ->
+ DAG1 = cd_branch_to_other_deps(N, PrevOthers, DAG),
+ NewDAG = cd_branch_deps(PrevBr, PrevUnsafe, N, Ty, DAG1),
+ cd(Xs,NewDAG,{N,Ty},[],[]);
+ {unsafe,Ty} ->
+ NewDAG = cd_unsafe_deps(PrevBr,N,Ty,DAG),
+ cd(Xs, NewDAG, PrevBr, [{N,Ty}|PrevUnsafe], PrevOthers);
+ {other,_Ty} ->
+ cd(Xs, DAG, PrevBr, PrevUnsafe, [N|PrevOthers])
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : cd_branch_to_other_deps
+%% Argument : N - index of branch
+%% Ms - list of indexes of "others" preceeding instrs
+%% DAG - dependence graph
+%% Returns : DAG - new graph
+%% Description : Makes preceeding instrs fixed so they don't bypass a branch
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+cd_branch_to_other_deps(_, [], DAG) ->
+ DAG;
+cd_branch_to_other_deps(N, [M | Ms], DAG) ->
+ cd_branch_to_other_deps(N, Ms, dep_arc(M, zero_latency(), N, DAG)).
+
+%% Is the operation a branch, an unspeculable op or something else?
+
+%% Returns
+%% {branch,BranchType}
+%% {unsafe,OpType}
+%% {other,OpType}
+
+%% *** UNFINISHED ***
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : cd_type
+%% Argument : I - instr
+%% Description : Maps instrs to a simpler type.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+cd_type(I) ->
+ case I of
+ #goto{} ->
+ {branch,uncond};
+ #br{} ->
+ {branch,'cond'};
+ #b{} ->
+ {branch,'cond'};
+ #call_link{} ->
+ {branch,call};
+ #jmp_link{} ->
+ {branch,call};
+ #jmp{} ->
+ {branch,call};
+ #load{} ->
+ {unsafe,load};
+ #store{} ->
+ {unsafe,load};
+ T ->
+ {other,T}
+ end.
+
+%% add dependences to keep order of branches + unspeculable ops:
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : cd_branch_deps
+%% Argument : PrevBr - preceeding branch
+%% PrevUnsafe - preceeding unsafe ops, eg, mem-ops
+%% N - current id.
+%% Ty - type of current instr
+%% DAG - dependence graph
+%% Returns : DAG - new DAG
+%% Description : Adds arcs between branches and calls deps_to_unsafe that adds
+%% arcs between branches and unsafe ops.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+cd_branch_deps(PrevBr, PrevUnsafe, N, Ty, DAG) ->
+ DAG1 = case PrevBr of
+ none ->
+ DAG;
+ {Br,BrTy} ->
+ dep_arc(Br,
+ hipe_target_machine:br_br_latency(BrTy,Ty),
+ N, DAG)
+ end,
+ deps_to_unsafe(PrevUnsafe, N, Ty, DAG1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : deps_to_unsafe
+%% Description : Adds dependencies between unsafe's and branches
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+deps_to_unsafe([], _, _, DAG) -> DAG;
+deps_to_unsafe([{M,UTy}|Us], N, Ty, DAG) ->
+ deps_to_unsafe(Us,N,Ty,
+ dep_arc(M, hipe_target_machine:unsafe_to_br_latency(UTy,Ty),
+ N, DAG)).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : cd_unsafe_deps
+%% Description : Adds dependencies between branches and unsafe's
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+cd_unsafe_deps(none, _, _, DAG) ->
+ DAG;
+cd_unsafe_deps({Br,BrTy}, N, Ty, DAG) ->
+ dep_arc(Br, hipe_target_machine:br_to_unsafe_latency(BrTy, Ty), N, DAG).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : def_use
+%% Argument : Instr
+%% Description : Returns the registers that Instr defines resp. uses as 2 lists
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+def_use(Instr) ->
+ {hipe_sparc:defines(Instr), hipe_sparc:uses(Instr)}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : move_or_alu
+%% Description : True if the instruction is a move or an alu; false otherwise
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+move_or_alu(#move{}) -> true;
+move_or_alu(#alu{}) -> true;
+move_or_alu(_) -> false.
+
+%% Debugging stuff below %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-ifdef(debug1).
+debug1_stuff(Blk, DAG, IxBlk, Sch, X) ->
+ io:format("Blk: ~p~n",[Blk]),
+ io:format("DAG: ~n~p~n~p",[DAG,IxBlk]),
+ io:format("~n"),
+ print_instrs(IxBlk),
+ print_sch(Sch, IxBlk),
+ print_instrs2(X).
+
+print_instrs([]) ->
+ io:format("~n");
+print_instrs([{N,Instr} | Instrs]) ->
+ io:format("(~p): ",[N]),
+ hipe_sparc_pp:pp_instr(Instr),
+ io:format("~p~n",[element(1,Instr)]),
+ print_instrs(Instrs).
+
+print_instrs2([]) ->
+ io:format("~n");
+print_instrs2([Instr | Instrs]) ->
+ hipe_sparc_pp:pp_instr(Instr),
+ print_instrs2(Instrs).
+
+print_sch([],_) -> io:format("~n");
+print_sch([{{cycle,Cycle},{node,I}} | Rest], IxBlk) ->
+ io:format("{C~p, N~p} ",[Cycle,I]),
+ print_node(I, IxBlk),
+ print_sch(Rest, IxBlk).
+
+print_node(_, []) ->
+ io:format("~n");
+print_node(I, [{I, Instr} | _]) ->
+ hipe_sparc_pp:pp_instr(Instr);
+print_node(I, [_ | IxBlk]) ->
+ print_node(I, IxBlk).
+-else.
+debug1_stuff(_Blk, _DAG, _IxBlk, _Sch, _X) ->
+ ok.
+-endif.
diff --git a/lib/hipe/opt/hipe_schedule_prio.erl b/lib/hipe/opt/hipe_schedule_prio.erl
new file mode 100644
index 0000000000..4d078b007d
--- /dev/null
+++ b/lib/hipe/opt/hipe_schedule_prio.erl
@@ -0,0 +1,58 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% PRIORITY HANDLING AND PRIORITY CALCULATION
+%%
+%% Handling of ready nodes and priorities.
+%% - at present, all nodes have the same priority and so on.
+%%
+%% *** UNFINISHED ***
+%% - should compute a static priority estimate
+%% - should dynamically modify priorities + possibly insert NOPs
+%% (e.g., to separate branches, etc.)
+%% - thus, ought to be passed the current schedule and/or resources as well
+
+-module(hipe_schedule_prio).
+-export([init_ready/2,
+ init_instr_prio/2,
+ %% initial_ready_set/4,
+ next_ready/7,
+ add_ready_nodes/2,
+ insert_node/3
+ ]).
+
+init_ready(Size,Preds) ->
+ hipe_ultra_prio:init_ready(Size,Preds).
+
+init_instr_prio(N,DAG) ->
+ hipe_ultra_prio:init_instr_prio(N,DAG).
+
+%% initial_ready_set(M,N,Preds,Ready) ->
+%% hipe_ultra_prio:initial_ready_set(M,N,Preds,Ready).
+
+next_ready(C,Ready,Prio,Nodes,DAG,Preds,Earl) ->
+ hipe_ultra_prio:next_ready(C,Ready,Prio,Nodes,DAG,Preds,Earl).
+
+add_ready_nodes(NodeLst,Ready) ->
+ hipe_ultra_prio:add_ready_nodes(NodeLst,Ready).
+
+insert_node(C,I,Ready) ->
+ hipe_ultra_prio:insert_node(C,I,Ready).
diff --git a/lib/hipe/opt/hipe_spillmin.erl b/lib/hipe/opt/hipe_spillmin.erl
new file mode 100644
index 0000000000..df885a7dff
--- /dev/null
+++ b/lib/hipe/opt/hipe_spillmin.erl
@@ -0,0 +1,111 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2005-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% ==========================================================================
+%% Module : hipe_spillmin
+%% Purpose : Driver module for minimizing the number of stack slots used
+%% by a function. This is done using an algorithm for register
+%% allocation. The implementation is target-independent and
+%% requires a target-specific interface module as argument.
+%%
+%% $Id$
+%% ==========================================================================
+%% Exported functions (short description):
+%%
+%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) ->
+%% {Coloring, NumberOfSpills}
+%% Takes a CFG and the TempMap from register allocation and returns
+%% a coloring of stack slots.
+%% StackSlots should be a list of used stack slots, usually empty at
+%% first call to function.
+%% SpillIndex is the the first position we will spill to, usually 0.
+%% TempMap is the TempMap from the register allocation
+%%
+%% The Coloring will be in the form of the "allocation datastructure"
+%% described below, that is, a list of tuples on the form
+%% {Name, {spill, SpillIndex}}
+%% The NumberOfSpills is either 0 indicating no spill or the
+%% SpillIndex of the last spilled register.
+%%
+%% mapmerge(Map, SpillMap) -> NewMap
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-module(hipe_spillmin).
+-export([stackalloc/6, mapmerge/2]).
+
+%%-define(DEBUG, 1).
+-define(HIPE_INSTRUMENT_COMPILER, true).
+
+%%---------------------------------------------------------------------------
+
+-include("../main/hipe.hrl").
+-include("../flow/cfg.hrl").
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap)
+%% Calculates an allocation of stack slots using either a linear scan
+%% or a graph coloring allocation algorithm.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec stackalloc(#cfg{}, [_], non_neg_integer(),
+ comp_options(), module(), hipe_temp_map()) ->
+ {hipe_spill_map(), non_neg_integer()}.
+
+stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) ->
+ case proplists:get_bool(spillmin_color, Options) of
+ false ->
+ ?option_time(hipe_spillmin_scan:stackalloc(CFG, StackSlots, SpillIndex,
+ Options, Target, TempMap),
+ "Spill minimize, linear scan", Options);
+ true ->
+ ?option_time(hipe_spillmin_color:stackalloc(CFG, StackSlots, SpillIndex,
+ Options, Target, TempMap),
+ "Spill minimize, graph coloring", Options)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% mapmerge(Map, SpillMap)
+%%
+%% stackalloc/6 will only return the subset of the tempmap that contains
+%% the spilled temporaries. This function is used to merge the old
+%% complete tempmap with the new spill information.
+%% Map is the old map (a list of [{R0, C1}, {R1, C2}, ...]).
+%% SpillMap is the new "spill" map.
+%% !! Warning, the function does not work with the maps in another order !!
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Combines the map with allocated spills with a map from the register
+%% allocator
+
+-spec mapmerge(hipe_map(), hipe_spill_map()) -> hipe_map().
+
+mapmerge(TempMap, SpillMap) ->
+ mapmerge(TempMap, SpillMap, []).
+
+mapmerge([], _, Ack) ->
+ lists:reverse(Ack);
+mapmerge([{T1, _}|T1s], [{T2, C}|T2s], Ack) when T1 =:= T2 ->
+ mapmerge(T1s, T2s, [{T1, C}|Ack]);
+mapmerge([{_, unknown}|T1s], T2s, Ack) ->
+ mapmerge(T1s, T2s, Ack);
+mapmerge([T1|T1s], T2s, Ack) ->
+ mapmerge(T1s, T2s, [T1|Ack]).
diff --git a/lib/hipe/opt/hipe_spillmin_color.erl b/lib/hipe/opt/hipe_spillmin_color.erl
new file mode 100644
index 0000000000..11a281100b
--- /dev/null
+++ b/lib/hipe/opt/hipe_spillmin_color.erl
@@ -0,0 +1,556 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2005-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% ===========================================================================
+%%@doc
+%% GRAPH COLORING STACK SLOT SPILL MINIMIZER
+%%
+%% A simple pessimistic graph coloring stack slot spill minimizer
+%%
+%% - build interference graph
+%% - estimate number of stack slots needed
+%% - simplify graph (push on stack, abort and retry with more stack slots if spill)
+%% - select colors
+%%
+%% Emits a coloring: a list of {TempName,Location}
+%% where Location is {spill,M}.
+%% {spill,M} denotes the Mth spilled node
+%%
+%% This version uses ETS tables
+%%
+%% Deficiencies:
+%% - pessimistic coloring
+%%
+
+-module(hipe_spillmin_color).
+
+-export([stackalloc/6]).
+
+%%-ifndef(DO_ASSERT).
+%%-define(DO_ASSERT, true).
+%%-endif.
+
+%%-ifndef(DEBUG).
+%%-define(DEBUG,0).
+%%-endif.
+
+%%---------------------------------------------------------------------------
+
+-include("../main/hipe.hrl").
+-include("../flow/cfg.hrl").
+
+%% Define these as 'ok' or 'report(X,Y)' depending on how much output you want.
+-define(report0(X,Y), ?IF_DEBUG_LEVEL(0,?msg(X, Y),ok)).
+-define(report(X,Y), ?IF_DEBUG_LEVEL(1,?msg(X, Y),ok)).
+-define(report2(X,Y), ?IF_DEBUG_LEVEL(2,?msg(X, Y),ok)).
+-define(report3(X,Y), ?IF_DEBUG_LEVEL(3,?msg(X, Y),ok)).
+
+%% Emits a coloring: a list of {TempName,Location}
+%% where Location is {spill,M}.
+%% {spill,M} denotes the Mth spilled node
+
+-spec stackalloc(#cfg{}, [_], non_neg_integer(),
+ comp_options(), module(), hipe_temp_map()) ->
+ {hipe_spill_map(), non_neg_integer()}.
+
+stackalloc(CFG, _StackSlots, SpillIndex, _Options, Target, TempMap) ->
+ ?report2("building IG~n", []),
+ {IG, NumNodes} = build_ig(CFG, Target, TempMap),
+ {Cols, MaxColors} =
+ color_heuristic(IG, 0, NumNodes, NumNodes, NumNodes, Target, 1),
+ SortedCols = lists:sort(Cols),
+ {remap_temp_map(SortedCols, TempMap, SpillIndex), SpillIndex+MaxColors}.
+
+%% Rounds a floating point value upwards
+ceiling(X) ->
+ T = trunc(X),
+ case (X - T) of
+ Neg when Neg < 0.0 -> T;
+ Pos when Pos > 0.0 -> T + 1;
+ _ -> T
+ end.
+
+%% Emits a coloring: an unsorted list of {Temp,Location}
+%% where Location is {spill,M}.
+%% {spill,M} denotes the Mth spilled node
+%%
+%% Notes:
+%% - Arguments:
+%% IG: The interference graph
+%% Min: The lower bound, the minimal number of colors tried.
+%% Max: The upper bound, the maximal number of colors tried.
+%% Safe: The number of colors that are guaranteed to work. This is
+%% needed, because we reuse information from color() about how
+%% many colors it used at the last try, but this is not guaranteed to
+%% be a feasible solution because color might work differently using
+%% more colors although it has successfully colored the graph with
+%% fewer colors previously. Example: color(666) colors with 23 colors,
+%% but color(23) fails.
+%% We use Safe inefficently, because we run color 1 additional
+%% time with the same argument if Safe is needed.
+%% MaxNodes: The number of nodes in IG.
+%% Target: Target specific information.
+%% MaxDepth: The maximum recursion depth.
+color_heuristic(IG, Min, Max, Safe, MaxNodes, Target, MaxDepth) ->
+ case MaxDepth of
+ 0 ->
+ case color(IG, ordsets:from_list(init_stackslots(Max)),
+ MaxNodes, Target) of
+ not_easily_colorable ->
+ color(IG, ordsets:from_list(init_stackslots(Safe)),
+ MaxNodes, Target);
+ Else ->
+ Else
+ end;
+ _ ->
+ %% This can be increased from 2, and by this the heuristic can be
+ %% exited earlier, but the same can be achived by decreasing the
+ %% recursion depth. This should not be decreased below 2.
+ case (Max - Min) < 2 of
+ true ->
+ case color(IG, ordsets:from_list(init_stackslots(Max)),
+ MaxNodes, Target) of
+ not_easily_colorable ->
+ color(IG, ordsets:from_list(init_stackslots(Safe)),
+ MaxNodes, Target);
+ Else ->
+ Else
+ end;
+ false ->
+ NumSlots = ceiling((Max - Min)/2) + Min,
+ case color(IG, ordsets:from_list(init_stackslots(NumSlots)),
+ MaxNodes, Target) of
+ not_easily_colorable ->
+ color_heuristic(IG, NumSlots, Max,
+ Safe, MaxNodes, Target, MaxDepth - 1);
+ {_TmpCols, TmpMaxColors} ->
+ color_heuristic(IG, Min, TmpMaxColors,
+ NumSlots, MaxNodes, Target, MaxDepth - 1)
+ end
+ end
+ end.
+
+%% Returns a new temp map with the spilled temporaries mapped to stack slots,
+%% located after SpillIndex, according to Cols.
+remap_temp_map(Cols, TempMap, SpillIndex) ->
+ remap_temp_map0(Cols, hipe_temp_map:to_substlist(TempMap), SpillIndex).
+
+remap_temp_map0([], _TempMap, _SpillIndex) ->
+ [];
+remap_temp_map0([{_M, {spill, N}}|Xs], [{TempNr, {spill,_}}|Ys], SpillIndex) ->
+ [{TempNr, {spill, SpillIndex + N-1}}|remap_temp_map0(Xs, Ys, SpillIndex)];
+remap_temp_map0(Cols, [_Y|Ys], SpillIndex) ->
+ remap_temp_map0(Cols, Ys, SpillIndex).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% *** BUILD THE INTERFERENCE GRAPH ***
+%%
+%% Returns {Interference_graph, Number_Of_Nodes}
+%%
+
+build_ig(CFG, Target, TempMap) ->
+ try build_ig0(CFG, Target, TempMap)
+ catch error:Rsn -> exit({regalloc, build_ig, Rsn})
+ end.
+
+%% Creates an ETS table consisting of the keys given in List, with the values
+%% being an integer which is the position of the key in List.
+%% [1,5,7] -> {1,0} {5,1} {7,2}
+%% etc.
+setup_ets(List) ->
+ setup_ets0(List, ets:new(tempMappingTable, []), 0).
+
+setup_ets0([], Table, _N) ->
+ Table;
+setup_ets0([X|Xs], Table, N) ->
+ ets:insert(Table, {X, N}),
+ setup_ets0(Xs, Table, N+1).
+
+build_ig0(CFG, Target, TempMap) ->
+ Live = Target:analyze(CFG),
+ TempMapping = map_spilled_temporaries(TempMap),
+ TempMappingTable = setup_ets(TempMapping),
+ NumSpilled = length(TempMapping),
+ IG = build_ig_bbs(Target:labels(CFG), CFG, Live, empty_ig(NumSpilled),
+ Target, TempMap, TempMappingTable),
+ ets:delete(TempMappingTable),
+ {normalize_ig(IG), NumSpilled}.
+
+build_ig_bbs([], _CFG, _Live, IG, _Target, _TempMap, _TempMapping) ->
+ IG;
+build_ig_bbs([L|Ls], CFG, Live, IG, Target, TempMap, TempMapping) ->
+ Xs = bb(CFG, L, Target),
+ LiveOut = [X || X <- liveout(Live, L, Target),
+ hipe_temp_map:is_spilled(X, TempMap)],
+ LiveOutList = ordsets:to_list(LiveOut),
+ LiveOutListMapped = list_map(LiveOutList, TempMapping, []),
+ LiveOutSetMapped = ordsets:from_list(LiveOutListMapped),
+ {_, NewIG} =
+ build_ig_bb(Xs, LiveOutSetMapped, IG, Target, TempMap, TempMapping),
+ build_ig_bbs(Ls, CFG, Live, NewIG, Target, TempMap, TempMapping).
+
+build_ig_bb([], LiveOut, IG, _Target, _TempMap, _TempMapping) ->
+ {LiveOut, IG};
+build_ig_bb([X|Xs], LiveOut, IG, Target, TempMap, TempMapping) ->
+ {Live,NewIG} =
+ build_ig_bb(Xs, LiveOut, IG, Target, TempMap, TempMapping),
+ build_ig_instr(X, Live, NewIG, Target, TempMap, TempMapping).
+
+build_ig_instr(X, Live, IG, Target, TempMap, TempMapping) ->
+ {Def, Use} = def_use(X, Target, TempMap),
+ ?report3("Live ~w\n~w : Def: ~w Use ~w\n",[Live, X, Def,Use]),
+ DefListMapped = list_map(Def, TempMapping, []),
+ UseListMapped = list_map(Use, TempMapping, []),
+ DefSetMapped = ordsets:from_list(DefListMapped),
+ UseSetMapped = ordsets:from_list(UseListMapped),
+ NewIG = interference_arcs(DefListMapped, ordsets:to_list(Live), IG),
+ NewLive = ordsets:union(UseSetMapped, ordsets:subtract(Live, DefSetMapped)),
+ {NewLive, NewIG}.
+
+%% Given a list of Keys and an ets-table returns a list of the elements
+%% in Mapping corresponding to the Keys and appends Acc to this list.
+list_map([], _Mapping, Acc) ->
+ Acc;
+list_map([X|Xs], Mapping, Acc) ->
+ {_Key, Val} = hd(ets:lookup(Mapping, X)),
+ list_map(Xs, Mapping, [Val | Acc]).
+
+%% Returns an ordered list of spilled temporaries in TempMap
+map_spilled_temporaries(TempMap) ->
+ map_spilled_temporaries0(hipe_temp_map:to_substlist(TempMap)).
+
+map_spilled_temporaries0([]) ->
+ [];
+map_spilled_temporaries0([{N, {spill, _}}|Xs]) ->
+ [N | map_spilled_temporaries0(Xs)];
+map_spilled_temporaries0([_X|Xs]) ->
+ map_spilled_temporaries0(Xs).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+interference_arcs([], _Live, IG) ->
+ IG;
+interference_arcs([X|Xs], Live, IG) ->
+ interference_arcs(Xs, Live, i_arcs(X, Live, IG)).
+
+i_arcs(_X, [], IG) ->
+ IG;
+i_arcs(X, [Y|Ys], IG) ->
+ i_arcs(X, Ys, add_edge(X, Y, IG)).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% *** COLORING ***
+%%
+%% Coloring is done straightforwardly:
+%% - find the low-degree nodes, put them in low
+%% - while low non-empty:
+%% * remove x from low
+%% * push x on stack
+%% * decrement degree of neighbors of x
+%% * for each neighbor y of low degree, put y on low
+%% - when low empty:
+%% - if graph empty, return stack
+%% - otherwise
+%% throw an exception (the caller should retry with more stack slots)
+
+color(IG, StackSlots, NumNodes, Target) ->
+ try
+ color_0(IG, StackSlots, NumNodes, Target)
+ catch
+ error:Rsn ->
+ ?error_msg("Coloring failed with ~p~n", [Rsn]),
+ ?EXIT(Rsn)
+ end.
+
+color_0(IG, StackSlots, NumNodes, Target) ->
+ ?report("simplification of IG~n", []),
+ K = ordsets:size(StackSlots),
+ Nodes = list_ig(IG),
+ Low = low_degree_nodes(Nodes, K),
+ ?report(" starting with low degree nodes ~p~n", [Low]),
+ EmptyStk = [],
+ case simplify(Low, NumNodes, IG, K, EmptyStk, Target) of
+ non_simplifiable -> not_easily_colorable;
+ Stk ->
+ ?report(" selecting colors~n", []),
+ select(Stk, IG, StackSlots, NumNodes)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%
+%%
+%% Simplification: push all easily colored nodes on a stack;
+%% when the list of easy nodes becomes empty, see if graph is
+%% empty as well. If it is not, throw an exception and abort.
+%% If it is empty, return the stack.
+%%
+%% Notes:
+%% - Arguments:
+%% Low: low-degree nodes (ready to color)
+%% NumNodes: number of remaining nodes in graph
+%% IG: interference graph
+%% K: number of colors
+%% Stk: stack of already simplified nodes
+%% Target: Machine to compile for
+
+simplify(Low, NumNodes, IG, K, Stk, Target) ->
+ Vis = none_visited(NumNodes),
+ simplify_ig(Low, NumNodes, IG, K, Stk, Vis, Target).
+
+simplify_ig([], 0, _IG, _K, Stk, _Vis, _Target) ->
+ Stk;
+simplify_ig([], N, _IG, _K, _Stk, _Vis, _Target) when N > 0 ->
+ ?report3("N: ~w Stk: ~w N+Stk ~w\n", [N,length(Stk),N+length(Stk)]),
+ non_simplifiable;
+simplify_ig([X|Xs], N, IG, K, Stk, Vis, Target) ->
+ ?report3("N: ~w Stk: ~w N+Stk ~w\n", [N,length(Stk),N+length(Stk)]),
+ case is_visited(X, Vis) of
+ true ->
+ ?report(" node ~p already visited~n", [X]),
+ simplify_ig(Xs, N, IG, K, Stk, Vis, Target);
+ false ->
+ ?report("Stack ~w\n", [Stk]),
+ {NewLow, NewIG} = decrement_neighbors(X, Xs, IG, Vis, K),
+ ?report(" node ~w pushed\n(~w now ready)~n", [X, NewLow]),
+ NewStk = push_colored(X, Stk),
+ simplify_ig(NewLow, N-1, NewIG, K, NewStk, visit(X, Vis), Target)
+ end.
+
+decrement_neighbors(X, Xs, IG, Vis, K) ->
+ Ns = unvisited_neighbors(X, Vis, IG),
+ ?report(" node ~p has neighbors ~w\n(unvisited ~p)~n",
+ [X, neighbors(X, IG), Ns]),
+ decrement_each(Ns, Xs, IG, Vis, K).
+
+%% For each node, decrement its degree and check if it is now
+%% a low-degree node. In that case, add it to the 'low list'.
+decrement_each([], Low, IG, _Vis, _K) ->
+ {Low, IG};
+decrement_each([N|Ns], OldLow, IG, Vis, K) ->
+ {Low, CurrIG} = Res = decrement_each(Ns, OldLow, IG, Vis, K),
+ case is_visited(N, Vis) of
+ true ->
+ Res;
+ false ->
+ {D, NewIG} = decrement_degree(N, CurrIG),
+ if
+ D =:= K-1 ->
+ {[N|Low], NewIG};
+ true ->
+ {Low, NewIG}
+ end
+ end.
+
+%%%%%%%%%%%%%%%%%%%%
+%%
+%% Returns a list of {Name,Location}, where Location is {spill,M}
+%%
+%% Note: we use pessimistic coloring here.
+%% - we could use optimistic coloring: for spilled node, check if there is
+%% an unused color among the neighbors and choose that.
+
+select(Stk, IG, PhysRegs, NumNodes) ->
+ select_colors(Stk, IG, none_colored(NumNodes), PhysRegs).
+
+select_colors([], _IG, _Cols, _PhysRegs) ->
+ ?report("all nodes colored~n", []),
+ {[], 0};
+select_colors([{X,colorable}|Xs], IG, Cols, PhysRegs) ->
+ ?report("color of ~p\n", [X]),
+ {Slot,NewCols} = select_color(X, IG, Cols, PhysRegs),
+ ?report("~p~n", [Slot]),
+ {Tail, MaxColor} = select_colors(Xs, IG, NewCols, PhysRegs),
+ NewMaxColor = erlang:max(Slot, MaxColor),
+ %% Since we are dealing with spills we label all our temporaries accordingly.
+ {[{X,{spill,Slot}} | Tail], NewMaxColor}.
+
+select_color(X, IG, Cols, PhysRegs) ->
+ UsedColors = get_colors(neighbors(X, IG), Cols),
+ Reg = select_unused_color(UsedColors, PhysRegs),
+ {Reg, set_color(X, Reg, Cols)}.
+
+%%%%%%%%%%%%%%%%%%%%
+
+get_colors([], _Cols) -> [];
+get_colors([X|Xs], Cols) ->
+ case color_of(X, Cols) of
+ uncolored ->
+ get_colors(Xs, Cols);
+ {color, R} ->
+ [R|get_colors(Xs, Cols)]
+ end.
+
+select_unused_color(UsedColors, PhysRegs) ->
+ Summary = ordsets:from_list(UsedColors),
+ AvailRegs = ordsets:to_list(ordsets:subtract(PhysRegs, Summary)),
+ hd(AvailRegs).
+
+push_colored(X, Stk) ->
+ [{X, colorable} | Stk].
+
+low_degree_nodes([], _K) -> [];
+low_degree_nodes([{N,Info}|Xs], K) ->
+ ?report0("node ~p has degree ~p: ~w~n", [N, degree(Info), neighbors(Info)]),
+ Deg = degree(Info),
+ if
+ Deg < K ->
+ [N|low_degree_nodes(Xs, K)];
+ true ->
+ low_degree_nodes(Xs, K)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+unvisited_neighbors(X, Vis, IG) ->
+ ordsets:from_list(unvisited(neighbors(X, IG), Vis)).
+
+unvisited([], _Vis) -> [];
+unvisited([X|Xs], Vis) ->
+ case is_visited(X, Vis) of
+ true ->
+ unvisited(Xs, Vis);
+ false ->
+ [X|unvisited(Xs, Vis)]
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% *** ABSTRACT DATATYPES ***
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%
+%% The stack slot datatype
+%%
+
+init_stackslots(NumSlots) ->
+ init_stackslots(NumSlots, []).
+
+init_stackslots(0, Acc) ->
+ Acc;
+init_stackslots(NumSlots, Acc) ->
+ init_stackslots(NumSlots - 1, [NumSlots|Acc]).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% The ig datatype:
+%%
+%% Note: if we know the number of temps used, we can use a VECTOR
+%% instead, which will speed up things.
+%%
+%% Note: later on, we may wish to add 'move-related' support.
+
+-record(ig_info, {neighbors = [] :: [_], degree = 0 :: non_neg_integer()}).
+
+empty_ig(NumNodes) ->
+ hipe_vectors:new(NumNodes, #ig_info{}).
+
+degree(Info) ->
+ Info#ig_info.degree.
+
+neighbors(Info) ->
+ Info#ig_info.neighbors.
+
+add_edge(X, X, IG) -> IG;
+add_edge(X, Y, IG) ->
+ add_arc(X, Y, add_arc(Y, X, IG)).
+
+add_arc(X, Y, IG) ->
+ Info = hipe_vectors:get(IG, X),
+ Old = neighbors(Info),
+ New = Info#ig_info{neighbors = [Y|Old]},
+ hipe_vectors:set(IG,X,New).
+
+normalize_ig(IG) ->
+ Size = hipe_vectors:size(IG),
+ normalize_ig(Size-1, IG).
+
+normalize_ig(-1, IG) ->
+ IG;
+normalize_ig(I, IG) ->
+ Info = hipe_vectors:get(IG, I),
+ N = ordsets:from_list(neighbors(Info)),
+ NewInfo = Info#ig_info{neighbors = N, degree = length(N)},
+ NewIG = hipe_vectors:set(IG, I, NewInfo),
+ normalize_ig(I-1, NewIG).
+
+neighbors(X, IG) ->
+ Info = hipe_vectors:get(IG, X),
+ Info#ig_info.neighbors.
+
+decrement_degree(X, IG) ->
+ Info = hipe_vectors:get(IG, X),
+ Degree = degree(Info),
+ NewDegree = Degree-1,
+ NewInfo = Info#ig_info{degree = NewDegree},
+ {NewDegree, hipe_vectors:set(IG, X, NewInfo)}.
+
+list_ig(IG) ->
+ hipe_vectors:list(IG).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% The coloring datatype:
+
+none_colored(NumNodes) ->
+ hipe_vectors:new(NumNodes, uncolored).
+
+color_of(X, Cols) ->
+ hipe_vectors:get(Cols, X).
+
+set_color(X, R, Cols) ->
+ hipe_vectors:set(Cols, X, {color, R}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% Note: there might be a slight gain in separating the two versions
+%% of visit/2 and visited/2. (So that {var,X} selects X and calls
+%% the integer version.
+
+none_visited(NumNodes) ->
+ hipe_vectors:new(NumNodes, false).
+
+visit(X, Vis) ->
+ hipe_vectors:set(Vis, X, true).
+
+is_visited(X, Vis) ->
+ hipe_vectors:get(Vis, X).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% *** INTERFACES TO OTHER MODULES ***
+%%
+
+liveout(CFG, L, Target) ->
+ ordsets:from_list(reg_names(Target:liveout(CFG, L), Target)).
+
+bb(CFG, L, Target) ->
+ hipe_bb:code(Target:bb(CFG, L)).
+
+def_use(X, Target, TempMap) ->
+ Defines = [Y || Y <- reg_names(Target:defines(X), Target),
+ hipe_temp_map:is_spilled(Y, TempMap)],
+ Uses = [Z || Z <- reg_names(Target:uses(X), Target),
+ hipe_temp_map:is_spilled(Z, TempMap)],
+ {Defines, Uses}.
+
+reg_names(Regs, Target) ->
+ [Target:reg_nr(X) || X <- Regs].
diff --git a/lib/hipe/opt/hipe_spillmin_scan.erl b/lib/hipe/opt/hipe_spillmin_scan.erl
new file mode 100644
index 0000000000..c58906c389
--- /dev/null
+++ b/lib/hipe/opt/hipe_spillmin_scan.erl
@@ -0,0 +1,559 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2005-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% ===========================================================================
+%% Copyright (c) 2002 by Niklas Andersson, Andreas Lundin, and Erik Johansson.
+%% ===========================================================================
+%% Module : hipe_spillmin_scan
+%% Purpose : Optimizes the number of stack slots used by using a
+%% "linear-scan algorithm" to allocate stack slots.
+%% Notes : * This is a simplified implementation of
+%% "Linear Scan Register Allocation" by
+%% Massimiliano Poletto & Vivek Sarkar described in
+%% ACM TOPLAS Vol 21, No 5, September 1999.
+%%
+%% * This implementation is target-independent and
+%% requires a target specific interface module
+%% as argument.
+%%
+%% * Based on the hipe_ls_regalloc module by Erik Johansson
+%%
+%% History : * 2002-04-01, NA & AL: Created
+%% * 2002-10-08, Happi: Cleanup and speedup
+%% ============================================================================
+%% Exported functions (short description):
+%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) ->
+%% {Coloring, NumberOfSpills}
+%% Takes a CFG and the TempMap from register allocation and returns
+%% a coloring of stack slots.
+%% StackSlots should be a list of used stack slots, usually empty at
+%% first call to function.
+%% SpillIndex is the the first position we will spill to, usually 0.
+%% TempMap is the TempMap from the register allocation
+%%
+%% The Coloring will be in the form of the "allocation datastructure"
+%% described below, that is, a list of tuples on the form
+%% {Name, {spill, SpillIndex}}
+%% The NumberOfSpills is either 0 indicating no spill or the
+%% SpillIndex of the last spilled register.
+%%
+%% mapmerge
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-module(hipe_spillmin_scan).
+
+-export([stackalloc/6]).
+
+%%-define(DEBUG, 1).
+-define(HIPE_INSTRUMENT_COMPILER, true).
+
+%%----------------------------------------------------------------------------
+
+-include("../main/hipe.hrl").
+-include("../flow/cfg.hrl").
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap)
+%% Calculates an allocation of stack slots using a linear_scan algorithm.
+%% There are three steps in the algorithm:
+%% 1. Calculate live-ranges for all spilled temporaries.
+%% 2. Calculate live-intervals for each temporary.
+%% The live interval consists of a start position and a end position
+%% these are the first definition and last use of the temporary
+%% given as instruction numbers in a breadth-first traversal of the
+%% control-flow-graph.
+%% 3. Do a linear scan allocation over the live intervals.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec stackalloc(#cfg{}, [_], non_neg_integer(),
+ comp_options(), module(), hipe_temp_map()) ->
+ {hipe_spill_map(), non_neg_integer()}.
+
+stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) ->
+ ?debug_msg("LinearScan: ~w\n", [erlang:statistics(runtime)]),
+ %% Step 1: Calculate liveness (Call external implementation.)
+ Liveness = liveness(CFG, Target),
+ ?debug_msg("liveness (done)~w\n", [erlang:statistics(runtime)]),
+ USIntervals = calculate_intervals(CFG, Liveness, Options,
+ Target, TempMap),
+ %% ?debug_msg("intervals (done) ~w\n", [erlang:statistics(runtime)]),
+ Intervals = sort_on_start(USIntervals),
+ ?debug_msg("sort intervals (done) ~w\n", [erlang:statistics(runtime)]),
+ ?debug_msg("Intervals ~w\n", [Intervals]),
+ ?debug_msg("No intervals: ~w\n", [length(Intervals)]),
+ ?debug_msg("count intervals (done) ~w\n", [erlang:statistics(runtime)]),
+ Allocation = allocate(Intervals, StackSlots, SpillIndex, Target),
+ ?debug_msg("allocation (done) ~w\n", [erlang:statistics(runtime)]),
+ Allocation.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% %%
+%% Step 2: Calculate live-intervals for each temporary. %%
+%% %%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%- - - - - - - - - - - - - - - - - - - - - - - -
+%% calculate_intervals(CFG, Liveness, Options, Target, TempMap)
+%% CFG: The Control-Flow Graph.
+%% Liveness: A map of live-in and live-out sets for each Basic-Block.
+%% TempMap: The TempMap from the register allocation
+%%
+%% This function will only consider the intervals of the temporaries
+%% that have been spilled during register allocation, and will ignore
+%% all other.
+%%- - - - - - - - - - - - - - - - - - - - - - - -
+calculate_intervals(CFG, Liveness, _Options, Target, TempMap) ->
+ Interval = empty_interval(Target:number_of_temporaries(CFG)),
+ Worklist = Target:reverse_postorder(CFG),
+ intervals(Worklist, Interval, 1, CFG, Liveness, Target, TempMap).
+
+%%- - - - - - - - - - - - - - - - - - - - - - - -
+%% intervals(WorkList, Intervals, InstructionNr,
+%% CFG, Liveness, Target, TempMap)
+%% WorkList: List of BB-names to handle.
+%% Intervals: Intervals seen so far (sorted on register names).
+%% InstructionNr: The number of examined instructions.
+%% CFG: The Control-Flow Graph.
+%% Liveness: A map of live-in and live-out sets for each Basic-Block.
+%% Target: The backend for which we generate native code.
+%% TempMap: The TempMap from the register allocation
+%%
+%% This function will only consider the intervals of the temporaries
+%% that have been spilled during register allocation, and will ignore
+%% all other.
+%%- - - - - - - - - - - - - - - - - - - - - - - -
+intervals([L|ToDO], Intervals, InstructionNr, CFG, Liveness, Target,
+ TempMap) ->
+ ?debug_msg("Block ~w\n", [L]),
+ %% Add all variables that are live at the entry of this block
+ %% to the interval data structure.
+
+ %% Only consider spilled temporaries in LiveIn
+ LiveIn = [X || X <- livein(Liveness, L, Target),
+ hipe_temp_map:is_spilled(X, TempMap)],
+ Intervals2 = add_def_point(LiveIn, InstructionNr, Intervals),
+
+ %% Only consider spilled temporaries in LiveOut
+ LiveOut = [X2 || X2 <- liveout(Liveness, L, Target),
+ hipe_temp_map:is_spilled(X2, TempMap)],
+ ?debug_msg("In ~w -> Out ~w\n", [LiveIn, LiveOut]),
+
+ %% Traverse this block instruction by instruction and add all
+ %% uses and defines to the intervals.
+ Code = hipe_bb:code(bb(CFG, L, Target)),
+ {Intervals3, NewINr} = traverse_block(Code, InstructionNr+1,
+ Intervals2, Target, TempMap),
+
+ %% Add end points for the temporaries that are in the live-out set.
+ Intervals4 = add_use_point(LiveOut, NewINr+1, Intervals3),
+
+ intervals(ToDO, Intervals4, NewINr+1, CFG, Liveness, Target, TempMap);
+intervals([], Intervals, _, _, _, _, _) ->
+ %% Return the calculated intervals
+ interval_to_list(Intervals).
+ %% Intervals.
+
+%%- - - - - - - - - - - - - - - - - - - - - - - -
+%% traverse_block(Code, InstructionNo, Intervals, Unchanged)
+%% Examine each instruction in the Code:
+%% For each temporary T used or defined by instruction number N:
+%% extend the interval of T to include N.
+%% TempMap: The TempMap from the register allocation
+%%
+%% This function will only consider the the instruction that have temporaries
+%% that have been spilled during register allocation, and will ignore
+%% all other.
+%%- - - - - - - - - - - - - - - - - - - - - - - -
+
+traverse_block([Instruction|Is], InstrNo, Intervals, Target, TempMap) ->
+ %% Get used temps.
+ %% Only consider spilled temporaries in the Use set.
+ UsesSet = [X || X <- uses(Instruction, Target),
+ hipe_temp_map:is_spilled(X, TempMap)],
+ %% Get defined temps.
+ %% Only consider spilled temporaries in the Def set.
+ DefsSet = [X2 || X2 <- defines(Instruction, Target),
+ hipe_temp_map:is_spilled(X2, TempMap)],
+ %% Only consider those temps that starts or ends their lifetime
+ %% within the basic block (that is remove all Unchanged temps).
+ Intervals1 = add_def_point( DefsSet, InstrNo, Intervals),
+ %% Extend the intervals for these temporaries to include InstrNo.
+ Intervals2 = add_use_point(UsesSet, InstrNo, Intervals1),
+ %% Handle the next instruction.
+ traverse_block(Is, InstrNo+1, Intervals2, Target, TempMap);
+traverse_block([], InstrNo, Intervals, _, _) ->
+ %% Return the new intervals and the number of the next instruction.
+ {Intervals,InstrNo}.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% %%
+%% Step 3. Do a linear scan allocation over the live intervals. %%
+%% %%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% allocate(Intervals, PhysicalRegisters, Target)
+%%
+%% This function performs the linear scan algorithm.
+%% Intervals contains the start and stop position of each spilled temporary,
+%% sorted on increasing startpositions
+%% StackSlots is a list of available Stack slots to use. If they run out a
+%% new stack slot is allocated from an (in theory) infinite domain.
+%%
+%%- - - - - - - - - - - - - - - - - - - - - - - -
+allocate(Intervals, StackSlots, SpillIndex, Target) ->
+ AllocatedSlots = empty_allocation(),
+ allocate(Intervals, StackSlots, [], AllocatedSlots, SpillIndex, Target).
+
+%%- - - - - - - - - - - - - - - - - - - - - - - -
+%% allocate(Intervals, Free, Active, Allocated, SpillIndex, Target)
+%% Iterates on each temporary interval.
+%% Intervals: The list of temporary intervals.
+%% Free: Currently available stack slots.
+%% Active: Currently used stack slots (sorted on increasing
+%% interval enpoints)
+%% Allocated: The mapping of register names to spill positions.
+%% SpillIndex: The number of spilled registers.
+%%- - - - - - - - - - - - - - - - - - - - - - - -
+allocate([TempInt|TIS], Free, Active, Alloc, SpillIndex, Target) ->
+ %% Remove from the active list those temporaries whose interval
+ %% ends before the start of the current interval.
+ {NewActive, NewFree} =
+ expire_old_intervals(Active, startpoint(TempInt), Free, Target),
+ %% Get the name of the temp in the current interval.
+ Temp = reg(TempInt),
+ case NewFree of
+ [] ->
+ %% There are no free spill slots, so we allocate a new one
+ NewSpillIndex = SpillIndex+1,
+ NewAlloc = spillalloc(Temp, SpillIndex, Alloc),
+ NewActive2 = add_active(endpoint(TempInt), SpillIndex, NewActive),
+ allocate(TIS, NewFree, NewActive2, NewAlloc, NewSpillIndex, Target);
+ [FreeSpillslot | Spillslots] ->
+ %% The spill slot FreeSpillSlot is available, let's use it.
+ allocate(TIS, Spillslots,
+ add_active(endpoint(TempInt), FreeSpillslot, NewActive),
+ spillalloc(Temp, FreeSpillslot, Alloc),
+ SpillIndex, Target)
+ end;
+allocate([], _, _, Alloc, SpillIndex, _) ->
+ %% No more register intervals to handle;
+ %% return the result sorted on regnames.
+ {lists:sort(Alloc), SpillIndex}.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% expire_old_intervals(ActiveTemps, CurrentPos, FreeRegisters)
+%% Remove all temporaries that have live-ranges that ends before the
+%% current position from the active list and put them into the free
+%% list instead.
+%%
+%% ---------------------------------------------------------------------
+expire_old_intervals([Act|Acts] = AllActives, CurrentPos, Free, Target) ->
+ %% Does the live-range of the first active register end before
+ %% the current position?
+
+ %% We expand multimove before regalloc, ignore the next 2 lines.
+ %% %% We don't free registers that end at the current position,
+ %% %% since a multimove can decide to do the moves in another order...
+ case active_endpoint(Act) =< CurrentPos of
+ true -> %% Yes -> Then we can free that register.
+ Spillslot = active_spillslot(Act),
+ %% Add the spillslot to the free pool.
+ NewFree = [Spillslot|Free],
+ %% Here we could try appending the register to get a more
+ %% widespread use of registers.
+ %% Free ++ [active_spillslot(Act)]);
+ expire_old_intervals(Acts, CurrentPos, NewFree, Target);
+ false ->
+ %% No -> Then we cannot free any more temporaries.
+ %% (Since they are sorted on endpoints...)
+ {AllActives, Free}
+ end;
+expire_old_intervals([], _, Free, _) ->
+ {[], Free}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% %%
+%% D A T A S T R U C T U R E S %%
+%% & %%
+%% A U X I L I A R Y F U N C T I O N S %%
+%% %%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% The "allocation datastructure"
+%%
+%% This is an order list of register names paired with their allocations.
+%% {Name, Allocation}
+%% Since we are only dealing with spills, the allocation will look like:
+%% {spill, SpillIndex}
+%%
+%% ---------------------------------------------------------------------
+
+empty_allocation() -> [].
+
+spillalloc(Name, N, Allocation) -> [{Name,{spill,N}}|Allocation].
+
+%% spillalloc(Name,N,[{Name,_}|A]) ->
+%% ?debug_msg("Spilled ~w\n",[Name]),
+%% [{Name,{spill,N}}|A];
+%% spillalloc(Name,N,[{Name2,Binding}|Bindings]) when Name > Name2 ->
+%% [{Name2,Binding}|spillalloc(Name,N,Bindings)];
+%% spillalloc(Name,N,Bindings) ->
+%% [{Name,{spill,N}}|Bindings].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% The active datastructure.
+%% Keeps tracks of currently active (allocated) spill slots.
+%% It is sorted on end points in the intervals
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+add_active(Endpoint, SpillSlot, [A1={P1,_}|Active]) when P1 < Endpoint ->
+ [A1|add_active(Endpoint, SpillSlot, Active)];
+add_active(Endpoint, SpillSlot, Active) ->
+ [{Endpoint, SpillSlot}|Active].
+
+active_spillslot({_,SpillSlot}) ->
+ SpillSlot.
+
+active_endpoint({EndPoint,_}) ->
+ EndPoint.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% The Interval data structure.
+%%
+%%- - - - - - - - - - - - - - - - - - - - - - - -
+
+%% mk_interval(Name, Start, End) ->
+%% {Name, Start, End}.
+
+endpoint({_R,_S,Endpoint}) ->
+ Endpoint.
+
+startpoint({_R,Startpoint,_E}) ->
+ Startpoint.
+
+reg({RegName,_S,_E}) ->
+ RegName.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% The Intervals data structure.
+
+sort_on_start(I) ->
+ lists:keysort(2, I).
+
+-ifdef(gb_intervals).
+empty_interval(_) ->
+ gb_trees:empty().
+
+interval_to_list(Intervals) ->
+ lists:flatten(
+ lists:map(
+ fun({T, I}) when is_list(I) ->
+ lists:map(
+ fun ({none, End}) ->
+ {T,End,End};
+ ({Beg, none}) ->
+ {T,Beg, Beg}
+ end,
+ I);
+ ({T,{B,E}}) -> {T, B, E}
+ end,
+ gb_trees:to_list(Intervals))).
+
+add_use_point([Temp|Temps], Pos, Intervals) ->
+ %% Extend the old interval...
+ NewInterval =
+ case gb_trees:lookup(Temp, Intervals) of
+ %% This temp has an old interval...
+ {value, Value} ->
+ %% ... extend it.
+ extend_interval(Pos, Value);
+ %% This is the first time we see this temp...
+ none ->
+ %% ... create a new interval
+ {Pos, Pos}
+ end,
+ %% Add or update the extended interval.
+ Intervals2 = gb_trees:enter(Temp, NewInterval, Intervals),
+ %% Add the rest of the temporaries.
+ add_use_point(Temps, Pos, Intervals2);
+add_use_point([], _, I) ->
+ %% No more to add return the interval.
+ I.
+
+add_def_point([Temp|Temps], Pos, Intervals) ->
+ %% Extend the old interval...
+ NewInterval =
+ case gb_trees:lookup(Temp, Intervals) of
+ %% This temp has an old interval...
+ {value, Value} ->
+ %% ... extend it.
+ extend_interval(Pos, Value);
+ %% This is the first time we see this temp...
+ none ->
+ %% ... create a new interval
+ {Pos, Pos}
+ end,
+ %% Add or update the extended interval.
+ Intervals2 = gb_trees:enter(Temp, NewInterval, Intervals),
+ %% Add the rest of the temporaries.
+ add_def_point(Temps, Pos, Intervals2);
+add_def_point([], _, I) ->
+ %% No more to add return the interval.
+ I.
+
+extend_interval(Pos, {Beginning, End}) ->
+ %% If this position occurs before the beginning of the interval,
+ %% then extend the beginning to this position.
+ NewBeginning = erlang:min(Pos, Beginning),
+ %% If this position occurs after the end of the interval, then
+ %% extend the end to this position.
+ NewEnd = erlang:max(Pos, End),
+ {NewBeginning, NewEnd}.
+
+extend_def_interval(Pos, {Beginning, End}) ->
+ %% If this position occurs before the beginning of the interval,
+ %% then extend the beginning to this position.
+ NewBeginning = erlang:min(Pos, Beginning),
+ %% If this position occurs after the end of the interval, then
+ %% extend the end to this position.
+ NewEnd = erlang:max(Pos, End),
+ {NewBeginning, NewEnd};
+extend_def_interval(Pos, [{Beginning, none}|More]) ->
+ [{Pos,none}, {Beginning, none}|More];
+extend_def_interval(Pos, Intervals) ->
+ {Pos, Pos}.
+
+-else. %% ifdef gb_intervals
+
+empty_interval(N) ->
+ hipe_vectors:new(N, none).
+
+interval_to_list(Intervals) ->
+ add_indices(hipe_vectors:vector_to_list(Intervals), 0).
+
+add_indices([{B, E}|Xs], N) ->
+ [{N, B, E}|add_indices(Xs, N+1)];
+add_indices([List|Xs], N) when is_list(List) ->
+ flatten(List, N, Xs);
+add_indices([none|Xs], N) ->
+ add_indices(Xs, N+1);
+add_indices([], _N) -> [].
+
+flatten([{none, End}|Rest], N, More) ->
+ [{N,End,End} | flatten(Rest, N, More)];
+flatten([{Beg, none}|Rest], N ,More) ->
+ [{N,Beg,Beg} | flatten(Rest, N, More)];
+flatten([], N, More) ->
+ add_indices(More, N+1).
+
+add_use_point([Temp|Temps], Pos, Intervals) ->
+ %% Extend the old interval...
+ NewInterval =
+ case hipe_vectors:get(Intervals, Temp) of
+ %% This is the first time we see this temp...
+ none ->
+ %% ... create a new interval
+ {Pos, Pos};
+ %% This temp has an old interval...
+ Value ->
+ %% ... extend it.
+ extend_interval(Pos, Value)
+ end,
+ %% Add or update the extended interval.
+ Intervals2 = hipe_vectors:set(Intervals, Temp, NewInterval),
+ %% Add the rest of the temporaries.
+ add_use_point(Temps, Pos, Intervals2);
+add_use_point([], _, I) ->
+ %% No more to add return the interval.
+ I.
+
+add_def_point([Temp|Temps], Pos, Intervals) ->
+ %% Extend the old interval...
+ NewInterval =
+ case hipe_vectors:get(Intervals, Temp) of
+ %% This is the first time we see this temp...
+ none ->
+ %% ... create a new interval
+ {Pos, Pos};
+ %% This temp has an old interval...
+ Value ->
+ %% ... extend it.
+ extend_interval(Pos, Value)
+ end,
+ %% Add or update the extended interval.
+ Intervals2 = hipe_vectors:set(Intervals, Temp, NewInterval),
+ %% Add the rest of the temporaries.
+ add_def_point(Temps, Pos, Intervals2);
+add_def_point([], _, I) ->
+ %% No more to add return the interval.
+ I.
+
+extend_interval(Pos, {Beginning, End})
+ when is_integer(Beginning), is_integer(End) ->
+ %% If this position occurs before the beginning of the interval,
+ %% then extend the beginning to this position.
+ NewBeginning = erlang:min(Pos, Beginning),
+ %% If this position occurs after the end of the interval, then
+ %% extend the end to this position.
+ NewEnd = erlang:max(Pos, End),
+ {NewBeginning, NewEnd}.
+
+-endif. %% gb_intervals
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% Interface to external functions.
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+liveness(CFG, Target) ->
+ Target:analyze(CFG).
+
+bb(CFG, L, Target) ->
+ Target:bb(CFG, L).
+
+livein(Liveness, L, Target) ->
+ regnames(Target:livein(Liveness, L), Target).
+
+liveout(Liveness, L, Target) ->
+ regnames(Target:liveout(Liveness, L), Target).
+
+uses(I, Target) ->
+ regnames(Target:uses(I), Target).
+
+defines(I, Target) ->
+ regnames(Target:defines(I), Target).
+
+regnames(Regs, Target) ->
+ [Target:reg_nr(X) || X <- Regs].
diff --git a/lib/hipe/opt/hipe_target_machine.erl b/lib/hipe/opt/hipe_target_machine.erl
new file mode 100644
index 0000000000..be9f095429
--- /dev/null
+++ b/lib/hipe/opt/hipe_target_machine.erl
@@ -0,0 +1,93 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% INTERFACE TO TARGET MACHINE MODEL
+%%
+%% Interfaces the instruction scheduler to the (resource) machine model.
+
+-module(hipe_target_machine).
+-export([init_resources/1,
+ init_instr_resources/2,
+ resources_available/4,
+ advance_cycle/1
+ ]).
+-export([raw_latency/2,
+ war_latency/2,
+ waw_latency/2,
+ %% m_raw_latency/2,
+ %% m_war_latency/2,
+ %% m_waw_latency/2,
+ m_raw_latency/0,
+ m_war_latency/0,
+ m_waw_latency/0,
+ br_to_unsafe_latency/2,
+ unsafe_to_br_latency/2,
+ br_br_latency/2
+ ]).
+
+-define(target,hipe_ultra_mod2).
+
+init_resources(X) ->
+ ?target:init_resources(X).
+
+init_instr_resources(X,Y) ->
+ ?target:init_instr_resources(X,Y).
+
+resources_available(X,Y,Z,W) ->
+ ?target:resources_available(X,Y,Z,W).
+
+advance_cycle(X) ->
+ ?target:advance_cycle(X).
+
+raw_latency(From,To) ->
+ ?target:raw_latency(From,To).
+
+war_latency(From,To) ->
+ ?target:war_latency(From,To).
+
+waw_latency(From,To) ->
+ ?target:waw_latency(From,To).
+
+%% m_raw_latency(From,To) ->
+%% ?target:m_raw_latency(From,To).
+
+%% m_war_latency(From,To) ->
+%% ?target:m_war_latency(From,To).
+
+%% m_waw_latency(From,To) ->
+%% ?target:m_waw_latency(From,To).
+
+m_raw_latency() ->
+ ?target:m_raw_latency().
+
+m_war_latency() ->
+ ?target:m_war_latency().
+
+m_waw_latency() ->
+ ?target:m_waw_latency().
+
+br_to_unsafe_latency(Br,U) ->
+ ?target:br_to_unsafe_latency(Br,U).
+
+unsafe_to_br_latency(U,Br) ->
+ ?target:unsafe_to_br_latency(U,Br).
+
+br_br_latency(Br1,Br2) ->
+ ?target:br_br_latency(Br1,Br2).
diff --git a/lib/hipe/opt/hipe_ultra_mod2.erl b/lib/hipe/opt/hipe_ultra_mod2.erl
new file mode 100644
index 0000000000..b039eaee80
--- /dev/null
+++ b/lib/hipe/opt/hipe_ultra_mod2.erl
@@ -0,0 +1,239 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% ULTRASPARC MACHINE MODEL
+%%
+%% This module is used by the scheduler.
+%% The following interface is used:
+%% ...
+%%
+%% NOTES:
+%% - the machine model is simple (on the verge of simplistic)
+%% * all FUs are pipelined => model only one cycle at a time
+%% * instruction latencies are mostly 1
+%% * floating point is left for later (I _think_ it works, but ...)
+%% - conservative: instructions that require multiple resources are
+%% modelled as 'single'; instead, they could reserve IEU+BR or whatever
+%% - possibly inefficient: I think machine state model could be turned into
+%% a bitvector.
+
+-module(hipe_ultra_mod2).
+-export([init_resources/1,
+ init_instr_resources/2,
+ resources_available/4,
+ advance_cycle/1
+ ]).
+-export([raw_latency/2,
+ war_latency/2,
+ waw_latency/2,
+ %% m_raw_latency/2,
+ %% m_war_latency/2,
+ %% m_waw_latency/2,
+ m_raw_latency/0,
+ m_war_latency/0,
+ m_waw_latency/0,
+ br_to_unsafe_latency/2,
+ unsafe_to_br_latency/2,
+ br_br_latency/2
+ ]).
+
+-include("../sparc/hipe_sparc.hrl").
+
+-define(debug(Str,Args),ok).
+%-define(debug(Str,Args),io:format(Str,Args)).
+
+-define(debug_ultra(Str,Args),ok).
+%-define(debug_ultra(Str,Args),io:format(Str,Args)).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% Straightforward and somewhat simplistic model for UltraSparc:
+%% - only one cycle at a time is modelled
+%% - resources are simplified:
+%% * ieu0, ieu1, ieu, mem, br, single
+%% * per-cycle state = done | { I0, I1, NumI, X, Mem, Br }
+%% * unoptimized representation (could be bit vector)
+
+init_resources(_Size) ->
+ ?debug_ultra('init res ~p~n',[_Size]),
+ empty_state().
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+init_instr_resources(N,Nodes) ->
+ ultra_instr_rsrcs(Nodes,hipe_vectors:new(N, '')).
+
+ultra_instr_rsrcs([],I_res) -> I_res;
+ultra_instr_rsrcs([N|Ns],I_res) ->
+ ultra_instr_rsrcs(Ns,ultra_instr_type(N,I_res)).
+
+ultra_instr_type({N,I},I_res) ->
+ hipe_vectors:set(I_res,N-1,instr_type(I)).
+
+instr_type(I) ->
+ case I of
+ #move{} ->
+ ieu;
+ #multimove{} -> %% TODO: expand multimoves before scheduling
+ ieu;
+ #alu{} ->
+ case hipe_sparc:alu_operator(I) of
+ '>>' -> ieu0;
+ '<<' -> ieu0;
+ _ -> ieu
+ end;
+ #alu_cc{} ->
+ ieu1;
+ #sethi{} ->
+ ieu;
+ #load{} ->
+ mem;
+ #store{} ->
+ mem;
+ #b{} ->
+ br;
+ #br{} ->
+ br;
+ #goto{} ->
+ br;
+ #jmp_link{} -> % imprecise; should be mem+br?
+ single;
+ #jmp{} -> % imprecise
+ br;
+ #call_link{} -> % imprecise; should be mem+br?
+ single;
+ #cmov_cc{} -> % imprecise
+ single;
+ #cmov_r{} -> % imprecise
+ single;
+ #load_atom{} -> % should be resolved to sethi/or
+ single;
+ #load_address{} -> % should be resolved to sethi/or
+ single;
+ #load_word_index{} -> % should be resolved to sethi/or
+ single;
+ %% uncommon types:
+ #label{} ->
+ none;
+ #nop{} ->
+ none;
+ #comment{} ->
+ none;
+ _ ->
+ exit({ultrasparc_instr_type,{cant_schedule,I}})
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+resources_available(_Cycle, I, Rsrc, I_res) ->
+ res_avail(instruction_resource(I_res, I), Rsrc).
+
+instruction_resource(I_res, I) ->
+ hipe_vectors:get(I_res, I-1).
+
+%% The following function checks resource availability.
+%% * all function units are assumed to be fully pipelined, so only
+%% one cycle at a time is modelled.
+%% * for IEU0 and IEU1, these must precede all generic IEU instructions
+%% (handled by X bit)
+%% * at most 2 integer instructions can issue in a cycle
+%% * mem is straightforward
+%% * br closes the cycle (= returns done).
+%% * single requires an entirely empty state and closes the cycle
+
+res_avail(ieu0, { free, I1, NumI, free, Mem, Br })
+ when is_integer(NumI), NumI < 2 ->
+ { yes, { occ, I1, NumI+1, free, Mem, Br }};
+res_avail(ieu1, { _I0, free, NumI, free, Mem, Br })
+ when is_integer(NumI), NumI < 2 ->
+ { yes, { free, occ, NumI+1, free, Mem, Br }};
+res_avail(ieu, { I0, I1, NumI, _X, Mem, Br })
+ when is_integer(NumI), NumI < 2 ->
+ { yes, { I0, I1, NumI+1, occ, Mem, Br }};
+res_avail(mem, { I0, I1, NumI, X, free, Br }) ->
+ { yes, { I0, I1, NumI, X, occ, Br }};
+res_avail(br, { _I0, _I1, _NumI, _X, _Mem, free }) ->
+ { yes, done };
+res_avail(single, { free, free, 0, free, free, free }) ->
+ { yes, done };
+res_avail(_, _) ->
+ no.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+advance_cycle(_Rsrc) ->
+ empty_state().
+
+empty_state() -> { free, free, 0, free, free, free }.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Latencies are taken from UltraSparc hardware manual
+%%
+%% *** UNFINISHED ***
+%% more precisely, they are taken from my memory of the US-manual
+%% at the moment.
+%%
+%% Note: all ld/st are assumed to hit in the L1 cache (D-cache),
+%% which is sort of imprecise.
+
+raw_latency(alu, store) -> 0;
+raw_latency(load, _) -> 2; % only if load is L1 hit
+raw_latency(alu_cc, b) -> 0;
+raw_latency(_I0, _I1) ->
+ 1.
+
+war_latency(_I0, _I1) ->
+ 0.
+
+waw_latency(_I0, _I1) ->
+ 1.
+
+%% *** UNFINISHED ***
+%% At present, all load/stores are assumed to hit in the L1 cache,
+%% which isn't really satisfying.
+
+%% m_raw_latency(_St, _Ld) ->
+%% 1.
+%%
+%% m_war_latency(_Ld, _St) ->
+%% 1.
+%%
+%% m_waw_latency(_St1, _St2) ->
+%% 1.
+
+%% Use these for 'default latencies' = do not permit reordering.
+
+m_raw_latency() ->
+ 1.
+
+m_war_latency() ->
+ 1.
+
+m_waw_latency() ->
+ 1.
+
+br_to_unsafe_latency(_BrTy, _UTy) ->
+ 0.
+
+unsafe_to_br_latency(_UTy, _BrTy) ->
+ 0.
+
+br_br_latency(_BrTy1, _BrTy2) ->
+ 0.
diff --git a/lib/hipe/opt/hipe_ultra_prio.erl b/lib/hipe/opt/hipe_ultra_prio.erl
new file mode 100644
index 0000000000..9e2c1a0489
--- /dev/null
+++ b/lib/hipe/opt/hipe_ultra_prio.erl
@@ -0,0 +1,304 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%
+%% PRIORITY HANDLING AND PRIORITY CALCULATION
+%%
+%% Handling of ready nodes and priorities.
+%% Priorities are mainly from the critical path. More priorities are added.
+%% * One version is adding priorities just depending on the instr, so
+%% for example loads get higher priority than stores, and ordered
+%% after reg's and offset for better cache performance.
+%% * The other version gives higher priority to a node that adds more new
+%% nodes to the ready list. This one is maybe not so effectively
+%% implemented, but was added too late for smarter solutions.
+%% One version is commented away
+
+-module(hipe_ultra_prio).
+-export([init_ready/2,
+ init_instr_prio/2,
+ %% initial_ready_set/4,
+ next_ready/7,
+ add_ready_nodes/2,
+ insert_node/3
+ ]).
+
+-include("../sparc/hipe_sparc.hrl").
+
+% At first, only nodes with no predecessors are selected.
+% - if R is empty, there is an error (unless BB itself is empty)
+
+%% Arguments : Size - size of ready-array
+%% Preds - array with number of predecessors for each node
+%% Returns : An array with list of ready-nodes for each cycle.
+
+init_ready(Size, Preds) ->
+ P = hipe_vectors:size(Preds),
+ Ready = hipe_vectors:new(Size, []),
+ R = initial_ready_set(1, P, Preds, []),
+ hipe_vectors:set(Ready, 0, R).
+
+init_instr_prio(N, DAG) ->
+ critical_path(N, DAG).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : initial_ready_set
+%% Argument : M - current node-index
+%% N - where to stop
+%% Preds - array with number of predecessors for each node
+%% Ready - list with ready-nodes
+%% Returns : Ready - list with ready-nodes
+%% Description : Finds all nodes with no predecessors and adds them to ready.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+initial_ready_set(M, N, Preds, Ready) ->
+ if
+ M > N ->
+ Ready;
+ true ->
+ case hipe_vectors:get(Preds, M-1) of
+ 0 ->
+ initial_ready_set(M+1, N, Preds, [M|Ready]);
+ V when is_integer(V), V > 0 ->
+ initial_ready_set(M+1, N, Preds, Ready)
+ end
+ end.
+
+%% The following handles the nodes ready to schedule:
+%% 1. select the ready queue of given cycle
+%% 2. if queue empty, return none
+%% 3. otherwise, remove entry with highest priority
+%% and return {next,Highest_Prio,NewReady}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : next_ready
+%% Argument : C - current cycle
+%% Ready - array with ready nodes
+%% Prio - array with cpath-priorities for all nodes
+%% Nodes - indexed list [{N, Instr}]
+%% Returns : none / {next,Highest_Prio,NewReady}
+%% Description : 1. select the ready queue of given cycle
+%% 2. if queue empty, return none
+%% 3. otherwise, remove entry with highest priority
+%% and return {next,Highest_Prio,NewReady} where Highest_Prio
+%% = Id of instr and NewReady = updated ready-array.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+next_ready(C, Ready, Prio, Nodes, DAG, Preds, Earl) ->
+ Curr = hipe_vectors:get(Ready, C-1),
+ case Curr of
+ [] ->
+ none;
+ Instrs ->
+ {BestI,RestIs} =
+ get_best_instr(Instrs, Prio, Nodes, DAG, Preds, Earl, C),
+ {next,BestI,hipe_vectors:set(Ready,C-1,RestIs)}
+ end.
+
+% next_ready(C,Ready,Prio,Nodes) ->
+% Curr = hipe_vectors:get(Ready,C-1),
+% case Curr of
+% [] ->
+% none;
+% Instrs ->
+% {BestInstr,RestInstrs} = get_best_instr(Instrs, Prio, Nodes),
+% {next,BestInstr,hipe_vectors:set(Ready,C-1,RestInstrs)}
+% end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : get_best_instr
+%% Argument : Instrs - list of node-id's
+%% Prio - array with cpath-priorities for the nodes
+%% Nodes - indexed list [{Id, Instr}]
+%% Returns : {BestSoFar, Rest} - Id of best instr and the rest of id's
+%% Description : Returns the id of the instr that is the best choice.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+get_best_instr([Instr|Instrs], Prio, Nodes, DAG, Preds, Earl, C) ->
+ get_best_instr(Instrs, [], Instr, Prio, Nodes, DAG, Preds, Earl, C).
+
+get_best_instr([], Rest, BestSoFar, _Prio, _Nodes, _DAG, _Preds, _Earl, _C) ->
+ {BestSoFar, Rest};
+get_best_instr([Instr|Instrs], PassedInstrs, BestSoFar, Prio, Nodes,
+ DAG, Preds, Earl, C) ->
+ case better(Instr, BestSoFar, Prio, Nodes, DAG, Preds, Earl, C) of
+ true ->
+ get_best_instr(Instrs, [BestSoFar|PassedInstrs],
+ Instr, Prio, Nodes, DAG, Preds, Earl, C);
+ false ->
+ get_best_instr(Instrs, [Instr|PassedInstrs], BestSoFar, Prio,
+ Nodes, DAG, Preds, Earl, C)
+ end.
+
+% get_best_instr([Instr|Instrs], Prio, Nodes) ->
+% get_best_instr(Instrs, [], Instr, Prio, Nodes).
+
+% get_best_instr([], Rest, BestSoFar, Prio, Nodes) -> {BestSoFar, Rest};
+% get_best_instr([Instr|Instrs], PassedInstrs, BestSoFar, Prio, Nodes) ->
+% case better(Instr, BestSoFar, Prio, Nodes) of
+% true ->
+% get_best_instr(Instrs, [BestSoFar|PassedInstrs],
+% Instr, Prio, Nodes);
+% false ->
+% get_best_instr(Instrs, [Instr|PassedInstrs],BestSoFar, Prio, Nodes)
+% end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : better
+%% Argument : Instr1 - Id of instr 1
+%% Instr2 - Id of instr 2
+%% Prio - array with cpath-priorities for the nodes
+%% Nodes - indexed list [{Id, Instr}]
+%% Returns : true if Instr1 has higher priority than Instr2
+%% Description : Checks if Instr1 is a better choice than Instr2 for scheduling
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+better(Instr1, Instr2, Prio, Nodes, DAG, Preds, Earl, C) ->
+ better_hlp(priority(Instr1, Prio, Nodes, DAG, Preds, Earl, C),
+ priority(Instr2, Prio, Nodes, DAG, Preds, Earl, C)).
+
+better_hlp([], []) -> false;
+better_hlp([], [_|_]) -> false;
+better_hlp([_|_], []) -> true;
+better_hlp([X|Xs], [Y|Ys]) -> (X > Y) or ((X =:= Y) and better_hlp(Xs,Ys)).
+
+%%
+%% Returns the instr corresponding to id
+%%
+get_instr(InstrId, [{InstrId,Instr}|_]) -> Instr;
+get_instr(InstrId, [_|Xs]) -> get_instr(InstrId, Xs).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : priority
+%% Argument : InstrId - Id
+%% Prio - array with cpath-priorities for the nodes
+%% Nodes - indexed list [{Id, Instr}]
+%% Returns : PrioList - list of priorities [MostSignificant, LessSign, ...]
+%% Description : Returns a list of priorities where the first element is the
+%% cpath-priority and the rest are added depending on what kind
+%% of instr it is. Used to order loads/stores sequentially and
+%% there is possibility to add whatever stuff...
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+priority(InstrId, Prio, Nodes, DAG, Preds, Earl, C) ->
+ {ReadyNodes,_,_,_} = hipe_schedule:delete_node(C,InstrId,DAG,Preds,Earl),
+ Instr = get_instr(InstrId, Nodes),
+ Prio1 = hipe_vectors:get(Prio, InstrId-1),
+ Prio2 = length(ReadyNodes),
+ PrioRest =
+ case Instr of
+ #load_atom{} ->
+ [3];
+ #move{} ->
+ [3];
+ #load{} ->
+ Src = hipe_sparc:load_src(Instr),
+ Off = hipe_sparc:load_off(Instr),
+ case hipe_sparc:is_reg(Off) of
+ false -> [3,
+ -(hipe_sparc:reg_nr(Src)),
+ -(hipe_sparc:imm_value(Off))];
+ true -> [1]
+ end;
+ #store{} ->
+ Src = hipe_sparc:store_dest(Instr),
+ Off = hipe_sparc:store_off(Instr),
+ case hipe_sparc:is_reg(Off) of
+ false -> [2,
+ -(hipe_sparc:reg_nr(Src)),
+ -(hipe_sparc:imm_value(Off))];
+ true -> [1]
+ end;
+ _ -> [0]
+ end,
+ [Prio1,Prio2|PrioRest].
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : add_ready_nodes
+%% Argument : Nodes - list of [{Cycle,Id}]
+%% Ready - array of ready nodes for all cycles
+%% Returns : NewReady - updated ready-array
+%% Description : Gets a list of instrs and adds them to the ready-array
+%% to the corresponding cycle.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+add_ready_nodes([], Ready) -> Ready;
+add_ready_nodes([{C,I}|Xs], Ready) ->
+ add_ready_nodes(Xs, insert_node(C, I, Ready)).
+
+insert_node(C, I, Ready) ->
+ Old = hipe_vectors:get(Ready, C-1),
+ hipe_vectors:set(Ready, C-1, [I|Old]).
+
+%%
+%% Computes the latency for the "most expensive" way through the graph
+%% for all nodes. Returns an array of priorities for all nodes.
+%%
+critical_path(N, DAG) ->
+ critical_path(1, N, DAG, hipe_vectors:new(N, -1)).
+
+critical_path(M, N, DAG, Prio) ->
+ if
+ M > N ->
+ Prio;
+ true ->
+ critical_path(M+1, N, DAG, cpath(M, DAG, Prio))
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Function : cpath
+%% Argument : M - current node id
+%% DAG - the dependence graph
+%% Prio - array of priorities for all nodes
+%% Returns : Prio - updated prio array
+%% Description : If node has prio -1, it has not been visited
+%% - otherwise, compute priority as max of priorities of
+%% successors (+ latency)
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+cpath(M, DAG, Prio) ->
+ InitPrio = hipe_vectors:get(Prio, M-1),
+ if
+ InitPrio =:= -1 ->
+ cpath_node(M, DAG, Prio);
+ true ->
+ Prio
+ end.
+
+cpath_node(N, DAG, Prio) ->
+ SuccL = dag_succ(DAG, N),
+ {Max, NewPrio} = cpath_succ(SuccL, DAG, Prio),
+ hipe_vectors:set(NewPrio, N-1, Max).
+
+cpath_succ(SuccL, DAG, Prio) ->
+ cpath_succ(SuccL, DAG, Prio, 0).
+
+%% performs an unnecessary lookup of priority of Succ, but that might
+%% not be such a big deal
+
+cpath_succ([], _DAG, Prio, NodePrio) -> {NodePrio,Prio};
+cpath_succ([{Lat,Succ}|Xs], DAG, Prio, NodePrio) ->
+ NewPrio = cpath(Succ, DAG, Prio),
+ NewNodePrio = erlang:max(hipe_vectors:get(NewPrio, Succ - 1) + Lat, NodePrio),
+ cpath_succ(Xs, DAG, NewPrio, NewNodePrio).
+
+dag_succ(DAG, N) when is_integer(N) ->
+ hipe_vectors:get(DAG, N-1).
+