diff options
Diffstat (limited to 'lib/hipe/opt/hipe_ultra_mod2.erl')
-rw-r--r-- | lib/hipe/opt/hipe_ultra_mod2.erl | 239 |
1 files changed, 239 insertions, 0 deletions
diff --git a/lib/hipe/opt/hipe_ultra_mod2.erl b/lib/hipe/opt/hipe_ultra_mod2.erl new file mode 100644 index 0000000000..b039eaee80 --- /dev/null +++ b/lib/hipe/opt/hipe_ultra_mod2.erl @@ -0,0 +1,239 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% ULTRASPARC MACHINE MODEL +%% +%% This module is used by the scheduler. +%% The following interface is used: +%% ... +%% +%% NOTES: +%% - the machine model is simple (on the verge of simplistic) +%% * all FUs are pipelined => model only one cycle at a time +%% * instruction latencies are mostly 1 +%% * floating point is left for later (I _think_ it works, but ...) +%% - conservative: instructions that require multiple resources are +%% modelled as 'single'; instead, they could reserve IEU+BR or whatever +%% - possibly inefficient: I think machine state model could be turned into +%% a bitvector. + +-module(hipe_ultra_mod2). +-export([init_resources/1, + init_instr_resources/2, + resources_available/4, + advance_cycle/1 + ]). +-export([raw_latency/2, + war_latency/2, + waw_latency/2, + %% m_raw_latency/2, + %% m_war_latency/2, + %% m_waw_latency/2, + m_raw_latency/0, + m_war_latency/0, + m_waw_latency/0, + br_to_unsafe_latency/2, + unsafe_to_br_latency/2, + br_br_latency/2 + ]). 
-include("../sparc/hipe_sparc.hrl").

%% Debug output is compiled out by default; swap in the commented
%% definitions below to get io:format/2 tracing.
-define(debug(Str,Args),ok).
%-define(debug(Str,Args),io:format(Str,Args)).

-define(debug_ultra(Str,Args),ok).
%-define(debug_ultra(Str,Args),io:format(Str,Args)).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% A deliberately simple resource model for the UltraSparc:
%% - exactly one cycle is modelled at any time
%% - the resource classes are reduced to:
%%   * ieu0, ieu1, ieu, mem, br, single
%%   * per-cycle state = done | { I0, I1, NumI, X, Mem, Br }
%%   * the state is kept as a plain tuple (a bit vector would be
%%     more compact)

%% Returns the initial (empty) per-cycle resource state.
%% The size argument is only used for (normally disabled) tracing.
init_resources(_Size) ->
    ?debug_ultra('init res ~p~n',[_Size]),
    empty_state().

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Builds the table mapping each node to the resource class of its
%% instruction. Nodes is a list of {Index, Instr} pairs; the table is
%% created with hipe_vectors:new(N, '') and filled at position Index-1
%% (presumably nodes are numbered from 1 -- confirm against the caller).
init_instr_resources(N, Nodes) ->
    ultra_instr_rsrcs(Nodes, hipe_vectors:new(N, '')).

%% Records the resource class of every node, left to right.
ultra_instr_rsrcs(Nodes, Table0) ->
    lists:foldl(fun ultra_instr_type/2, Table0, Nodes).

%% Stores the resource class of one {Index, Instr} node in the table.
ultra_instr_type({Index, Instr}, Table) ->
    hipe_vectors:set(Table, Index-1, instr_type(Instr)).

%% Maps a SPARC instruction record to the resource class it occupies:
%% ieu0 | ieu1 | ieu | mem | br | single | none.
%% Exits with {ultrasparc_instr_type,{cant_schedule,Instr}} for any
%% instruction that is not listed here.
instr_type(#move{}) ->
    ieu;
instr_type(#multimove{}) ->      %% TODO: expand multimoves before scheduling
    ieu;
instr_type(#alu{} = Instr) ->
    %% shifts are tied to ieu0; every other ALU operation is generic
    case hipe_sparc:alu_operator(Instr) of
        Op when Op =:= '>>'; Op =:= '<<' -> ieu0;
        _ -> ieu
    end;
instr_type(#alu_cc{}) ->
    ieu1;
instr_type(#sethi{}) ->
    ieu;
instr_type(#load{}) ->
    mem;
instr_type(#store{}) ->
    mem;
instr_type(#b{}) ->
    br;
instr_type(#br{}) ->
    br;
instr_type(#goto{}) ->
    br;
instr_type(#jmp_link{}) ->       % imprecise; should be mem+br?
    single;
instr_type(#jmp{}) ->            % imprecise
    br;
instr_type(#call_link{}) ->      % imprecise; should be mem+br?
    single;
instr_type(#cmov_cc{}) ->        % imprecise
    single;
instr_type(#cmov_r{}) ->         % imprecise
    single;
instr_type(#load_atom{}) ->      % should be resolved to sethi/or
    single;
instr_type(#load_address{}) ->   % should be resolved to sethi/or
    single;
instr_type(#load_word_index{}) -> % should be resolved to sethi/or
    single;
%% uncommon types:
instr_type(#label{}) ->
    none;
instr_type(#nop{}) ->
    none;
instr_type(#comment{}) ->
    none;
instr_type(Other) ->
    exit({ultrasparc_instr_type,{cant_schedule,Other}}).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Checks whether instruction number I can issue in the current cycle.
%%   _Cycle - ignored (only one cycle is modelled at a time)
%%   I      - instruction number; looked up at position I-1 in I_res
%%   Rsrc   - current per-cycle state (see res_avail/2)
%%   I_res  - table of resource classes, read with hipe_vectors:get/2
%% Returns {yes, NewRsrc} if the instruction can issue, otherwise no.
resources_available(_Cycle, I, Rsrc, I_res) ->
    res_avail(instruction_resource(I_res, I), Rsrc).

%% Fetches the resource class recorded for instruction number I.
instruction_resource(I_res, I) ->
    hipe_vectors:get(I_res, I-1).

%% The following function checks resource availability.
%% The state is either the atom 'done' (cycle closed) or the tuple
%% { I0, I1, NumI, X, Mem, Br }, where I0/I1/X/Mem/Br are free | occ
%% and NumI counts the integer instructions issued so far.
%% * all function units are assumed to be fully pipelined, so only
%%   one cycle at a time is modelled.
%% * for IEU0 and IEU1, these must precede all generic IEU instructions
%%   (handled by X bit)
%% * at most 2 integer instructions can issue in a cycle
%% * mem is straightforward
%% * br closes the cycle (= returns done).
%% * single requires an entirely empty state and closes the cycle
%% * anything else (including any request against 'done') yields no
%%
%% Returns {yes, NewState} or no.

res_avail(ieu0, { free, I1, NumI, free, Mem, Br })
  when is_integer(NumI), NumI < 2 ->
    { yes, { occ, I1, NumI+1, free, Mem, Br }};
res_avail(ieu1, { I0, free, NumI, free, Mem, Br })
  when is_integer(NumI), NumI < 2 ->
    %% Keep the IEU0 slot as it was: issuing on IEU1 must not mark an
    %% occupied IEU0 as free again (mirrors how the ieu0 clause keeps I1).
    { yes, { I0, occ, NumI+1, free, Mem, Br }};
res_avail(ieu, { I0, I1, NumI, _X, Mem, Br })
  when is_integer(NumI), NumI < 2 ->
    { yes, { I0, I1, NumI+1, occ, Mem, Br }};
res_avail(mem, { I0, I1, NumI, X, free, Br }) ->
    { yes, { I0, I1, NumI, X, occ, Br }};
res_avail(br, { _I0, _I1, _NumI, _X, _Mem, free }) ->
    { yes, done };
res_avail(single, { free, free, 0, free, free, free }) ->
    { yes, done };
res_avail(_, _) ->
    no.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Starts a new cycle: the previous state is discarded and a fresh,
%% empty state is returned.
advance_cycle(_Rsrc) ->
    empty_state().

%% The state of a cycle in which nothing has been issued yet.
empty_state() -> { free, free, 0, free, free, free }.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Latencies are taken from UltraSparc hardware manual
%%
%% *** UNFINISHED ***
%% more precisely, they are taken from my memory of the US-manual
%% at the moment.
%%
%% Note: all ld/st are assumed to hit in the L1 cache (D-cache),
%% which is sort of imprecise.

%% raw_latency(ProducerType, ConsumerType) -> Cycles
%% Clauses are tried in order, so alu->store and alu_cc->b are 0,
%% anything following a load is 2, and every other pair is 1.
raw_latency(alu, store) -> 0;
raw_latency(load, _) -> 2; % only if load is L1 hit
raw_latency(alu_cc, b) -> 0;
raw_latency(_I0, _I1) ->
    1.
%% WAR latency between two instruction types: always 0 in this model.
war_latency(_Earlier, _Later) -> 0.

%% WAW latency between two instruction types: always 1 in this model.
waw_latency(_Earlier, _Later) -> 1.

%% *** UNFINISHED ***
%% At present, all load/stores are assumed to hit in the L1 cache,
%% which isn't really satisfying.

%% The type-dependent (arity 2) memory latencies are currently disabled:
%%
%% m_raw_latency(_St, _Ld) ->
%%     1.
%%
%% m_war_latency(_Ld, _St) ->
%%     1.
%%
%% m_waw_latency(_St1, _St2) ->
%%     1.

%% Use these for 'default latencies' = do not permit reordering.

m_raw_latency() -> 1.

m_war_latency() -> 1.

m_waw_latency() -> 1.

%% Latencies involving branches are all 0 in this model, whatever
%% the types of the two instructions are.

br_to_unsafe_latency(_BranchType, _UnsafeType) -> 0.

unsafe_to_br_latency(_UnsafeType, _BranchType) -> 0.

br_br_latency(_FirstBranch, _SecondBranch) -> 0.