hipe: Add hipe_regalloc_prepass

hipe_regalloc_prepass speeds up register allocation by spilling any temp that is live over a call (which clobbers all register). In order to detect these, a new function was added to the target interface; defines_all_alloc/1, that takes an instruction and returns a boolean.
author: Magnus Lång <[email protected]> 2016-05-28 20:22:34 +0200
committer: Magnus Lång <[email protected]> 2016-09-02 15:04:45 +0200
commit: 85bd166647e7f260fd665eb44da9151c0d88f208 (patch)
tree: 84878ab624fcc22246bae89b75a6857415508539 /lib
parent: c2f8b61ca3682281752fa0984699214dfcbf7ccd (diff)
download: otp-85bd166647e7f260fd665eb44da9151c0d88f208.tar.gz
otp-85bd166647e7f260fd665eb44da9151c0d88f208.tar.bz2
otp-85bd166647e7f260fd665eb44da9151c0d88f208.zip
22 files changed, 490 insertions, 11 deletions
diff --git a/lib/hipe/amd64/hipe_amd64_registers.erl b/lib/hipe/amd64/hipe_amd64_registers.erl
index ada5311453..7c6965b938 100644
--- a/lib/hipe/amd64/hipe_amd64_registers.erl
+++ b/lib/hipe/amd64/hipe_amd64_registers.erl
@@ -253,6 +253,9 @@ ret(N) ->
     _ -> exit({?MODULE, ret, N})
   end.
 
+%% Note: the fact that (allocatable() UNION allocatable_x87() UNION
+%% allocatable_sse2()) is a subset of call_clobbered() is hard-coded in
+%% hipe_x86_defuse:insn_defs_all/1
 call_clobbered() ->
   [{?RAX,tagged},{?RAX,untagged},	% does the RA strip the type or not?
    {?RDX,tagged},{?RDX,untagged},
diff --git a/lib/hipe/arm/hipe_arm_defuse.erl b/lib/hipe/arm/hipe_arm_defuse.erl
index f57b0e601c..f92cf4f82a 100644
--- a/lib/hipe/arm/hipe_arm_defuse.erl
+++ b/lib/hipe/arm/hipe_arm_defuse.erl
@@ -22,6 +22,7 @@
 -module(hipe_arm_defuse).
 -export([insn_def_all/1, insn_use_all/1]).
 -export([insn_def_gpr/1, insn_use_gpr/1]).
+-export([insn_defs_all_gpr/1]).
 -include("hipe_arm.hrl").
 
 %%%
@@ -55,6 +56,12 @@ insn_def_gpr(I) ->
     _ -> []
   end.
 
+insn_defs_all_gpr(I) ->
+  case I of
+    #pseudo_call{} -> true;
+    _ -> false
+  end.
+
 call_clobbered_gpr() ->
   [hipe_arm:mk_temp(R, T)
    || {R,T} <- hipe_arm_registers:call_clobbered() ++ all_fp_pseudos()].
diff --git a/lib/hipe/arm/hipe_arm_registers.erl b/lib/hipe/arm/hipe_arm_registers.erl
index dcf039676b..3ecf2f2fdb 100644
--- a/lib/hipe/arm/hipe_arm_registers.erl
+++ b/lib/hipe/arm/hipe_arm_registers.erl
@@ -180,6 +180,8 @@ is_arg(R) ->
     _ -> false
   end.
 
+%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is
+%% hard-coded in hipe_arm_defuse:insn_defs_all_gpr/1
 call_clobbered() ->		% does the RA strip the type or not?
   [{?R0,tagged},{?R0,untagged},
    {?R1,tagged},{?R1,untagged},
diff --git a/lib/hipe/main/hipe.app.src b/lib/hipe/main/hipe.app.src
index 6c3a2741b3..e4796368c3 100644
--- a/lib/hipe/main/hipe.app.src
+++ b/lib/hipe/main/hipe.app.src
@@ -145,6 +145,7 @@
 	     hipe_profile,
 	     hipe_reg_worklists,
 	     hipe_regalloc_loop,
+	     hipe_regalloc_prepass,
 	     hipe_rtl,
 	     hipe_rtl_arch,
 	     hipe_rtl_arith_32,
diff --git a/lib/hipe/ppc/hipe_ppc_defuse.erl b/lib/hipe/ppc/hipe_ppc_defuse.erl
index 77b84dc574..305e88488d 100644
--- a/lib/hipe/ppc/hipe_ppc_defuse.erl
+++ b/lib/hipe/ppc/hipe_ppc_defuse.erl
@@ -22,6 +22,7 @@
 -module(hipe_ppc_defuse).
 -export([insn_def_all/1, insn_use_all/1]).
 -export([insn_def_gpr/1, insn_use_gpr/1]).
+-export([insn_defs_all_gpr/1, insn_defs_all_fpr/1]).
 -export([insn_def_fpr/1, insn_use_fpr/1]).
 -include("hipe_ppc.hrl").
 
@@ -52,6 +53,9 @@ insn_def_gpr(I) ->
     _ -> []
   end.
 
+insn_defs_all_gpr(#pseudo_call{}) -> true;
+insn_defs_all_gpr(_) -> false.
+
 call_clobbered_gpr() ->
   [hipe_ppc:mk_temp(R, T)
    || {R,T} <- hipe_ppc_registers:call_clobbered() ++ all_fp_pseudos()].
@@ -116,6 +120,9 @@ insn_def_fpr(I) ->
     _ -> []
   end.
 
+insn_defs_all_fpr(#pseudo_call{}) -> true;
+insn_defs_all_fpr(_) -> false.
+
 call_clobbered_fpr() ->
   [hipe_ppc:mk_temp(R, 'double') || R <- hipe_ppc_registers:allocatable_fpr()].
 
diff --git a/lib/hipe/ppc/hipe_ppc_registers.erl b/lib/hipe/ppc/hipe_ppc_registers.erl
index f4781d5ed7..8f6d9779fc 100644
--- a/lib/hipe/ppc/hipe_ppc_registers.erl
+++ b/lib/hipe/ppc/hipe_ppc_registers.erl
@@ -201,6 +201,8 @@ is_arg(R) ->
     _ -> false
   end.
 
+%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is
+%% hard-coded in hipe_ppc_defuse:insn_defs_all_gpr/1
 call_clobbered() ->		% does the RA strip the type or not?
   [{?R0,tagged},{?R0,untagged},
    %% R1 is reserved for C
diff --git a/lib/hipe/regalloc/Makefile b/lib/hipe/regalloc/Makefile
index ceb535f1c7..209f230a9b 100644
--- a/lib/hipe/regalloc/Makefile
+++ b/lib/hipe/regalloc/Makefile
@@ -51,6 +51,7 @@ MODULES = hipe_ig hipe_ig_moves hipe_moves \
 	  hipe_coalescing_regalloc \
 	  hipe_graph_coloring_regalloc \
 	  hipe_regalloc_loop \
+	  hipe_regalloc_prepass \
 	  hipe_ls_regalloc \
 	  hipe_ppc_specific hipe_ppc_specific_fp \
 	  hipe_sparc_specific hipe_sparc_specific_fp \
diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl
index 2e5804337d..6ef79ce95d 100644
--- a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl
+++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl
@@ -33,6 +33,7 @@
          liveout/2,
          uses/1,
          defines/1,
+	 defines_all_alloc/1,
 	 def_use/1,
 	 is_arg/1,	%% used by hipe_ls_regalloc
 	 is_move/1,
@@ -174,6 +175,8 @@ defines(I) ->
 	     hipe_x86:temp_is_allocatable(X),
 	     hipe_x86:temp_type(X) =:= 'double'].
 
+defines_all_alloc(I) -> hipe_amd64_defuse:insn_defs_all(I).
+
 is_move(Instruction) ->
   case hipe_x86:is_fmove(Instruction) of
     true ->
diff --git a/lib/hipe/regalloc/hipe_arm_specific.erl b/lib/hipe/regalloc/hipe_arm_specific.erl
index e04d80f690..c696668399 100644
--- a/lib/hipe/regalloc/hipe_arm_specific.erl
+++ b/lib/hipe/regalloc/hipe_arm_specific.erl
@@ -40,6 +40,7 @@
 	 ,livein/2
 	 ,uses/1
 	 ,defines/1
+	 ,defines_all_alloc/1
 	]).
 
 %% for hipe_graph_coloring_regalloc:
@@ -129,6 +130,9 @@ defines(I) ->
   [X || X <- hipe_arm_defuse:insn_def_gpr(I),
 	hipe_arm:temp_is_allocatable(X)].
 
+defines_all_alloc(I) ->
+  hipe_arm_defuse:insn_defs_all_gpr(I).
+
 is_move(Instruction) ->
   case hipe_arm:is_pseudo_move(Instruction) of
     true ->
diff --git a/lib/hipe/regalloc/hipe_ppc_specific.erl b/lib/hipe/regalloc/hipe_ppc_specific.erl
index 988501c96f..79ca333865 100644
--- a/lib/hipe/regalloc/hipe_ppc_specific.erl
+++ b/lib/hipe/regalloc/hipe_ppc_specific.erl
@@ -40,6 +40,7 @@
 	 ,livein/2
 	 ,uses/1
 	 ,defines/1
+	 ,defines_all_alloc/1
 	]).
 
 %% for hipe_graph_coloring_regalloc:
@@ -129,6 +130,9 @@ defines(I) ->
   [X || X <- hipe_ppc_defuse:insn_def_gpr(I),
 	hipe_ppc:temp_is_allocatable(X)].
 
+defines_all_alloc(I) ->
+  hipe_ppc_defuse:insn_defs_all_gpr(I).
+
 is_move(Instruction) ->
   case hipe_ppc:is_pseudo_move(Instruction) of
     true ->
diff --git a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl
index 6f00111777..4bf1385e0f 100644
--- a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl
+++ b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl
@@ -40,6 +40,7 @@
 	 ,livein/2
 	 ,uses/1
 	 ,defines/1
+	 ,defines_all_alloc/1
 	]).
 
 %% for hipe_graph_coloring_regalloc:
@@ -120,6 +121,9 @@ uses(I) ->
 defines(I) ->
   hipe_ppc_defuse:insn_def_fpr(I).
 
+defines_all_alloc(I) ->
+  hipe_ppc_defuse:insn_defs_all_fpr(I).
+
 is_move(I) ->
   hipe_ppc:is_pseudo_fmove(I).
  
diff --git a/lib/hipe/regalloc/hipe_regalloc_loop.erl b/lib/hipe/regalloc/hipe_regalloc_loop.erl
index fa42cdd0fb..33a7130ff1 100644
--- a/lib/hipe/regalloc/hipe_regalloc_loop.erl
+++ b/lib/hipe/regalloc/hipe_regalloc_loop.erl
@@ -43,7 +43,8 @@ ra_common(Defun, SpillIndex, Options, RegAllocMod, TargetMod) ->
 alloc(Defun, CFG, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) ->
   ?inc_counter(ra_iteration_counter, 1),
   {Coloring, _NewSpillIndex, Liveness} =
-    RegAllocMod:regalloc(CFG, SpillIndex, SpillLimit,TargetMod, Options),
+    hipe_regalloc_prepass:regalloc(
+      RegAllocMod, CFG, SpillIndex, SpillLimit, TargetMod, Options),
   {NewDefun, DidSpill} = TargetMod:check_and_rewrite(Defun, Coloring),
   case DidSpill of
     false -> %% No new temps, we are done.
diff --git a/lib/hipe/regalloc/hipe_regalloc_prepass.erl b/lib/hipe/regalloc/hipe_regalloc_prepass.erl
new file mode 100644
index 0000000000..f9df3a68de
--- /dev/null
+++ b/lib/hipe/regalloc/hipe_regalloc_prepass.erl
@@ -0,0 +1,400 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2016. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%@doc
+%%	       PREPASS FOR ITERATED REGISTER ALLOCATORS
+%%
+%% Implements a trivial partial but optimal fast register allocator to be used
+%% as the first pass of the register allocation loop.
+%%
+%% The idea is to drastically reduce the number of temporaries, so as to speed
+%% up the real register allocators.
+%%
+%%  * Spills trivially unallocatable temps
+%%    This relies on the fact that calls intentionally clobber all registers.
+%%    Since this is the case, any temp that is alive over a call can't possibly
+%%    be allocated to a
+%%
+%% TODO: Partition the IG (without constructing it) into connected components
+%% and call the actual register allocator on them individually.
+
+-module(hipe_regalloc_prepass).
+-export([regalloc/6]).
+
+%% -ifndef(DEBUG).
+%% -compile(inline).
+%% -endif.
+
+%%-define(DO_ASSERT, 1).
+-include("../main/hipe.hrl").
+
+%% We present a "pseudo-target" to the register allocator we wrap.
+%% Note: all arities are +1 as we're currently using the parameterised module
+%% facility to store context data.
+-export([analyze/2,
+	 all_precoloured/1,
+	 allocatable/1,
+	 args/2,
+	 bb/3,
+	 breadthorder/2,
+	 def_use/2,
+	 defines/2,
+	 is_fixed/2,	% used by hipe_graph_coloring_regalloc
+	 is_global/2,
+	 is_move/2,
+	 is_precoloured/2,
+	 labels/2,
+	 livein/3,
+	 liveout/3,
+	 non_alloc/2,
+	 number_of_temporaries/2,
+	 physical_name/2,
+	 postorder/2,
+	 reg_nr/2,
+	 uses/2,
+	 var_range/2,
+	 reverse_postorder/2]).
+
+%% Eww, parameterised module. Can we fix it without having to touch all the
+%% register allocators?
+-record(?MODULE,
+	{target   :: module()
+	,sub      :: sub_map() % Translates temp numbers found in CFG and understood by
+			       % Target to temp numbers passed to RegAllocMod.
+	,inv      :: inv_map() % Translates temp numbers passed to RegAllocMod
+			       % to temp numbers found in CFG and understood by
+			       % Target
+	,max_phys :: temp()    % Exclusive upper bound on physical registers
+	}).
+
+-record(cfg,
+	{cfg     :: target_cfg()
+	,bbs     :: #{label() => hipe_bb:bb(instr())}
+	,max_reg :: temp()    % Exclusive upper bound on temp numbers
+	,live    :: target_liveness()
+	}).
+
+-record(instr,
+	{tgt_instr :: target_instr() % Not used, for debugging
+	,defuse    :: {[temp()], [temp()]}
+	,is_move   :: boolean()
+	}).
+%% -record(label,
+%% 	{no :: label()
+%% 	}).
+-type instr() :: #instr{} %% | #label{}
+		 .
+
+-type target_cfg() :: any().
+-type target_instr() :: any().
+-type target_temp() :: any().
+-type target_reg() :: non_neg_integer().
+-type target_liveness() :: any().
+-type spillno() :: non_neg_integer().
+-type temp() :: non_neg_integer().
+-type label() :: non_neg_integer().
+
+-spec regalloc(module(), target_cfg(), spillno(), spillno(), module(),
+	       proplists:proplist())
+	      -> {hipe_map(), spillno(), target_liveness()}.
+regalloc(RegAllocMod, CFG, SpillIndex0, SpillLimit, Target, Options) ->
+  Liveness = Target:analyze(CFG),
+  {BBs0, LivePseudos, _Unused, SpilledPseudos, SpillIndex} =
+    scan_cfg(CFG, Liveness, SpillIndex0, Target),
+
+  {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit} =
+    number_and_map(Target:all_precoloured(), LivePseudos, SpillLimit),
+  BBs = transform_cfg(BBs0, SubMap),
+  SubMod = #cfg{cfg=CFG, bbs=BBs, max_reg=MaxR, live=Liveness},
+  SubTarget = #?MODULE{target=Target, max_phys=MaxPhys, inv=InvMap, sub=SubMap},
+  case
+    RegAllocMod:regalloc(SubMod, SpillIndex, SubSpillLimit, SubTarget, Options)
+  of
+    {SubColoring, NewSpillIndex} -> ok;
+    {SubColoring, NewSpillIndex, _SubLiveness} -> ok
+  end,
+  ?ASSERT(check_coloring(SubColoring, SubMod, SubTarget)), % blame the RA ;)
+  SpillColors = [{T, {spill, S}} || {T, S} <- maps:to_list(SpilledPseudos)],
+  Coloring = translate_coloring(SubColoring, InvMap) ++ SpillColors,
+  ?ASSERT(check_coloring(Coloring, CFG, Target)), % Sanity-check
+  ?ASSERT(just_as_good_as(RegAllocMod, CFG, SpillIndex0, SpillLimit, Target,
+			  Options, Coloring, _Unused)),
+  {Coloring, NewSpillIndex, Liveness}.
+
+number_and_map(Phys, Pseud, SpillLimit) ->
+  MaxPhys = lists:max(Phys) + 1,
+  case Pseud of [] -> ok; _ ->
+      case lists:min(Pseud) of % Assertion
+	FirstPseudo when FirstPseudo >= MaxPhys -> ok
+      end end,
+  NrPseuds = length(Pseud),
+  MaxR = MaxPhys+NrPseuds,
+  PseudNrs = lists:zip(Pseud, lists:seq(MaxPhys, MaxR-1)),
+  MapList = lists:zip(Phys, Phys) % Physicals are identity-mapped
+    ++ PseudNrs,
+  SubMap = maps:from_list(MapList),
+  InvMap = maps:from_list([{Fake, Real} || {Real, Fake} <- MapList]),
+  LastPseudo = case Pseud of [] -> MaxPhys-1; _ -> lists:max(Pseud) end,
+  SubSpillLimit = if SpillLimit > LastPseudo -> MaxR;
+		     true -> map_get(SpillLimit, SubMap)
+		  end,
+  {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit}.
+
+-record(spill_state,
+	{map :: spill_map()
+	,ix  :: spillno()
+	}).
+-type spill_state() :: #spill_state{}.
+-type spill_map()   :: #{target_reg() => spillno()}.
+
+-spec scan_cfg(target_cfg(), target_liveness(), spillno(), module())
+	      -> {#{label() => [instr()]}
+		 ,ordsets:set(target_reg())
+		 ,ordsets:set(target_reg())
+		 ,spill_map()
+		 ,spillno()
+		 }.
+scan_cfg(CFG, Liveness, SpillIndex0, Target) ->
+  State0 = #spill_state{map=#{}, ix=SpillIndex0},
+  {BBs, Seen, #spill_state{map=Spill, ix=SpillIndex}} =
+    scan_bbs(Target:labels(CFG), CFG, Liveness, #{}, State0, #{}, Target),
+  SeenOSet = ordsets:from_list(maps:keys(Seen)),
+  SpillOSet = ordsets:from_list(maps:keys(Spill)),
+  PhysOSet = ordsets:from_list(Target:all_precoloured()),
+  Dead = ordsets:union(SpillOSet, PhysOSet),
+  LivePseudos = ordsets:subtract(SeenOSet, Dead),
+  {_TMin, _TMax} = Target:var_range(CFG),
+  _Unused = ordsets:subtract(lists:seq(_TMin, _TMax),
+			     ordsets:union(SeenOSet, PhysOSet)),
+  {BBs, LivePseudos, _Unused, Spill, SpillIndex}.
+
+-type seen() :: #{target_reg() => []}. % set
+
+-spec scan_bbs([label()], target_cfg(), target_liveness(), seen(),
+	       spill_state(), #{label() => [instr()]}, module())
+	      -> {#{label() => [instr()]}, seen(), spill_state()}.
+scan_bbs([], _CFG, _Liveness, Seen, State, BBs, _Target) -> {BBs, Seen, State};
+scan_bbs([L|Ls], CFG, Liveness, Seen0, State0, BBs, Target) ->
+    {Code, _, Seen, State} = scan_bb(hipe_bb:code(Target:bb(CFG, L)),
+				     t_liveout(Liveness, L, Target),
+				     Seen0, State0, Target),
+    scan_bbs(Ls, CFG, Liveness, Seen, State, BBs#{L=>Code}, Target).
+
+t_liveout(Liveness, L, Target) ->
+  %% FIXME: unnecessary sort; liveout is sorted, reg_names(...) should be sorted
+  %% or consist of a few sorted subsequences (per type)
+  ordsets:from_list(reg_names(Target:liveout(Liveness, L), Target)).
+
+-spec reg_names([target_temp()], module()) -> [target_reg()].
+reg_names(Regs, Target) ->
+  [Target:reg_nr(X) || X <- Regs].
+
+scan_bb([], Live, Seen, State, _Target) -> {[], Live, Seen, State};
+scan_bb([I|Is0], Live0, Seen0, State0, Target) ->
+  %% We could pass Target in the return tuple to make the stack frames
+  %% smaller, but without multireturn opt, it's probably not worth it.
+  {Is1, Live1, Seen1, State1} = scan_bb(Is0, Live0, Seen0, State0, Target),
+  {TDef, TUse} = Target:def_use(I),
+  Def = ordsets:from_list(reg_names(TDef, Target)),
+  Use = ordsets:from_list(reg_names(TUse, Target)),
+  Live = ordsets:union(Use, ToSpill = ordsets:subtract(Live1, Def)),
+  Seen = add_seen(Def, add_seen(Use, Seen1)),
+  State =
+    case Target:defines_all_alloc(I) of
+      false -> State1;
+      true -> spill_all(ToSpill, Target, State1)
+    end,
+  NewI = #instr{tgt_instr=I, defuse={Def, Use}, is_move=Target:is_move(I)},
+  {[NewI|Is1], Live, Seen, State}.
+
+add_seen([], Seen) -> Seen;
+add_seen([R|Rs], Seen) -> add_seen(Rs, Seen#{R=>[]}).
+
+spill_all([], _Target, State) -> State;
+spill_all([R|Rs], Target, State=#spill_state{map=Map, ix=Ix}) ->
+  case Target:is_precoloured(R) or maps:is_key(R, Map) of
+    true -> spill_all(Rs, Target, State);
+    false -> spill_all(Rs, Target, State#spill_state{map=Map#{R=>Ix}, ix=Ix+1})
+  end.
+
+transform_cfg(BBs0, SubMap) ->
+  maps:map(fun(_, BB) -> transform_bb(BB, SubMap) end, BBs0).
+
+transform_bb(BB, SubMap) ->
+  hipe_bb:mk_bb([I#instr{defuse={map_get_all_partial(Def, SubMap),
+				 map_get_all_partial(Use, SubMap)}}
+		 || I = #instr{defuse={Def,Use}} <- BB]).
+
+-spec translate_coloring(hipe_map(), _) -> hipe_map().
+translate_coloring(SubColoring, InvMap) ->
+  lists:map(fun({T, P}) -> {map_get(T, InvMap), P} end, SubColoring).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Interference graph partitioning
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% We partition the program
+
+%% The algorithm considers two kinds of components; those that are local to a
+%% basic block, and those that are not. The key is that any basic block belongs
+%% to at most two non-local components; one from the beginning to the first
+%% split point, and one from the end to the last split point.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Temp map ADT
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+-type sub_map() :: #{target_temp() => temp()}.
+-type inv_map() :: #{temp() => target_temp()}.
+map_get(Temp, Map) when is_integer(Temp) -> maps:get(Temp, Map).
+
+%% map_get_all(Ts, Map) -> [map_get(T, Map) || T <- Ts].
+
+map_get_all_partial([], _) -> [];
+map_get_all_partial([T|Ts], Map) when is_integer(T) ->
+  case Map of
+    #{T := R} -> [R|map_get_all_partial(Ts, Map)];
+    #{} -> map_get_all_partial(Ts, Map)
+  end.
+
+-ifdef(DO_ASSERT).
+%%%%%%%%%%%%%%%%%%%%
+%% Check that the coloring is correct (if the IG is correct):
+%%
+
+%% Define these as 'ok' or 'report(X,Y)' depending on how much output you want.
+-define(report0(X,Y), ?IF_DEBUG_LEVEL(0,?msg(X, Y),ok)).
+-define(report(X,Y),  ?IF_DEBUG_LEVEL(1,?msg(X, Y),ok)). 
+-define(report2(X,Y), ?IF_DEBUG_LEVEL(2,?msg(X, Y),ok)). 
+-define(report3(X,Y), ?IF_DEBUG_LEVEL(3,?msg(X, Y),ok)).
+
+check_coloring(Coloring, CFG, Target) ->
+  ?report0("checking coloring ~p~n",[Coloring]),
+  IG = hipe_ig:build(CFG, Target),
+  check_cols(hipe_vectors:list(hipe_ig:adj_list(IG)),
+	     init_coloring(Coloring, Target)).
+
+init_coloring(Xs, Target) ->
+  hipe_temp_map:cols2tuple(Xs, Target).
+
+check_color_of(X, Cols) ->
+%%    if
+%%	is_precoloured(X) ->
+%%	    phys_reg_color(X,Cols);
+%%	true ->
+  case hipe_temp_map:find(X, Cols) of
+    unknown ->
+      ?WARNING_MSG("node ~p: color not found~n", [X]),
+      uncolored;
+    C ->
+      C
+  end.
+
+check_cols([], _Cols) ->
+  ?report("coloring valid~n",[]),
+  true;
+check_cols([{X,Neighbours}|Xs], Cols) ->
+  Cs = [{N, check_color_of(N, Cols)} || N <- Neighbours],
+  C = check_color_of(X, Cols),
+  case valid_coloring(X, C, Cs) of
+    yes ->
+      check_cols(Xs, Cols);
+    {no,Invalids} ->
+      ?msg("node ~p has same color (~p) as ~p~n", [X,C,Invalids]),
+      check_cols(Xs, Cols) andalso false
+  end.
+
+valid_coloring(_X, _C, []) ->
+  yes;
+valid_coloring(X, C, [{Y,C}|Ys]) ->
+  case valid_coloring(X, C, Ys) of
+    yes -> {no, [Y]};
+    {no,Zs} -> {no, [Y|Zs]}
+  end;
+valid_coloring(X, C, [_|Ys]) ->
+  valid_coloring(X, C, Ys).
+
+%%%%%%%%%%%%%%%%%%%%
+%% Check that no register allocation opportinities were missed due to ?MODULE
+%%
+just_as_good_as(RegAllocMod, CFG, SpillIndex0, SpillLimit, Target, Options,
+		Coloring, Unused) ->
+  {CheckColoring, _} = RegAllocMod:regalloc(CFG, SpillIndex0, SpillLimit,
+					    Target, Options),
+  Now   = lists:sort([vt(C) || C <- Coloring]),
+  Check = lists:sort([vt(C) || C={R,_} <- CheckColoring,
+			       not ordsets:is_element(R, Unused)]),
+  case Now of
+    Check -> true;
+    _ ->
+      io:fwrite(standard_error, "Colorings differ (~w, sub: ~w, full: ~w)!~n"
+		%% "Sub:~w~n"
+		"Unused: ~w~n"
+		"Now:~w~nCorrect:~w~n",
+		[Target, count(Coloring), count(CheckColoring),
+		 %% SubColoring,
+		 Unused,
+		 Now -- Check, Check -- Now]),
+	false
+  end.
+
+count(C) -> {length([[] || {_, {reg, _}} <- C]),
+	     length([[] || {_, {spill, _}} <- C])}.
+
+vt({R, {reg, _}}) -> {R, reg};
+vt({R, {spill, _}}) -> {R, spill}.
+-endif.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Pseudo-target interface
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+analyze(Cfg, _ModRec) -> Cfg.
+bb(#cfg{bbs=BBs}, Ix, _ModRec) -> maps:get(Ix, BBs).
+args(Arity, #?MODULE{target=Target, sub=SubM}) ->
+    map_get(Target:args(Arity), SubM).
+labels(#cfg{cfg=CFG}, #?MODULE{target=Target}) -> Target:labels(CFG).
+livein(#cfg{live=Live}, Lb, #?MODULE{target=Target, sub=SubM}) ->
+  map_get_all_partial(reg_names(Target:livein(Live, Lb), Target), SubM). % Should we precompute?
+liveout(#cfg{live=Live}, Lb, #?MODULE{target=Target, sub=SubM}) ->
+  map_get_all_partial(reg_names(Target:liveout(Live, Lb), Target), SubM). % dito
+uses(I, MR) -> element(2, def_use(I, MR)).
+defines(I, MR) -> element(1, def_use(I, MR)).
+def_use(#instr{defuse=DefUse}, _ModRec) -> DefUse.
+is_move(#instr{is_move=IM}, _ModRec) -> IM.
+is_fixed(Reg, #?MODULE{target=Target,inv=InvM}) ->
+  Target:is_fixed(map_get(Reg, InvM)). % XXX: Is this hot?
+is_global(Reg, #?MODULE{target=Target,max_phys=MaxPhys}) when Reg < MaxPhys ->
+  Target:is_global(Reg). % assume id-map
+is_precoloured(Reg, #?MODULE{max_phys=MaxPhys}) -> Reg < MaxPhys.
+reg_nr(Reg, _ModRec) -> Reg. % After mapping (naturally)
+non_alloc(#cfg{cfg=CFG}, #?MODULE{target=Target,sub=SubM}) ->
+  map_get_all_partial(reg_names(Target:non_alloc(CFG), Target), SubM).
+number_of_temporaries(#cfg{max_reg=MaxR}, _ModRec) -> MaxR.
+allocatable(#?MODULE{target=Target}) -> Target:allocatable(). % assume id-map
+physical_name(Reg, _ModRec) -> Reg. % XXX: is this correct?
+all_precoloured(#?MODULE{target=Target}) -> Target:all_precoloured(). % dito
+var_range(#cfg{cfg=CFG, max_reg=MaxReg}, #?MODULE{target=Target}) ->
+  {0, _} = Target:var_range(CFG),
+  {0, MaxReg-1}. % What is the correct answer?
+
+breadthorder(#cfg{cfg=CFG}, #?MODULE{target=Target}) ->
+  Target:breadthorder(CFG).
+postorder(#cfg{cfg=CFG}, #?MODULE{target=Target}) -> Target:postorder(CFG).
+reverse_postorder(#cfg{cfg=CFG}, #?MODULE{target=Target}) ->
+  Target:reverse_postorder(CFG).
diff --git a/lib/hipe/regalloc/hipe_sparc_specific.erl b/lib/hipe/regalloc/hipe_sparc_specific.erl
index 29d0908caf..7df1bbe113 100644
--- a/lib/hipe/regalloc/hipe_sparc_specific.erl
+++ b/lib/hipe/regalloc/hipe_sparc_specific.erl
@@ -40,6 +40,7 @@
 	 ,livein/2
 	 ,uses/1
 	 ,defines/1
+	 ,defines_all_alloc/1
 	]).
 
 %% for hipe_graph_coloring_regalloc:
@@ -129,6 +130,9 @@ defines(I) ->
   [X || X <- hipe_sparc_defuse:insn_def_gpr(I),
 	hipe_sparc:temp_is_allocatable(X)].
 
+defines_all_alloc(I) ->
+  hipe_sparc_defuse:insn_defs_all_gpr(I).
+
 is_move(Instruction) ->
   case hipe_sparc:is_pseudo_move(Instruction) of
     true ->
diff --git a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl
index 08c2541b41..fd80053708 100644
--- a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl
+++ b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl
@@ -40,6 +40,7 @@
 	 ,livein/2
 	 ,uses/1
 	 ,defines/1
+	 ,defines_all_alloc/1
 	]).
 
 %% for hipe_graph_coloring_regalloc:
@@ -120,6 +121,9 @@ uses(I) ->
 defines(I) ->
   hipe_sparc_defuse:insn_def_fpr(I).
 
+defines_all_alloc(I) ->
+  hipe_sparc_defuse:insn_defs_all_fpr(I).
+
 is_move(I) ->
   hipe_sparc:is_pseudo_fmove(I).
  
diff --git a/lib/hipe/regalloc/hipe_temp_map.erl b/lib/hipe/regalloc/hipe_temp_map.erl
index 4085a0e1a7..171c2fb019 100644
--- a/lib/hipe/regalloc/hipe_temp_map.erl
+++ b/lib/hipe/regalloc/hipe_temp_map.erl
@@ -33,7 +33,7 @@
 
 -module(hipe_temp_map).
 
--export([cols2tuple/2, is_spilled/2, to_substlist/1]).
+-export([cols2tuple/2, find/2, is_spilled/2, to_substlist/1]).
 
 -include("../main/hipe.hrl").
 
@@ -50,12 +50,10 @@
 -spec cols2tuple(hipe_map(), atom()) -> hipe_temp_map().
 
 cols2tuple(Map, Target) ->
-  ?ASSERT(check_list(Map)),
-  SortedMap = lists:keysort(1, Map), 
+  SortedMap = lists:keysort(1, Map),
   cols2tuple(0, SortedMap, [], Target). 
 
 %% sorted_cols2tuple(Map, Target) ->
-%%   ?ASSERT(check_list(Map)),
 %%   ?ASSERT(Map =:= lists:keysort(1, Map)),
 %%   cols2tuple(0, Map, [], Target). 
 
@@ -66,7 +64,7 @@ cols2tuple(_, [], Vs, _) ->
 cols2tuple(N, [{R, C}|Ms], Vs, Target) when N =:= R ->
   %% N makes sure the mapping is dense. N is he next key.
   cols2tuple(N+1, Ms, [C|Vs], Target);
-cols2tuple(N, SourceMapping, Vs, Target) ->
+cols2tuple(N, SourceMapping=[{R,_}|_], Vs, Target) when N < R ->
   %% The source was sparse, make up some placeholders...
   Val = 	      
     case Target:is_precoloured(N) of
@@ -82,7 +80,7 @@ cols2tuple(N, SourceMapping, Vs, Target) ->
 -spec is_spilled(non_neg_integer(), hipe_temp_map()) -> boolean().
 
 is_spilled(Temp, Map) ->
-  case element(Temp+1, Map) of
+  case find(Temp, Map) of
     {reg, _R} -> false;
     {fp_reg, _R}-> false;
     {spill, _N} -> true;
@@ -106,9 +104,10 @@ is_spilled(Temp, Map) ->
 %%     {spill, _N} -> false;
 %%     unknown -> false
 %%   end.
-%% 
-%% %% Returns the inf temp Temp is mapped to.
-%% find(Temp, Map) -> element(Temp+1, Map).
+
+%% Returns the inf temp Temp is mapped to.
+find(Temp, Map) when Temp < tuple_size(Map) -> element(Temp+1, Map);
+find(_,    Map) when is_tuple(Map) -> unknown. % consistency with cols2tuple/2
 
 
 %%
diff --git a/lib/hipe/regalloc/hipe_x86_specific.erl b/lib/hipe/regalloc/hipe_x86_specific.erl
index f1007c95ed..2720af92c1 100644
--- a/lib/hipe/regalloc/hipe_x86_specific.erl
+++ b/lib/hipe/regalloc/hipe_x86_specific.erl
@@ -47,6 +47,7 @@
 	 liveout/2,
 	 uses/1,
 	 defines/1,
+	 defines_all_alloc/1,
 	 def_use/1,
 	 is_arg/1,	% used by hipe_ls_regalloc
 	 is_move/1,
@@ -176,6 +177,8 @@ defines(I) ->
 	hipe_x86:temp_is_allocatable(X),
 	hipe_x86:temp_type(X) =/= 'double'].
 
+defines_all_alloc(I) -> ?HIPE_X86_DEFUSE:insn_defs_all(I).
+
 is_move(Instruction) ->
   case hipe_x86:is_move(Instruction) of
     true ->
diff --git a/lib/hipe/regalloc/hipe_x86_specific_x87.erl b/lib/hipe/regalloc/hipe_x86_specific_x87.erl
index 0c022d5a27..cd4d02491d 100644
--- a/lib/hipe/regalloc/hipe_x86_specific_x87.erl
+++ b/lib/hipe/regalloc/hipe_x86_specific_x87.erl
@@ -53,6 +53,7 @@
 	 liveout/2,
 	 uses/1,
 	 defines/1,
+	 defines_all_alloc/1,
 	 is_global/1,
 	 reg_nr/1,
 	 physical_name/1,
@@ -162,6 +163,8 @@ defines(I) ->
  	     hipe_x86:temp_is_allocatable(X),
  	     temp_is_double(X)].
 
+defines_all_alloc(I) -> hipe_amd64_defuse:insn_defs_all(I).
+
 temp_is_double(Temp) ->
   hipe_x86:temp_type(Temp) =:= 'double'.
 
diff --git a/lib/hipe/sparc/hipe_sparc_defuse.erl b/lib/hipe/sparc/hipe_sparc_defuse.erl
index 4f66299f1d..4b5a19a19d 100644
--- a/lib/hipe/sparc/hipe_sparc_defuse.erl
+++ b/lib/hipe/sparc/hipe_sparc_defuse.erl
@@ -23,6 +23,7 @@
 -export([insn_def_all/1, insn_use_all/1]).
 -export([insn_def_gpr/1, insn_use_gpr/1]).
 -export([insn_def_fpr/1, insn_use_fpr/1]).
+-export([insn_defs_all_gpr/1, insn_defs_all_fpr/1]).
 -include("hipe_sparc.hrl").
 
 %%%
@@ -51,6 +52,12 @@ insn_def_gpr(I) ->
     _ -> []
   end.
 
+insn_defs_all_gpr(I) ->
+  case I of
+    #pseudo_call{} -> true;
+    _ -> false
+  end.
+
 call_clobbered_gpr() ->
   [hipe_sparc:mk_temp(R, T)
    || {R,T} <- hipe_sparc_registers:call_clobbered() ++ all_fp_pseudos()].
@@ -115,6 +122,12 @@ insn_def_fpr(I) ->
     _ -> []
   end.
 
+insn_defs_all_fpr(I) ->
+  case I of
+    #pseudo_call{} -> true;
+    _ -> false
+  end.
+
 call_clobbered_fpr() ->
   [hipe_sparc:mk_temp(R, 'double') || R <- hipe_sparc_registers:allocatable_fpr()].
 
diff --git a/lib/hipe/sparc/hipe_sparc_registers.erl b/lib/hipe/sparc/hipe_sparc_registers.erl
index 6681a10070..20138836dd 100644
--- a/lib/hipe/sparc/hipe_sparc_registers.erl
+++ b/lib/hipe/sparc/hipe_sparc_registers.erl
@@ -249,6 +249,8 @@ is_arg(R) ->
     _ -> false
   end.
 
+%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is
+%% hard-coded in hipe_sparc_defuse:insn_defs_all_gpr/1
 call_clobbered() ->		% does the RA strip the type or not?
   [%% ?G0 is the non-allocatable constant zero
    {?G1,tagged},{?G1,untagged},
diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl
index 9cba6cbe4b..4455def74e 100644
--- a/lib/hipe/x86/hipe_x86_defuse.erl
+++ b/lib/hipe/x86/hipe_x86_defuse.erl
@@ -35,7 +35,7 @@
 -endif.
 
 -module(?HIPE_X86_DEFUSE).
--export([insn_def/1, insn_use/1]). %% src_use/1]).
+-export([insn_def/1, insn_defs_all/1, insn_use/1]). %% src_use/1]).
 -include("../x86/hipe_x86.hrl").
 
 %%%
@@ -64,6 +64,16 @@ insn_def(I) ->
     _ -> []
   end.
 
+
+%% @doc Answers whether instruction I defines all allocatable registers. Used by
+%% hipe_regalloc_prepass.
+-spec insn_defs_all(_) -> boolean().
+insn_defs_all(I) ->
+  case I of
+    #pseudo_call{} -> true;
+    _ -> false
+  end.
+
 dst_def(Dst) ->
   case Dst of
     #x86_temp{} -> [Dst];
diff --git a/lib/hipe/x86/hipe_x86_registers.erl b/lib/hipe/x86/hipe_x86_registers.erl
index 179d734501..f00bbfb280 100644
--- a/lib/hipe/x86/hipe_x86_registers.erl
+++ b/lib/hipe/x86/hipe_x86_registers.erl
@@ -224,6 +224,8 @@ ret(N) ->
 	    exit({?MODULE, ret, N})
     end.
 
+%% Note: the fact that (allocatable() UNION allocatable_x87()) is a subset of
+%% call_clobbered() is hard-coded in hipe_x86_defuse:insn_defs_all/1
 call_clobbered() ->
     [{?EAX,tagged},{?EAX,untagged},	% does the RA strip the type or not?
      {?EDX,tagged},{?EDX,untagged},
author	Magnus Lång <[email protected]>	2016-05-28 20:22:34 +0200
committer	Magnus Lång <[email protected]>	2016-09-02 15:04:45 +0200
commit	85bd166647e7f260fd665eb44da9151c0d88f208 (patch)
tree	84878ab624fcc22246bae89b75a6857415508539 /lib
parent	c2f8b61ca3682281752fa0984699214dfcbf7ccd (diff)
download	otp-85bd166647e7f260fd665eb44da9151c0d88f208.tar.gz otp-85bd166647e7f260fd665eb44da9151c0d88f208.tar.bz2 otp-85bd166647e7f260fd665eb44da9151c0d88f208.zip