diff options
Diffstat (limited to 'lib/hipe')
107 files changed, 4066 insertions, 1734 deletions
diff --git a/lib/hipe/amd64/Makefile b/lib/hipe/amd64/Makefile index 8dc2af2679..617f6749ac 100644 --- a/lib/hipe/amd64/Makefile +++ b/lib/hipe/amd64/Makefile @@ -57,10 +57,11 @@ MODULES=hipe_amd64_assemble \  	hipe_amd64_ra_naive \  	hipe_amd64_ra_postconditions \  	hipe_amd64_ra_sse2_postconditions \ -	hipe_amd64_ra_x87_ls \  	hipe_amd64_registers \  	hipe_amd64_spill_restore \ +	hipe_amd64_subst \  	hipe_amd64_x87 \ +	hipe_amd64_sse2 \  	hipe_rtl_to_amd64  ERL_FILES=$(MODULES:%=%.erl) @@ -125,10 +126,10 @@ $(EBIN)/hipe_amd64_ra_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_ls.erl  $(EBIN)/hipe_amd64_ra_naive.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_naive.erl  $(EBIN)/hipe_amd64_ra_postconditions.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_postconditions.erl  $(EBIN)/hipe_amd64_ra_sse2_postconditions.beam: ../main/hipe.hrl -$(EBIN)/hipe_amd64_ra_x87_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_x87_ls.erl  $(EBIN)/hipe_amd64_registers.beam: ../rtl/hipe_literals.hrl  $(EBIN)/hipe_amd64_spill_restore.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../flow/cfg.hrl ../x86/hipe_x86_spill_restore.erl  $(EBIN)/hipe_amd64_x87.beam: ../x86/hipe_x86_x87.erl +$(EBIN)/hipe_amd64_sse2.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl  $(EBIN)/hipe_rtl_to_amd64.beam: ../x86/hipe_rtl_to_x86.erl ../rtl/hipe_rtl.hrl  $(TARGET_FILES): ../x86/hipe_x86.hrl ../misc/hipe_consttab.hrl diff --git a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl index b1f7bd7572..d062c0b37c 100644 --- a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl +++ b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl @@ -21,7 +21,7 @@  -module(hipe_amd64_ra_sse2_postconditions). --export([check_and_rewrite/2]). +-export([check_and_rewrite/2, check_and_rewrite/3]).  -include("../x86/hipe_x86.hrl").  -define(HIPE_INSTRUMENT_COMPILER, true). @@ -29,40 +29,48 @@  -define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). 
-check_and_rewrite(AMD64Defun, Coloring) -> +check_and_rewrite(AMD64CFG, Coloring) -> +  check_and_rewrite(AMD64CFG, Coloring, 'normal'). + +check_and_rewrite(AMD64CFG, Coloring, Strategy) ->    %%io:format("Converting\n"), -  TempMap = hipe_temp_map:cols2tuple(Coloring,hipe_amd64_specific_sse2), +  TempMap = hipe_temp_map:cols2tuple(Coloring,hipe_amd64_specific_sse2,no_context),    %%io:format("Rewriting\n"), -  #defun{code=Code0} = AMD64Defun, -  {Code1, DidSpill} = do_insns(Code0, TempMap, [], false), -  {AMD64Defun#defun{code=Code1, var_range={0, hipe_gensym:get_var(x86)}},  -   DidSpill}. - -do_insns([I|Insns], TempMap, Accum, DidSpill0) -> -  {NewIs, DidSpill1} = do_insn(I, TempMap), -  do_insns(Insns, TempMap, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); -do_insns([], _TempMap, Accum, DidSpill) -> +  do_bbs(hipe_x86_cfg:labels(AMD64CFG), TempMap, Strategy, AMD64CFG, false). + +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> +  Code0 = hipe_bb:code(BB = hipe_x86_cfg:bb(CFG0, Lbl)), +  {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), +  CFG = hipe_x86_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), +  do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). + +do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> +  {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), +  do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), +	   DidSpill0 or DidSpill1); +do_insns([], _TempMap, _Strategy, Accum, DidSpill) ->    {lists:reverse(Accum), DidSpill}. 
-do_insn(I, TempMap) ->	% Insn -> {Insn list, DidSpill} +do_insn(I, TempMap, Strategy) ->	% Insn -> {Insn list, DidSpill}    case I of      #fmove{} -> -      do_fmove(I, TempMap); +      do_fmove(I, TempMap, Strategy);      #fp_unop{} -> -      do_fp_unop(I, TempMap); +      do_fp_unop(I, TempMap, Strategy);      #fp_binop{} -> -      do_fp_binop(I, TempMap); +      do_fp_binop(I, TempMap, Strategy);      _ ->        %% All non sse2 ops        {[I], false}    end.  %%% Fix an fp_binop. -do_fp_binop(I, TempMap) -> +do_fp_binop(I, TempMap, Strategy) ->    #fp_binop{src=Src,dst=Dst} = I,    case is_mem_opnd(Dst, TempMap) of      true -> -      Tmp = clone(Dst), +      Tmp = clone(Dst, Strategy),        {[#fmove{src=Dst, dst=Tmp},  	I#fp_binop{src=Src,dst=Tmp},  	#fmove{src=Tmp,dst=Dst}], @@ -71,11 +79,11 @@ do_fp_binop(I, TempMap) ->        {[I], false}    end. -do_fp_unop(I, TempMap) -> +do_fp_unop(I, TempMap, Strategy) ->    #fp_unop{arg=Arg} = I,    case is_mem_opnd(Arg, TempMap) of      true -> -      Tmp = clone(Arg), +      Tmp = clone(Arg, Strategy),        {[#fmove{src=Arg, dst=Tmp},  	I#fp_unop{arg=Tmp},  	#fmove{src=Tmp,dst=Arg}], @@ -85,7 +93,7 @@ do_fp_unop(I, TempMap) ->    end.  %%% Fix an fmove op. -do_fmove(I, TempMap) -> +do_fmove(I, TempMap, Strategy) ->    #fmove{src=Src,dst=Dst} = I,    case      (is_mem_opnd(Src, TempMap) andalso is_mem_opnd(Dst, TempMap)) @@ -93,7 +101,7 @@ do_fmove(I, TempMap) ->      orelse ((not is_float_temp(Src)) andalso is_mem_opnd(Dst, TempMap))    of      true -> -      Tmp = spill_temp(double), +      Tmp = spill_temp(double, Strategy),        {[#fmove{src=Src, dst=Tmp},I#fmove{src=Tmp,dst=Dst}],         true};      false -> @@ -106,86 +114,40 @@ is_float_temp(#x86_mem{}) -> false.  %%% Check if an operand denotes a memory cell (mem or pseudo).  
is_mem_opnd(Opnd, TempMap) -> -  R = -    case Opnd of -      #x86_mem{} -> true; -      #x86_temp{type=double} -> -	Reg = hipe_x86:temp_reg(Opnd), -	case hipe_x86:temp_is_allocatable(Opnd) of -	  true ->  -	    case tuple_size(TempMap) > Reg of  -	      true -> -		case  -		  hipe_temp_map:is_spilled(Reg, TempMap) of -		  true -> -		    ?count_temp(Reg), -		    true; -		  false -> false -		end; -	      _ -> false -	    end; -	  false -> true -	end; -      _ -> false -    end, -  %% io:format("Op ~w mem: ~w\n",[Opnd,R]), -  R. - -%%% Check if an operand is a spilled Temp. - -%%src_is_spilled(Src, TempMap) -> -%%  case hipe_x86:is_temp(Src) of -%%    true -> -%%      Reg = hipe_x86:temp_reg(Src), -%%      case hipe_x86:temp_is_allocatable(Src) of -%%	true ->  -%%	  case tuple_size(TempMap) > Reg of  -%%	    true -> -%%	      case hipe_temp_map:is_spilled(Reg, TempMap) of -%%		true -> -%%		  ?count_temp(Reg), -%%		  true; -%%		false -> -%%		  false -%%	      end; -%%	    false -> -%%	      false -%%	  end; -%%	false -> true -%%      end; -%%    false -> false -%%  end. - -%% is_spilled(Temp, TempMap) -> -%%   case hipe_x86:temp_is_allocatable(Temp) of -%%     true -> -%%       Reg = hipe_x86:temp_reg(Temp), -%%       case tuple_size(TempMap) > Reg of  -%%  	true -> -%%  	  case hipe_temp_map:is_spilled(Reg, TempMap) of -%%  	    true -> -%%  	      ?count_temp(Reg), -%%  	      true; -%%  	    false -> -%%  	      false -%%  	  end; -%%  	false -> -%%  	  false -%%       end; -%%     false -> true -%%   end. +  case Opnd of +    #x86_mem{} -> true; +    #x86_temp{type=double} -> +      Reg = hipe_x86:temp_reg(Opnd), +      case hipe_x86:temp_is_allocatable(Opnd) of +	true -> +	  case hipe_temp_map:is_spilled(Reg, TempMap) of +	    true -> +	      ?count_temp(Reg), +	      true; +	    false -> false +	  end; +	false -> true +      end; +    _ -> false +  end.  %%% Make Reg a clone of Dst (attach Dst's type to Reg). 
-clone(Dst) -> +clone(Dst, Strategy) ->    Type =      case Dst of        #x86_mem{} -> hipe_x86:mem_type(Dst);        #x86_temp{} -> hipe_x86:temp_type(Dst)      end, -  spill_temp(Type). - -spill_temp(Type) -> +  spill_temp(Type, Strategy). + +spill_temp(Type, 'normal') -> +  hipe_x86:mk_new_temp(Type); +spill_temp(double, 'linearscan') -> +  hipe_x86:mk_temp(hipe_amd64_specific_sse2:temp0(no_context), double); +spill_temp(Type, 'linearscan') when Type =:= tagged; Type =:= untagged -> +  %% We can make a new temp here since we have yet to allocate registers for +  %% these types    hipe_x86:mk_new_temp(Type).  %%% Make a certain reg into a clone of Dst diff --git a/lib/hipe/amd64/hipe_amd64_registers.erl b/lib/hipe/amd64/hipe_amd64_registers.erl index 780c2cc547..7c6965b938 100644 --- a/lib/hipe/amd64/hipe_amd64_registers.erl +++ b/lib/hipe/amd64/hipe_amd64_registers.erl @@ -52,6 +52,7 @@   	 tailcall_clobbered/0,   	 temp0/0,  	 temp1/0, +	 sse2_temp0/0,  	 %% fixed/0,  	 wordsize/0  	]). @@ -107,6 +108,8 @@ heap_limit_offset() -> ?P_HP_LIMIT.  -define(TEMP0, ?R14).  -define(TEMP1, ?R13). +-define(SSE2_TEMP0, 00). +  -define(PROC_POINTER, ?RBP).  reg_name(R) -> @@ -204,6 +207,8 @@ allocatable() ->  allocatable_sse2() ->    [00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15]. %% xmm0 - xmm15 +sse2_temp0() -> ?SSE2_TEMP0. +  allocatable_x87() ->    [0,1,2,3,4,5,6]. @@ -248,6 +253,9 @@ ret(N) ->      _ -> exit({?MODULE, ret, N})    end. +%% Note: the fact that (allocatable() UNION allocatable_x87() UNION +%% allocatable_sse2()) is a subset of call_clobbered() is hard-coded in +%% hipe_x86_defuse:insn_defs_all/1  call_clobbered() ->    [{?RAX,tagged},{?RAX,untagged},	% does the RA strip the type or not?     
{?RDX,tagged},{?RDX,untagged}, diff --git a/lib/hipe/amd64/hipe_amd64_sse2.erl b/lib/hipe/amd64/hipe_amd64_sse2.erl new file mode 100644 index 0000000000..ea6b6cb9ba --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_sse2.erl @@ -0,0 +1,82 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%%  +%% Copyright Ericsson AB 2016. All Rights Reserved. +%%  +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%%     http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%  +%% %CopyrightEnd% +%% +%% Fix {mem, mem} floating point operations that result from linear scan +%% allocated floats. + +-module(hipe_amd64_sse2). + +-export([map/1]). + +-include("../x86/hipe_x86.hrl"). +-include("../main/hipe.hrl"). + +%%---------------------------------------------------------------------- + +map(CFG) -> +  hipe_x86_cfg:map_bbs(fun do_bb/2, CFG). + +do_bb(_Lbl, BB) -> +  Code = do_insns(hipe_bb:code(BB), []), +  hipe_bb:code_update(BB, Code). + +do_insns([I|Insns], Accum) -> +  NewIs = do_insn(I), +  do_insns(Insns, lists:reverse(NewIs, Accum)); +do_insns([], Accum) -> +  lists:reverse(Accum). + +do_insn(I) -> +  case I of +    #fp_binop{} -> do_fp_binop(I); +    #fmove{}    -> do_fmove(I); +    _           -> [I] +  end. + +do_fp_binop(I = #fp_binop{src=Src0,dst=Dst}) -> +  {FixSrc, Src} = fix_binary(Src0, Dst), +  FixSrc ++ [I#fp_binop{src=Src}]. + +do_fmove(I = #fmove{src=Src0,dst=Dst}) -> +  {FixSrc, Src} = fix_binary(Src0, Dst), +  FixSrc ++ [I#fmove{src=Src}]. 
+ +fix_binary(Src0, Dst) -> +  case is_mem_opnd(Src0) of +    false -> {[], Src0}; +    true -> +      case is_mem_opnd(Dst) of +	false -> {[], Src0}; +	true -> +	  Src1 = spill_temp(), +	  {[hipe_x86:mk_fmove(Src0, Src1)], Src1} +      end +  end. + +is_mem_opnd(#x86_fpreg{reg=Reg}) -> +  not hipe_amd64_registers:is_precoloured_sse2(Reg); +is_mem_opnd(#x86_temp{type=double, reg=Reg}) -> +  not hipe_amd64_registers:is_precoloured_sse2(Reg); +is_mem_opnd(#x86_temp{type=_, reg=Reg}) -> +  not hipe_amd64_registers:is_precoloured(Reg); +is_mem_opnd(#x86_mem{}) -> true. + +spill_temp() -> +  hipe_x86:mk_temp(hipe_amd64_registers:sse2_temp0(), double). diff --git a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl b/lib/hipe/amd64/hipe_amd64_subst.erl index 6da3f44cd3..7d0f06684b 100644 --- a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl +++ b/lib/hipe/amd64/hipe_amd64_subst.erl @@ -1,7 +1,7 @@  %%  %% %CopyrightBegin%  %%  -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. +%% Copyright Ericsson AB 2016. All Rights Reserved.  %%   %% Licensed under the Apache License, Version 2.0 (the "License");  %% you may not use this file except in compliance with the License. @@ -18,4 +18,4 @@  %% %CopyrightEnd%  %% --include("../x86/hipe_x86_ra_x87_ls.erl"). +-include("../x86/hipe_x86_subst.erl"). diff --git a/lib/hipe/arm/Makefile b/lib/hipe/arm/Makefile index 00b6732afa..ed2eccf428 100644 --- a/lib/hipe/arm/Makefile +++ b/lib/hipe/arm/Makefile @@ -61,6 +61,7 @@ MODULES=hipe_arm \  	hipe_arm_ra_naive \  	hipe_arm_ra_postconditions \  	hipe_arm_registers \ +	hipe_arm_subst \  	hipe_rtl_to_arm  HRL_FILES=hipe_arm.hrl diff --git a/lib/hipe/arm/hipe_arm_cfg.erl b/lib/hipe/arm/hipe_arm_cfg.erl index f2fa0a5164..2fb6675da9 100644 --- a/lib/hipe/arm/hipe_arm_cfg.erl +++ b/lib/hipe/arm/hipe_arm_cfg.erl @@ -24,6 +24,7 @@  -export([init/1,           labels/1, start_label/1,           succ/2, +         map_bbs/2, fold_bbs/3,           bb/2, bb_add/3]).  -export([postorder/1]).  
-export([linearise/1]). @@ -35,6 +36,7 @@  -define(BREADTH_ORDER,true).  % for linear scan  -define(PARAMS_NEEDED,true).  -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true).  -include("hipe_arm.hrl").  -include("../flow/cfg.hrl"). diff --git a/lib/hipe/arm/hipe_arm_defuse.erl b/lib/hipe/arm/hipe_arm_defuse.erl index f57b0e601c..f92cf4f82a 100644 --- a/lib/hipe/arm/hipe_arm_defuse.erl +++ b/lib/hipe/arm/hipe_arm_defuse.erl @@ -22,6 +22,7 @@  -module(hipe_arm_defuse).  -export([insn_def_all/1, insn_use_all/1]).  -export([insn_def_gpr/1, insn_use_gpr/1]). +-export([insn_defs_all_gpr/1]).  -include("hipe_arm.hrl").  %%% @@ -55,6 +56,12 @@ insn_def_gpr(I) ->      _ -> []    end. +insn_defs_all_gpr(I) -> +  case I of +    #pseudo_call{} -> true; +    _ -> false +  end. +  call_clobbered_gpr() ->    [hipe_arm:mk_temp(R, T)     || {R,T} <- hipe_arm_registers:call_clobbered() ++ all_fp_pseudos()]. diff --git a/lib/hipe/arm/hipe_arm_finalise.erl b/lib/hipe/arm/hipe_arm_finalise.erl index a4b2f9c73c..55651d7180 100644 --- a/lib/hipe/arm/hipe_arm_finalise.erl +++ b/lib/hipe/arm/hipe_arm_finalise.erl @@ -20,13 +20,17 @@  %%  -module(hipe_arm_finalise). --export([finalise/1]). +-export([finalise/2]).  -include("hipe_arm.hrl"). -finalise(Defun) -> +finalise(Defun, Options) ->    #defun{code=Code0} = Defun, -  Code1 = peep(expand(Code0)), -  Defun#defun{code=Code1}. +  Code1Rev = expand(Code0), +  Code2 = case proplists:get_bool(peephole, Options) of +	    true -> peep(Code1Rev); +	    false -> lists:reverse(Code1Rev) +	  end, +  Defun#defun{code=Code2}.  expand(Insns) ->    expand_list(Insns, []). @@ -34,7 +38,7 @@ expand(Insns) ->  expand_list([I|Insns], Accum) ->    expand_list(Insns, expand_insn(I, Accum));  expand_list([], Accum) -> -  lists:reverse(Accum). +  Accum.  expand_insn(I, Accum) ->    case I of @@ -63,12 +67,67 @@ expand_insn(I, Accum) ->        [I|Accum]    end. -peep(Insns) -> -  peep_list(Insns, []). 
+%% We do peephole "bottom-up" (in reverse, but applying rules to the correctly +%% ordered list). This way, we can do replacements that would take multiple +%% passes with an in-order peephole optimiser. +%% +%% N.B., if a rule wants to produce multiple instructions (even if some of them +%% are unchanged, it should push the additional instructions on the More list, +%% so that only the top instruction on Insns is new or changed, i.e. tl(Insns) +%% should have been peepholed previously. +peep(RevInsns) -> +  peep_list_skip([], RevInsns). + +peep_list([#b_label{'cond'='al',label=Label} +	   | (Insns = [#label{label=Label}|_])], More) -> +  peep_list_skip(Insns, More); + +peep_list([#move{movop='mov',s=false,dst=#arm_temp{reg=Dst} +		,am1=#arm_temp{reg=Dst}}|Insns], More) -> +  peep_list_skip(Insns, More); + +peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsr,Imm}}, +	   #move{movop='mov',s=false,dst=Dst,am1={Dst,lsl,Imm}} +	   |Insns], More) when Imm > 0, Imm =< 8 -> +  peep_list([#alu{aluop='bic',s=false,dst=Dst,src=Src,am1={(1 bsl Imm)-1,0}} +	    |Insns], More); +peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsl,Imm}}, +	   #move{movop='mov',s=false,dst=Dst,am1={Dst,lsr,Imm}} +	   |Insns], More) when Imm >= 24, Imm < 32 -> +  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src +		 ,am1={(1 bsl (32-Imm))-1,0}} | Insns], More); + +%% XXX: Load-after-store optimisation should also be applied to RTL, where it +%% can be more general, expose opportunities for constant propagation, etc. 
+peep_list([#store{stop='strb',src=Src,am2=Mem}=Str, +	   #load {ldop='ldrb',dst=Dst,am2=Mem} | Insns], More) -> +  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={16#ff,0}}|Insns], +	    [Str|More]); +peep_list([#store{stop='str',src=Src,am2=Mem}=Str, +	   #load {ldop='ldr',dst=Dst,am2=Mem} | Insns], More) -> +  peep_list([#move{movop='mov',s=false,dst=Dst,am1=Src}|Insns], [Str|More]); + +peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={Mask,0}}, +	   #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={InvMask,0}} +	   |Insns], More) -> +  peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src +		 ,am1={Mask band (bnot InvMask),0}} | Insns], More); + +%% XXX: The place that generates brain-dead code like the following should be +%% fixed rather than trying to patch it over here. +peep_list([#load{ldop='ldrb',dst=Dst,am2=_Mem}, +	   #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={16#ff,0}} +	   | Insns], More) -> +  peep_list([#move{movop='mov',s=false,dst=Dst,am1={0,0}}|Insns], More); + +peep_list(Insns, [I|More]) -> +  peep_list([I|Insns], More); +peep_list(Accum, []) -> +  Accum. -peep_list([#b_label{'cond'='al',label=Label} | (Insns = [#label{label=Label}|_])], Accum) -> -  peep_list(Insns, Accum); -peep_list([I|Insns], Accum) -> -  peep_list(Insns, [I|Accum]); -peep_list([], Accum) -> -  lists:reverse(Accum). +%% Used as an optimisation instead of tailcalling peep_list/2 when Insns has +%% already been peeped or is otherwise uninteresting (such as empty). +peep_list_skip(Insns, [I|More]) -> +  peep_list([I|Insns], More); +peep_list_skip(Accum, []) -> +  Accum. diff --git a/lib/hipe/arm/hipe_arm_frame.erl b/lib/hipe/arm/hipe_arm_frame.erl index e1e441a967..9a349b47d3 100644 --- a/lib/hipe/arm/hipe_arm_frame.erl +++ b/lib/hipe/arm/hipe_arm_frame.erl @@ -27,16 +27,14 @@  -define(LIVENESS_ALL, hipe_arm_liveness_gpr). 
% since we have no FP yet -frame(Defun) -> -  Formals = fix_formals(hipe_arm:defun_formals(Defun)), -  Temps0 = all_temps(hipe_arm:defun_code(Defun), Formals), -  MinFrame = defun_minframe(Defun), +frame(CFG) -> +  Formals = fix_formals(hipe_arm_cfg:params(CFG)), +  Temps0 = all_temps(CFG, Formals), +  MinFrame = defun_minframe(CFG),    Temps = ensure_minframe(MinFrame, Temps0), -  ClobbersLR = clobbers_lr(hipe_arm:defun_code(Defun)), -  CFG0 = hipe_arm_cfg:init(Defun), -  Liveness = ?LIVENESS_ALL:analyse(CFG0), -  CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersLR), -  hipe_arm_cfg:linearise(CFG1). +  ClobbersLR = clobbers_lr(CFG), +  Liveness = ?LIVENESS_ALL:analyse(CFG), +  do_body(CFG, Liveness, Formals, Temps, ClobbersLR).  fix_formals(Formals) ->    fix_formals(hipe_arm_registers:nr_args(), Formals). @@ -51,32 +49,21 @@ do_body(CFG0, Liveness, Formals, Temps, ClobbersLR) ->    do_prologue(CFG1, Context).  do_blocks(CFG, Context) -> -  Labels = hipe_arm_cfg:labels(CFG), -  do_blocks(Labels, CFG, Context). +  hipe_arm_cfg:map_bbs(fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG). -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) ->    Liveness = context_liveness(Context),    LiveOut = ?LIVENESS_ALL:liveout(Liveness, Label), -  Block = hipe_arm_cfg:bb(CFG, Label),    Code = hipe_bb:code(Block), -  NewCode = do_block(Code, LiveOut, Context), -  NewBlock = hipe_bb:code_update(Block, NewCode), -  NewCFG = hipe_arm_cfg:bb_add(CFG, Label, NewBlock), -  do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> -  CFG. - -do_block(Insns, LiveOut, Context) -> -  do_block(Insns, LiveOut, Context, context_framesize(Context), []). +  NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), +  hipe_bb:code_update(Block, NewCode).  
do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) ->    {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0),    do_block(Insns, LiveOut, Context, FPoff1, lists:reverse(NewIs, RevCode));  do_block([], _, Context, FPoff, RevCode) ->    FPoff0 = context_framesize(Context), -  if FPoff =:= FPoff0 -> []; -     true -> exit({?MODULE,do_block,FPoff}) -  end, +  FPoff0 = FPoff,    lists:reverse(RevCode, []).  do_insn(I, LiveOut, Context, FPoff) -> @@ -543,39 +530,46 @@ temp_is_pseudo(Temp) ->  %%% Detect if a Defun's body clobbers LR.  %%% -clobbers_lr(Insns) -> +clobbers_lr(CFG) ->    LRreg = hipe_arm_registers:lr(),    LRtagged = hipe_arm:mk_temp(LRreg, 'tagged'),    LRuntagged = hipe_arm:mk_temp(LRreg, 'untagged'), -  clobbers_lr(Insns, LRtagged, LRuntagged). - -clobbers_lr([I|Insns], LRtagged, LRuntagged) -> -  Defs = hipe_arm_defuse:insn_def_gpr(I), -  case lists:member(LRtagged, Defs) of -    true -> true; -    false -> -      case lists:member(LRuntagged, Defs) of -	true -> true; -	false -> clobbers_lr(Insns, LRtagged, LRuntagged) -      end -  end; -clobbers_lr([], _LRtagged, _LRuntagged) -> false. +  any_insn(fun(I) -> +	       Defs = hipe_arm_defuse:insn_def_gpr(I), +	       lists:member(LRtagged, Defs) +		 orelse lists:member(LRuntagged, Defs) +	   end, CFG). + +any_insn(Pred, CFG) -> +  %% Abuse fold to do an efficient "any"-operation using nonlocal control flow +  FoundSatisfying = make_ref(), +  try fold_insns(fun (I, _) -> +		     case Pred(I) of +		       true -> throw(FoundSatisfying); +		       false -> false +		     end +		 end, false, CFG) +  of _ -> false +  catch FoundSatisfying -> true +  end.  %%%  %%% Build the set of all temps used in a Defun's body.  %%% -all_temps(Code, Formals) -> -  S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> +  S0 = fold_insns(fun find_temps/2, tset_empty(), CFG),    S1 = tset_del_list(S0, Formals),    tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). 
-find_temps([I|Insns], S0) -> +find_temps(I, S0) ->    S1 = tset_add_list(S0, hipe_arm_defuse:insn_def_all(I)), -  S2 = tset_add_list(S1, hipe_arm_defuse:insn_use_all(I)), -  find_temps(Insns, S2); -find_temps([], S) -> -  S. +  tset_add_list(S1, hipe_arm_defuse:insn_use_all(I)). + +fold_insns(Fun, InitAcc, CFG) -> +  hipe_arm_cfg:fold_bbs( +    fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, +    InitAcc, CFG).  tset_empty() ->    gb_sets:new(). @@ -604,16 +598,11 @@ tset_to_list(S) ->  %%% in the middle of a tailcall.  %%% -defun_minframe(Defun) -> -  MaxTailArity = body_mta(hipe_arm:defun_code(Defun), 0), -  MyArity = length(fix_formals(hipe_arm:defun_formals(Defun))), +defun_minframe(CFG) -> +  MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), +  MyArity = length(fix_formals(hipe_arm_cfg:params(CFG))),    erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> -  body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> -  MTA. -  insn_mta(I, MTA) ->    case I of      #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/arm/hipe_arm_main.erl b/lib/hipe/arm/hipe_arm_main.erl index dce1193b24..8a7fa86394 100644 --- a/lib/hipe/arm/hipe_arm_main.erl +++ b/lib/hipe/arm/hipe_arm_main.erl @@ -24,15 +24,17 @@  rtl_to_arm(MFA, RTL, Options) ->    Defun1 = hipe_rtl_to_arm:translate(RTL), +  CFG1 = hipe_arm_cfg:init(Defun1),    %% io:format("~w: after translate\n", [?MODULE]),    %% hipe_arm_pp:pp(Defun1), -  Defun2 = hipe_arm_ra:ra(Defun1, Options), +  CFG2 = hipe_arm_ra:ra(CFG1, Options),    %% io:format("~w: after regalloc\n", [?MODULE]), -  %% hipe_arm_pp:pp(Defun2), -  Defun3 = hipe_arm_frame:frame(Defun2), +  %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG2)), +  CFG3 = hipe_arm_frame:frame(CFG2), +  Defun3 = hipe_arm_cfg:linearise(CFG3),    %% io:format("~w: after frame\n", [?MODULE]),    %% hipe_arm_pp:pp(Defun3), -  Defun4 = hipe_arm_finalise:finalise(Defun3), +  Defun4 = hipe_arm_finalise:finalise(Defun3, Options),    %% io:format("~w: 
after finalise\n", [?MODULE]),    pp(Defun4, MFA, Options),    {native, arm, {unprofiled, Defun4}}. diff --git a/lib/hipe/arm/hipe_arm_ra.erl b/lib/hipe/arm/hipe_arm_ra.erl index 2f65e864fd..bfb649326c 100644 --- a/lib/hipe/arm/hipe_arm_ra.erl +++ b/lib/hipe/arm/hipe_arm_ra.erl @@ -22,36 +22,40 @@  -module(hipe_arm_ra).  -export([ra/2]). -ra(Defun0, Options) -> -  %% hipe_arm_pp:pp(Defun0), -  {Defun1, Coloring_fp, SpillIndex} +ra(CFG0, Options) -> +  %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG0)), +  {CFG1, _FPLiveness1, Coloring_fp, SpillIndex}      = case proplists:get_bool(inline_fp, Options) of  %%	true -> -%%	  hipe_regalloc_loop:ra_fp(Defun0, Options, +%%	  FPLiveness0 = hipe_arm_specific_fp:analyze(CFG0, no_context), +%%	  hipe_regalloc_loop:ra_fp(CFG0, FPLiveness0, Options,  %%				   hipe_coalescing_regalloc, -%%				   hipe_arm_specific_fp); +%%				   hipe_arm_specific_fp, no_context);  	false -> -	  {Defun0,[],0} +	  {CFG0,undefined,[],0}        end, -  %% hipe_arm_pp:pp(Defun1), -  {Defun2, Coloring} +  %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG1)), +  GPLiveness1 = hipe_arm_specific:analyze(CFG1, no_context), +  {CFG2, _GPLiveness2, Coloring}      = case proplists:get_value(regalloc, Options, coalescing) of  	coalescing -> -	  ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); +	  ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_coalescing_regalloc);  	optimistic -> -	  ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); +	  ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_optimistic_regalloc);  	graph_color -> -	  ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); +	  ra(CFG1, GPLiveness1, SpillIndex, Options, +	     hipe_graph_coloring_regalloc);  	linear_scan -> -	  hipe_arm_ra_ls:ra(Defun1, SpillIndex, Options); +	  hipe_arm_ra_ls:ra(CFG1, GPLiveness1, SpillIndex, Options);  	naive -> -	  hipe_arm_ra_naive:ra(Defun1, Coloring_fp, Options); +	  hipe_arm_ra_naive:ra(CFG1, GPLiveness1, Coloring_fp, Options);          _ ->  	 
 exit({unknown_regalloc_compiler_option,  		proplists:get_value(regalloc,Options)})        end, -  %% hipe_arm_pp:pp(Defun2), -  hipe_arm_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). +  %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG2)), +  hipe_arm_ra_finalise:finalise(CFG2, Coloring, Coloring_fp). -ra(Defun, SpillIndex, Options, RegAllocMod) -> -  hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_arm_specific). +ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> +  hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, +			hipe_arm_specific, no_context). diff --git a/lib/hipe/arm/hipe_arm_ra_finalise.erl b/lib/hipe/arm/hipe_arm_ra_finalise.erl index 4faeadcd7f..2a3fded147 100644 --- a/lib/hipe/arm/hipe_arm_ra_finalise.erl +++ b/lib/hipe/arm/hipe_arm_ra_finalise.erl @@ -23,12 +23,13 @@  -export([finalise/3]).  -include("hipe_arm.hrl"). -finalise(Defun, TempMap, _FPMap0=[]) -> -  Code = hipe_arm:defun_code(Defun), -  {_, SpillLimit} = hipe_arm:defun_var_range(Defun), +finalise(CFG, TempMap, _FPMap0=[]) -> +  {_, SpillLimit} = hipe_gensym:var_range(arm),    Map = mk_ra_map(TempMap, SpillLimit), -  NewCode = ra_code(Code, Map, []), -  Defun#defun{code=NewCode}. +  hipe_arm_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map) end, CFG). + +ra_bb(BB, Map) -> +  hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, [])).  ra_code([I|Insns], Map, Accum) ->    ra_code(Insns, Map, [ra_insn(I, Map) | Accum]); diff --git a/lib/hipe/arm/hipe_arm_ra_ls.erl b/lib/hipe/arm/hipe_arm_ra_ls.erl index d9a360d00c..0aa888da99 100644 --- a/lib/hipe/arm/hipe_arm_ra_ls.erl +++ b/lib/hipe/arm/hipe_arm_ra_ls.erl @@ -21,37 +21,35 @@  %%% Linear Scan register allocator for ARM  -module(hipe_arm_ra_ls). --export([ra/3]). +-export([ra/4]). 
-ra(Defun, SpillIndex, Options) -> -  NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), -  CFG = hipe_arm_cfg:init(NewDefun), -  SpillLimit = hipe_arm_specific:number_of_temporaries(CFG), -  alloc(NewDefun, SpillIndex, SpillLimit, Options). +ra(CFG, Liveness, SpillIndex, Options) -> +  SpillLimit = hipe_arm_specific:number_of_temporaries(CFG, no_context), +  alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). -alloc(Defun, SpillIndex, SpillLimit, Options) -> -  CFG = hipe_arm_cfg:init(Defun), +alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) ->    {Coloring, _NewSpillIndex} =      regalloc( -      CFG, +      CFG, Liveness,        hipe_arm_registers:allocatable_gpr()--        [hipe_arm_registers:temp3(),         hipe_arm_registers:temp2(),         hipe_arm_registers:temp1()],        [hipe_arm_cfg:start_label(CFG)],        SpillIndex, SpillLimit, Options, -      hipe_arm_specific), -  {NewDefun, _DidSpill} = +      hipe_arm_specific, no_context), +  {NewCFG, _DidSpill} =      hipe_arm_ra_postconditions:check_and_rewrite( -      Defun, Coloring, 'linearscan'), -  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific), +      CFG, Coloring, 'linearscan'), +  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific, no_context),    {SpillMap, _NewSpillIndex2} = -    hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, -			     hipe_arm_specific, TempMap), +    hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, +			     hipe_arm_specific, no_context, TempMap),    Coloring2 =      hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), SpillMap), -  {NewDefun, Coloring2}. +  {NewCFG, Liveness, Coloring2}. -regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> -  hipe_ls_regalloc:regalloc( -    CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target). 
+regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, +	 TgtMod, TgtCtx) -> +  hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, +			    DontSpill, Options, TgtMod, TgtCtx). diff --git a/lib/hipe/arm/hipe_arm_ra_naive.erl b/lib/hipe/arm/hipe_arm_ra_naive.erl index 6201269f44..395beff292 100644 --- a/lib/hipe/arm/hipe_arm_ra_naive.erl +++ b/lib/hipe/arm/hipe_arm_ra_naive.erl @@ -20,11 +20,11 @@  %%  -module(hipe_arm_ra_naive). --export([ra/3]). +-export([ra/4]).  -include("hipe_arm.hrl"). -ra(Defun, _Coloring_fp, _Options) ->	% -> {Defun, Coloring} -  {NewDefun,_DidSpill} = -    hipe_arm_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), -  {NewDefun, []}. +ra(CFG, Liveness, _Coloring_fp, _Options) ->	% -> {CFG, Liveness, Coloring} +  {NewCFG,_DidSpill} = +    hipe_arm_ra_postconditions:check_and_rewrite2(CFG, [], 'naive'), +  {NewCFG, Liveness, []}. diff --git a/lib/hipe/arm/hipe_arm_ra_postconditions.erl b/lib/hipe/arm/hipe_arm_ra_postconditions.erl index 40978e65f6..412524e2e6 100644 --- a/lib/hipe/arm/hipe_arm_ra_postconditions.erl +++ b/lib/hipe/arm/hipe_arm_ra_postconditions.erl @@ -25,17 +25,13 @@  -include("hipe_arm.hrl"). -check_and_rewrite(Defun, Coloring, Allocator) -> -  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific), -  check_and_rewrite2(Defun, TempMap, Allocator). +check_and_rewrite(CFG, Coloring, Allocator) -> +  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific, no_context), +  check_and_rewrite2(CFG, TempMap, Allocator). -check_and_rewrite2(Defun, TempMap, Allocator) -> +check_and_rewrite2(CFG, TempMap, Allocator) ->    Strategy = strategy(Allocator), -  #defun{code=Code0} = Defun, -  {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), -  VarRange = {0, hipe_gensym:get_var(arm)}, -  {Defun#defun{code=Code1, var_range=VarRange}, -   DidSpill}. +  do_bbs(hipe_arm_cfg:labels(CFG), TempMap, Strategy, CFG, false).  
strategy(Allocator) ->    case Allocator of @@ -44,6 +40,13 @@ strategy(Allocator) ->      'naive' -> 'fixed'    end. +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> +  Code0 = hipe_bb:code(BB = hipe_arm_cfg:bb(CFG0, Lbl)), +  {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), +  CFG = hipe_arm_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), +  do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). +  do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) ->    {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy),    do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); diff --git a/lib/hipe/arm/hipe_arm_registers.erl b/lib/hipe/arm/hipe_arm_registers.erl index dcf039676b..3ecf2f2fdb 100644 --- a/lib/hipe/arm/hipe_arm_registers.erl +++ b/lib/hipe/arm/hipe_arm_registers.erl @@ -180,6 +180,8 @@ is_arg(R) ->      _ -> false    end. +%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is +%% hard-coded in hipe_arm_defuse:insn_defs_all_gpr/1  call_clobbered() ->		% does the RA strip the type or not?    [{?R0,tagged},{?R0,untagged},     {?R1,tagged},{?R1,untagged}, diff --git a/lib/hipe/arm/hipe_arm_subst.erl b/lib/hipe/arm/hipe_arm_subst.erl new file mode 100644 index 0000000000..4d077f3cd6 --- /dev/null +++ b/lib/hipe/arm/hipe_arm_subst.erl @@ -0,0 +1,112 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%%  +%% Copyright Ericsson AB 2016. All Rights Reserved. +%%  +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%%     http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+%% See the License for the specific language governing permissions and +%% limitations under the License. +%%  +%% %CopyrightEnd% +%% + +-module(hipe_arm_subst). +-export([insn_temps/2]). +-include("hipe_arm.hrl"). + +%% These should be moved to hipe_arm and exported +-type temp()    :: #arm_temp{}. +-type shiftop() :: lsl | lsr | asr | ror. +-type imm4()    :: 0..15. +-type imm5()    :: 0..31. +-type imm8()    :: 0..255. +-type am1()     :: {imm8(),imm4()} +		 | temp() +		 | {temp(), rrx} +		 | {temp(), shiftop(), imm5()} +		 | {temp(), shiftop(), temp()}. +-type am2()     :: #am2{}. +-type am3()     :: #am3{}. +-type arg()     :: temp() | integer(). +-type funv()    :: #arm_mfa{} | #arm_prim{} | temp(). +-type insn()    :: tuple(). % for now + +-type subst_fun() :: fun((temp()) -> temp()). + +%% @doc Maps over the temporaries in an instruction +-spec insn_temps(subst_fun(), insn()) -> insn(). +insn_temps(T, I) -> +  AM1 = fun(O) -> am1_temps(T, O) end, +  AM2 = fun(O) -> am2_temps(T, O) end, +  AM3 = fun(O) -> am3_temps(T, O) end, +  Arg = fun(O) -> arg_temps(T, O) end, +  case I of +      #alu  {dst=D,src=L,am1=R} -> I#alu{dst=T(D),src=T(L),am1=AM1(R)}; +      #cmp        {src=L,am1=R} -> I#cmp         {src=T(L),am1=AM1(R)}; +      #load       {dst=D,am2=S} -> I#load        {dst=T(D),am2=AM2(S)}; +      #ldrsb      {dst=D,am3=S} -> I#ldrsb       {dst=T(D),am3=AM3(S)}; +      #move       {dst=D,am1=S} -> I#move        {dst=T(D),am1=AM1(S)}; +      #pseudo_move{dst=D,src=S} -> I#pseudo_move {dst=T(D),src=T(S)}; +      #store      {src=S,am2=D} -> I#store       {src=T(S),am2=AM2(D)}; +      #b_label{} -> I; +      #comment{} -> I; +      #label{} -> I; +      #pseudo_bc{} -> I; +      #pseudo_blr{} -> I; +      #pseudo_call{funv=F} -> I#pseudo_call{funv=funv_temps(T, F)}; +      #pseudo_call_prepare{} -> I; +      #pseudo_li{dst=D} -> I#pseudo_li{dst=T(D)}; +      #pseudo_switch{jtab=J=#arm_temp{},index=Ix=#arm_temp{}} -> +	  
I#pseudo_switch{jtab=T(J),index=T(Ix)}; +      #pseudo_tailcall{funv=F,stkargs=Stk} -> +	  I#pseudo_tailcall{funv=funv_temps(T,F),stkargs=lists:map(Arg,Stk)}; +      #pseudo_tailcall_prepare{} -> I; +      #smull{dstlo=DL,dsthi=DH,src1=L,src2=R} -> +	  I#smull{dstlo=T(DL),dsthi=T(DH),src1=T(L),src2=T(R)} +  end. + +-spec am1_temps(subst_fun(), am1()) -> am1(). +am1_temps(_SubstTemp, T={C,R}) when is_integer(C), is_integer(R) -> T; +am1_temps(SubstTemp, T=#arm_temp{}) -> SubstTemp(T); +am1_temps(SubstTemp, {T=#arm_temp{},rrx}) -> {SubstTemp(T),rrx}; +am1_temps(SubstTemp, {A=#arm_temp{},Op,B=#arm_temp{}}) when is_atom(Op) -> +    {SubstTemp(A),Op,SubstTemp(B)}; +am1_temps(SubstTemp, {T=#arm_temp{},Op,I}) when is_atom(Op), is_integer(I) -> +    {SubstTemp(T),Op,I}. + +-spec am2_temps(subst_fun(), am2()) -> am2(). +am2_temps(SubstTemp, T=#am2{src=A=#arm_temp{},offset=O0}) -> +    O = case O0 of +	    _ when is_integer(O0) -> O0; +	    #arm_temp{} -> SubstTemp(O0); +	    {B=#arm_temp{},rrx} -> {SubstTemp(B),rrx}; +	    {B=#arm_temp{},Op,I} when is_atom(Op), is_integer(I) -> +		{SubstTemp(B),Op,I} +	end, +    T#am2{src=SubstTemp(A),offset=O}. + +-spec am3_temps(subst_fun(), am3()) -> am3(). +am3_temps(SubstTemp, T=#am3{src=A=#arm_temp{},offset=O0}) -> +    O = case O0 of +	    _ when is_integer(O0) -> O0; +	    #arm_temp{} -> SubstTemp(O0) +	end, +    T#am3{src=SubstTemp(A),offset=O}. + +-spec funv_temps(subst_fun(), funv()) -> funv(). +funv_temps(_SubstTemp, M=#arm_mfa{}) -> M; +funv_temps(_SubstTemp, P=#arm_prim{}) -> P; +funv_temps(SubstTemp,  T=#arm_temp{}) -> SubstTemp(T). + +-spec arg_temps(subst_fun(), arg()) -> arg(). +arg_temps(_SubstTemp, Imm) when is_integer(Imm) -> Imm; +arg_temps(SubstTemp,  T=#arm_temp{}) -> SubstTemp(T). 
diff --git a/lib/hipe/arm/hipe_rtl_to_arm.erl b/lib/hipe/arm/hipe_rtl_to_arm.erl index 93342aba33..2f9181d517 100644 --- a/lib/hipe/arm/hipe_rtl_to_arm.erl +++ b/lib/hipe/arm/hipe_rtl_to_arm.erl @@ -138,7 +138,6 @@ mk_shift(S, Dst, Src1, ShiftOp, Src2) ->    end.  mk_shift_ii(S, Dst, Src1, ShiftOp, Src2) -> -  io:format("~w: RTL alu with two immediates\n", [?MODULE]),    Tmp = new_untagged_temp(),    mk_li(Tmp, Src1,  	mk_shift_ri(S, Dst, Tmp, ShiftOp, Src2)). @@ -179,7 +178,6 @@ mk_arith(S, Dst, Src1, ArithOp, Src2) ->    end.  mk_arith_ii(S, Dst, Src1, ArithOp, Src2) -> -  io:format("~w: RTL alu with two immediates\n", [?MODULE]),    Tmp = new_untagged_temp(),    mk_li(Tmp, Src1,  	mk_arith_ri(S, Dst, Tmp, ArithOp, Src2)). @@ -277,7 +275,6 @@ mk_branch(Src1, Cond, Src2, TrueLab, FalseLab, Pred) ->    end.  mk_branch_ii(Imm1, Cond, Imm2, TrueLab, FalseLab, Pred) -> -  io:format("~w: RTL branch with two immediates\n", [?MODULE]),    Tmp = new_untagged_temp(),    mk_li(Tmp, Imm1,  	mk_branch_ri(Tmp, Cond, Imm2, @@ -472,7 +469,6 @@ mk_load(Dst, Base1, Base2, LoadSize, LoadSign) ->    end.  mk_load_ii(Dst, Base1, Base2, LdOp) -> -  io:format("~w: RTL load with two immediates\n", [?MODULE]),    Tmp = new_untagged_temp(),    mk_li(Tmp, Base1,  	mk_load_ri(Dst, Tmp, Base2, LdOp)). @@ -485,7 +481,6 @@ mk_load_rr(Dst, Base1, Base2, LdOp) ->    [hipe_arm:mk_load(LdOp, Dst, Am2)].  mk_ldrsb_ii(Dst, Base1, Base2) -> -  io:format("~w: RTL load signed byte with two immediates\n", [?MODULE]),    Tmp = new_untagged_temp(),    mk_li(Tmp, Base1,  	mk_ldrsb_ri(Dst, Tmp, Base2)). @@ -543,7 +538,7 @@ conv_return(I, Map, Data) ->    {I2, Map0, Data}.  
conv_store(I, Map, Data) -> -  {Base, Map0} = conv_dst(hipe_rtl:store_base(I), Map), +  {Base, Map0} = conv_src(hipe_rtl:store_base(I), Map),    {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0),    {Offset, Map2} = conv_src(hipe_rtl:store_offset(I), Map1),    StoreSize = hipe_rtl:store_size(I), @@ -567,13 +562,28 @@ mk_store(Src, Base, Offset, StoreSize) ->    end.  mk_store2(Src, Base, Offset, StOp) -> -  case hipe_arm:is_temp(Offset) of +  case hipe_arm:is_temp(Base) of      true -> -      mk_store_rr(Src, Base, Offset, StOp); -    _ -> -      mk_store_ri(Src, Base, Offset, StOp) +      case hipe_arm:is_temp(Offset) of +	true -> +	  mk_store_rr(Src, Base, Offset, StOp); +	_ -> +	  mk_store_ri(Src, Base, Offset, StOp) +      end; +    false -> +      case hipe_arm:is_temp(Offset) of +	true -> +	  mk_store_ri(Src, Offset, Base, StOp); +	_ -> +	  mk_store_ii(Src, Base, Offset, StOp) +      end    end. -   + +mk_store_ii(Src, Base, Offset, StOp) -> +  Tmp = new_untagged_temp(), +  mk_li(Tmp, Base, +	mk_store_ri(Src, Tmp, Offset, StOp)). +  mk_store_ri(Src, Base, Offset, StOp) ->    hipe_arm:mk_store(StOp, Src, Base, Offset, 'new', []). diff --git a/lib/hipe/cerl/erl_bif_types.erl b/lib/hipe/cerl/erl_bif_types.erl index 230fce2e68..c9cc1cfe25 100644 --- a/lib/hipe/cerl/erl_bif_types.erl +++ b/lib/hipe/cerl/erl_bif_types.erl @@ -560,6 +560,9 @@ type(erlang, byte_size, 1, Xs, Opaques) ->    strict(erlang, byte_size, 1, Xs,  	 fun (_) -> t_non_neg_integer() end, Opaques);  %% Guard bif, needs to be here. +type(erlang, ceil, 1, Xs, Opaques) -> +  strict(erlang, ceil, 1, Xs, fun (_) -> t_integer() end, Opaques); +%% Guard bif, needs to be here.  %% Also much more expressive than anything you could write in a spec...  
type(erlang, element, 2, Xs, Opaques) ->    strict(erlang, element, 2, Xs, @@ -588,6 +591,9 @@ type(erlang, element, 2, Xs, Opaques) ->  type(erlang, float, 1, Xs, Opaques) ->    strict(erlang, float, 1, Xs, fun (_) -> t_float() end, Opaques);  %% Guard bif, needs to be here. +type(erlang, floor, 1, Xs, Opaques) -> +  strict(erlang, floor, 1, Xs, fun (_) -> t_integer() end, Opaques); +%% Guard bif, needs to be here.  type(erlang, hd, 1, Xs, Opaques) ->    strict(erlang, hd, 1, Xs, fun ([X]) -> t_cons_hd(X) end, Opaques);  type(erlang, info, 1, Xs, _) -> type(erlang, system_info, 1, Xs); % alias @@ -2341,6 +2347,9 @@ arg_types(erlang, bit_size, 1) ->  %% Guard bif, needs to be here.  arg_types(erlang, byte_size, 1) ->    [t_bitstr()]; +%% Guard bif, needs to be here. +arg_types(erlang, ceil, 1) -> +  [t_number()];  arg_types(erlang, halt, 0) ->    [];  arg_types(erlang, halt, 1) -> @@ -2361,6 +2370,9 @@ arg_types(erlang, element, 2) ->  arg_types(erlang, float, 1) ->    [t_number()];  %% Guard bif, needs to be here. +arg_types(erlang, floor, 1) -> +  [t_number()]; +%% Guard bif, needs to be here.  arg_types(erlang, hd, 1) ->    [t_cons()];  arg_types(erlang, info, 1) -> diff --git a/lib/hipe/flow/cfg.inc b/lib/hipe/flow/cfg.inc index 0bad2a8dd7..cb5f397f64 100644 --- a/lib/hipe/flow/cfg.inc +++ b/lib/hipe/flow/cfg.inc @@ -32,6 +32,8 @@  %%  bb(CFG, Label) - returns the basic block named 'Label' from the CFG.  %%  bb_add(CFG, Label, NewBB) - makes NewBB the basic block associated  %%       with Label. +%%  map_bbs(Fun, CFG) - map over all code without changing control flow. +%%  fold_bbs(Fun, Acc, CFG) - fold over the basic blocks in a CFG.  %%  succ(Map, Label) - returns a list of successors of basic block 'Label'.  %%  pred(Map, Label) - returns the predecessors of basic block 'Label'.  %%  fallthrough(CFG, Label) - returns fall-through successor of basic  @@ -89,6 +91,7 @@  -define(BREADTH_ORDER,true). % for linear scan  -define(PARAMS_NEEDED,true).  
-define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true).  -endif.  %%===================================================================== @@ -307,11 +310,7 @@ redirect_phis([I|Rest], OldPred, NewPred, Acc) ->  %% @doc  Adds a new basic block to a CFG (or updates an existing block).  bb_add(CFG, Label, NewBB) ->    %% Asserting that the NewBB is a legal basic block -  Last = hipe_bb:last(NewBB), -  case is_branch(Last) of -    true  -> ok; -    false -> throw({?MODULE, {"Basic block ends without branch", Last}}) -  end, +  Last = assert_bb(NewBB),    %% The order of the elements from branch_successors/1 is    %% significant. It determines the basic block order when the CFG is    %% converted to linear form. That order may have been tuned for @@ -339,11 +338,53 @@ bb_add(CFG, Label, NewBB) ->  		    HT2, OldSucc -- Succ),    CFG#cfg{table = HT3}. +-ifdef(MAP_FOLD_NEEDED). +-spec map_bbs(fun((cfg_lbl(), hipe_bb:bb()) -> hipe_bb:bb()), cfg()) -> cfg(). +%% @doc  Map over the code in a CFG without changing any control flow. +map_bbs(Fun, CFG = #cfg{table=HT0}) -> +    HT = gb_trees:map( +	   fun(Lbl, {OldBB, OldSucc, OldPred}) -> +		   NewBB = Fun(Lbl, OldBB), +		   %% Assert preconditions +		   NewLast = assert_bb(NewBB), +		   OldSucc = remove_duplicates(branch_successors(NewLast)), +		   {NewBB, OldSucc, OldPred} +	   end, HT0), +    CFG#cfg{table=HT}. + +-spec fold_bbs(fun((cfg_lbl(), hipe_bb:bb(), Acc) -> Acc), Acc, cfg()) -> Acc. +%% @doc  Fold over the basic blocks in a CFG in unspecified order. +fold_bbs(Fun, InitAcc, #cfg{table=HT}) -> +    gb_trees_fold(fun(Lbl, {BB, _, _}, Acc) -> Fun(Lbl, BB, Acc) end, +		  InitAcc, HT). + +gb_trees_fold(Fun, InitAcc, Tree) -> +    gb_trees_fold_1(Fun, InitAcc, gb_trees:iterator(Tree)). + +gb_trees_fold_1(Fun, InitAcc, Iter0) -> +    case gb_trees:next(Iter0) of +	none -> InitAcc; +	{Key, Value, Iter} -> +	    gb_trees_fold_1(Fun, Fun(Key, Value, InitAcc), Iter) +    end. +-endif. 
% MAP_FOLD_NEEDED + +assert_bb(BB) -> +    assert_bb_is(hipe_bb:code(BB)). + +assert_bb_is([Last]) -> +    true = is_branch(Last), +    Last; +assert_bb_is([I|Is]) -> +    false = is_branch(I), +    false = is_label(I), +    assert_bb_is(Is). +  remove_pred(HT, FromL, PredL) ->    case gb_trees:lookup(FromL, HT) of      {value, {Block, Succ, Preds}} ->        Code = hipe_bb:code(Block), -      NewCode = remove_pred_from_phis(Code, PredL, []), +      NewCode = remove_pred_from_phis(PredL, Code),        NewBlock = hipe_bb:code_update(Block, NewCode),              gb_trees:update(FromL, {NewBlock,Succ,lists:delete(PredL,Preds)}, HT);      none -> @@ -374,20 +415,20 @@ add_pred(HT, ToL, PredL) ->  -ifdef(CFG_CAN_HAVE_PHI_NODES).  %% phi-instructions in a removed block's successors must be aware of  %% the change. -remove_pred_from_phis(List = [I|Left], Label, Acc) -> +remove_pred_from_phis(Label, List = [I|Left]) ->    case is_phi(I) of -    true ->  -      NewAcc = [phi_remove_pred(I, Label)|Acc], -      remove_pred_from_phis(Left, Label, NewAcc); +    true -> +      NewI = phi_remove_pred(I, Label), +      [NewI | remove_pred_from_phis(Label, Left)];      false -> -      lists:reverse(Acc) ++ List +      List    end; -remove_pred_from_phis([], _Label, Acc) -> -  lists:reverse(Acc). +remove_pred_from_phis(_Label, []) -> +  [].  -else.  %% this is used for code representations like those of back-ends which  %% do not have phi-nodes. -remove_pred_from_phis(Code, _Label, _Acc) -> +remove_pred_from_phis(_Label, Code) ->    Code.  -endif. @@ -927,24 +968,52 @@ merge(BB, BB2, BB2_Label) ->  remove_unreachable_code(CFG) ->    Start = start_label(CFG), -  Reachable = find_reachable([Start], CFG, gb_sets:from_list([Start])), -  %% Reachable is an ordset: it comes from gb_sets:to_list/1. -  %% So use ordset:subtract instead of '--' below. 
-  Labels = ordsets:from_list(labels(CFG)), -  case ordsets:subtract(Labels, Reachable) of -    [] -> -      CFG; +  %% No unreachable block will make another block reachable, so no fixpoint +  %% looping is required +  Reachable = find_reachable([], [Start], CFG, #{Start=>[]}), +  case [L || L <- labels(CFG), not maps:is_key(L, Reachable)] of +    [] -> CFG;      Remove -> -      NewCFG = lists:foldl(fun(X, Acc) -> bb_remove(Acc, X) end, CFG, Remove), -      remove_unreachable_code(NewCFG) +      HT0 = CFG#cfg.table, +      HT1 = lists:foldl(fun gb_trees:delete/2, HT0, Remove), +      ReachableP = fun(Lbl) -> maps:is_key(Lbl, Reachable) end, +      HT = gb_trees:map(fun(_,B)->prune_preds(B, ReachableP)end, HT1), +      CFG#cfg{table=HT}    end. -find_reachable([Label|Left], CFG, Acc) -> -  NewAcc = gb_sets:add(Label, Acc), -  Succ = succ(CFG, Label), -  find_reachable([X || X <- Succ, not gb_sets:is_member(X, Acc)] ++ Left, -		 CFG, NewAcc); -find_reachable([], _CFG, Acc) -> -  gb_sets:to_list(Acc). +find_reachable([], [], _CFG, Acc) -> Acc; +find_reachable([Succ|Succs], Left, CFG, Acc) -> +  case Acc of +    #{Succ := _} -> find_reachable(Succs, Left, CFG, Acc); +    #{} -> find_reachable(Succs, [Succ|Left], CFG, Acc#{Succ => []}) +  end; +find_reachable([], [Label|Left], CFG, Acc) -> +  find_reachable(succ(CFG, Label), Left, CFG, Acc). + +%% Batch prune unreachable predecessors. Asymptotically faster than deleting +%% unreachable blocks one at a time with bb_remove, at least when +%% CFG_CAN_HAVE_PHI_NODES is undefined. Otherwise a phi_remove_preds might be +%% needed to achieve that. +prune_preds(B={Block, Succ, Preds}, ReachableP) -> +  case lists:partition(ReachableP, Preds) of +    {_, []} -> B; +    {NewPreds, Unreach} -> +      NewCode = remove_preds_from_phis(Unreach, hipe_bb:code(Block)), +      {hipe_bb:code_update(Block, NewCode), Succ, NewPreds} +  end. +-ifdef(CFG_CAN_HAVE_PHI_NODES). 
+remove_preds_from_phis(_, []) -> []; +remove_preds_from_phis(Preds, List=[I|Left]) -> +  case is_phi(I) of +    false -> List; +    true -> +      NewI = lists:foldl(fun(L,IA)->phi_remove_pred(IA,L)end, +			 I, Preds), +      [NewI | remove_preds_from_phis(Preds, Left)] +  end. +-else. +remove_preds_from_phis(_, Code) -> Code.  -endif. + +-endif. %% -ifdef(REMOVE_UNREACHABLE_CODE) diff --git a/lib/hipe/flow/hipe_bb.erl b/lib/hipe/flow/hipe_bb.erl index 2da3a6dc99..08f5e0a0cb 100644 --- a/lib/hipe/flow/hipe_bb.erl +++ b/lib/hipe/flow/hipe_bb.erl @@ -41,6 +41,8 @@  -include("hipe_bb.hrl"). +-export_type([bb/0]). +  %%  %% Constructs a basic block.  %% Returns a basic block: {bb, Code} diff --git a/lib/hipe/flow/liveness.inc b/lib/hipe/flow/liveness.inc index a1caa3e0ad..bffaa4e3df 100644 --- a/lib/hipe/flow/liveness.inc +++ b/lib/hipe/flow/liveness.inc @@ -49,6 +49,10 @@  -endif.  -include("../flow/cfg.hrl"). +-include("../main/hipe.hrl"). + +-opaque liveness() :: map(). +-export_type([liveness/0]).  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  %% @@ -72,7 +76,7 @@  %% The generic liveness analysis  %% --spec analyze(cfg()) -> gb_trees:tree(). +-spec analyze(cfg()) -> liveness().  -ifdef(HIPE_LIVENESS_CALC_LARGEST_LIVESET).  analyze(CFG) -> @@ -188,6 +192,7 @@ update_livein(Label, NewLiveIn, Liveness) ->  %%  %% LiveOut for a block is the union of the successors LiveIn  %% +-spec liveout(liveness(), _) -> [_].  liveout(Liveness, L) ->    Succ = successors(L, Liveness), @@ -210,7 +215,7 @@ successors(L, Liveness) ->    {_GK, _LiveIn, Successors} = liveness_lookup(L, Liveness),    Successors. --spec livein(gb_trees:tree(), _) -> [_]. +-spec livein(liveness(), _) -> [_].  livein(Liveness, L) ->    {_GK, LiveIn, _Successors} = liveness_lookup(L, Liveness), @@ -292,18 +297,15 @@ strip([{_,Y}|Xs]) ->  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  %% +-compile({inline, [liveness_lookup/2, liveness_update/3]}). 
+  liveness_init(List) -> -  liveness_init(List, gb_trees:empty()). +  maps:from_list(List). -liveness_init([{Lbl, Data}|Left], Acc) -> -  liveness_init(Left, gb_trees:insert(Lbl, Data, Acc)); -liveness_init([], Acc) -> -  Acc. -    liveness_lookup(Label, Liveness) -> -  gb_trees:get(Label, Liveness). +  maps:get(Label, Liveness).  liveness_update(Label, Val, Liveness) -> -  gb_trees:update(Label, Val, Liveness). +  maps:update(Label, Val, Liveness).  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/lib/hipe/icode/hipe_icode_bincomp.erl b/lib/hipe/icode/hipe_icode_bincomp.erl index 5a27519141..5ee6fe2c87 100644 --- a/lib/hipe/icode/hipe_icode_bincomp.erl +++ b/lib/hipe/icode/hipe_icode_bincomp.erl @@ -40,8 +40,8 @@  -spec cfg(cfg()) -> cfg().  cfg(Cfg1) -> -  StartLbls = ordsets:from_list([hipe_icode_cfg:start_label(Cfg1)]), -  find_bs_get_integer(StartLbls, Cfg1, StartLbls). +  StartLbl = hipe_icode_cfg:start_label(Cfg1), +  find_bs_get_integer([StartLbl], Cfg1, set_from_list([StartLbl])).  find_bs_get_integer([Lbl|Rest], Cfg, Visited) ->    BB = hipe_icode_cfg:bb(Cfg, Lbl), @@ -55,10 +55,10 @@ find_bs_get_integer([Lbl|Rest], Cfg, Visited) ->         not_ok ->  	 Cfg       end, -  Succs = ordsets:from_list(hipe_icode_cfg:succ(NewCfg, Lbl)), -  NewSuccs = ordsets:subtract(Succs, Visited), -  NewLbls = ordsets:union(NewSuccs, Rest), -  NewVisited = ordsets:union(NewSuccs, Visited), +  Succs = hipe_icode_cfg:succ(NewCfg, Lbl), +  NewSuccs = not_visited(Succs, Visited), +  NewLbls = NewSuccs ++ Rest, +  NewVisited = set_union(set_from_list(NewSuccs), Visited),    find_bs_get_integer(NewLbls, NewCfg, NewVisited);  find_bs_get_integer([], Cfg, _) ->    Cfg. @@ -177,3 +177,19 @@ make_butlast([{Res, Size}|Rest], Var) ->  			[Var, hipe_icode:mk_const((1 bsl Size)-1)]),     hipe_icode:mk_primop([NewVar], 'bsr', [Var, hipe_icode:mk_const(Size)])     |make_butlast(Rest, NewVar)]. 
+ +%%-------------------------------------------------------------------- +%% Sets + +set_from_list([]) -> #{}; +set_from_list(L) -> +  maps:from_list([{E, []} || E <- L]). + +not_visited([], _) -> []; +not_visited([E|T], M) -> +  case M of +    #{E := _} -> not_visited(T, M); +    _ -> [E|not_visited(T, M)] +  end. + +set_union(A, B) -> maps:merge(A, B). diff --git a/lib/hipe/icode/hipe_icode_coordinator.erl b/lib/hipe/icode/hipe_icode_coordinator.erl index d2f8748535..b073954ce7 100644 --- a/lib/hipe/icode/hipe_icode_coordinator.erl +++ b/lib/hipe/icode/hipe_icode_coordinator.erl @@ -106,12 +106,29 @@ handle_no_change_done(MFA, {Queue, Busy}) ->    {Queue, Busy -- [MFA]}.  last_action(PM, ServerPid, Mod, All) -> -  lists:foreach(fun (MFA) -> -		    gb_trees:get(MFA, PM) ! {done, final_funs(ServerPid, Mod)}, -		    receive  -		      {done_rewrite, MFA} -> ok -		    end -		end, All). +  last_action(PM, ServerPid, Mod, All, []). + +last_action(_, _, _, [], []) -> ok; +last_action(PM, ServerPid, Mod, [], [MFA|Busy]) -> +  receive +    {done_rewrite, MFA} -> +      last_action(PM, ServerPid, Mod, [], Busy) +  end; +last_action(PM, ServerPid, Mod, All0, Busy) -> +  receive +    {done_rewrite, MFA} -> +      last_action(PM, ServerPid, Mod, All0, Busy -- [MFA]) +  after 0 -> +      case ?MAX_CONCURRENT - length(Busy) of +	X when is_integer(X), X > 0 -> +	  [MFA|All1] = All0, +	  gb_trees:get(MFA, PM) ! {done, final_funs(ServerPid, Mod)}, +	  last_action(PM, ServerPid, Mod, All1, [MFA|Busy]); +	X when is_integer(X) -> +	  Busy1 = receive {done_rewrite, MFA} -> Busy -- [MFA] end, +	  last_action(PM, ServerPid, Mod, All0, Busy1) +      end +  end.  
restart_funs({Queue, Busy} = QB, PM, All, ServerPid) ->    case ?MAX_CONCURRENT - length(Busy) of diff --git a/lib/hipe/icode/hipe_icode_range.erl b/lib/hipe/icode/hipe_icode_range.erl index 12ed796690..af160769a1 100644 --- a/lib/hipe/icode/hipe_icode_range.erl +++ b/lib/hipe/icode/hipe_icode_range.erl @@ -73,8 +73,8 @@  -type final_fun() :: fun((mfa(), [range()]) -> 'ok').  -type data() :: {mfa(), args_fun(), call_fun(), final_fun()}.  -type label() :: non_neg_integer(). --type info() :: gb_trees:tree(). --type work_list() :: {[label()], [label()], sets:set()}. +-type info() :: map(). +-type work_list() :: {[label()], [label()], set(label())}.  -type variable() :: #icode_variable{}.  -type annotated_variable() :: #icode_variable{}.  -type argument() :: #icode_const{} | variable(). @@ -82,10 +82,9 @@  -type instr_split_info()  :: {icode_instr(), [{label(),info()}]}.  -type last_instr_return() :: {instr_split_info(), range()}. --record(state, {info_map = gb_trees:empty()	:: info(),  -		counter  = dict:new()		:: dict:dict(), -		cfg				:: cfg(),  -		liveness = gb_trees:empty()	:: gb_trees:tree(), +-record(state, {info_map = #{}			:: info(), +		cfg				:: cfg(), +		liveness			:: hipe_icode_ssa:liveness(),  		ret_type			:: range(),  		lookup_fun			:: call_fun(),  		result_action			:: final_fun()}). @@ -187,17 +186,16 @@ safe_analyse(CFG, Data={MFA,_,_,_}) ->  rewrite_blocks(State) ->    CFG = state__cfg(State),    Start = hipe_icode_cfg:start_label(CFG), -  rewrite_blocks([Start], State, [Start]). +  rewrite_blocks([Start], State, set_from_list([Start])). --spec rewrite_blocks([label()], state(), [label()]) -> state(). +-spec rewrite_blocks([label()], state(), set(label())) -> state().  
rewrite_blocks([Next|Rest], State, Visited) ->    Info = state__info_in(State, Next),    {NewState, NewLabels} = analyse_block(Next, Info, State, true), -  NewLabelsSet = ordsets:from_list(NewLabels), -  RealNew = ordsets:subtract(NewLabelsSet, Visited), -  NewVisited = ordsets:union([RealNew, Visited, [Next]]), -  NewWork = ordsets:union([RealNew, Rest]), +  RealNew = not_visited(NewLabels, Visited), +  NewVisited = set_union(set_from_list(RealNew), Visited), +  NewWork = RealNew ++ Rest,    rewrite_blocks(NewWork, NewState, NewVisited);  rewrite_blocks([], State, _) ->    State. @@ -1661,8 +1659,8 @@ state__init(Cfg, {MFA, ArgsFun, CallFun, FinalFun}) ->      false ->        NewParams = lists:zipwith(fun update_info/2, Params, Ranges),        NewCfg = hipe_icode_cfg:params_update(Cfg, NewParams), -      Info = enter_defines(NewParams, gb_trees:empty()), -      InfoMap = gb_trees:insert({Start, in}, Info, gb_trees:empty()), +      Info = enter_defines(NewParams, #{}), +      InfoMap = #{{Start, in} => Info},        #state{info_map=InfoMap, cfg=NewCfg, liveness=Liveness,  	     ret_type=none_type(),  	     lookup_fun=CallFun, result_action=FinalFun} @@ -1700,7 +1698,7 @@ state__info_in(S, Label) ->    state__info(S, {Label, in}).  state__info(#state{info_map=IM}, Key) -> -  gb_trees:get(Key, IM). +  maps:get(Key, IM).  state__update_info(State, LabelInfo, Rewrite) ->    update_info(LabelInfo, State, [], Rewrite). 
@@ -1721,60 +1719,58 @@ update_info([], State, LabelAcc, _Rewrite) ->  state__info_in_update(S=#state{info_map=IM,liveness=Liveness}, Label, Info) ->    LabelIn = {Label, in}, -  case gb_trees:lookup(LabelIn, IM) of -    none ->  +  case IM of +    #{LabelIn := OldInfo} -> +      OldVars = maps:keys(OldInfo), +      case join_info_in(OldVars, OldInfo, Info) of +	fixpoint -> +	  fixpoint; +	NewInfo -> +	  S#state{info_map=IM#{LabelIn := NewInfo}} +      end; +    _ ->        LiveIn = hipe_icode_ssa:ssa_liveness__livein(Liveness, Label),        NamesLiveIn = [hipe_icode:var_name(Var) || Var <- LiveIn,  						 hipe_icode:is_var(Var)], -      OldInfo = gb_trees:empty(), +      OldInfo = #{},        case join_info_in(NamesLiveIn, OldInfo, Info) of  	fixpoint ->  -	  S#state{info_map=gb_trees:insert(LabelIn, OldInfo, IM)}; -	NewInfo -> -	  S#state{info_map=gb_trees:enter(LabelIn, NewInfo, IM)} -      end; -    {value, OldInfo} -> -      OldVars = gb_trees:keys(OldInfo), -      case join_info_in(OldVars, OldInfo, Info) of -	fixpoint ->  -	  fixpoint; +	  S#state{info_map=IM#{LabelIn => OldInfo}};  	NewInfo -> -	  S#state{info_map=gb_trees:update(LabelIn, NewInfo, IM)} +	  S#state{info_map=IM#{LabelIn => NewInfo}}        end    end.  join_info_in(Vars, OldInfo, NewInfo) -> -  case join_info_in(Vars, OldInfo, NewInfo, gb_trees:empty(), false) of +  case join_info_in(Vars, OldInfo, NewInfo, #{}, false) of      {Res, true} -> Res;      {_, false} -> fixpoint    end.  
join_info_in([Var|Left], Info1, Info2, Acc, Changed) -> -  Type1 = gb_trees:lookup(Var, Info1), -  Type2 = gb_trees:lookup(Var, Info2), -  case {Type1, Type2} of -    {none, none} -> -      NewTree = gb_trees:insert(Var, none_type(), Acc), -      join_info_in(Left, Info1, Info2, NewTree, true); -    {none, {value, Val}} -> -      NewTree = gb_trees:insert(Var, Val, Acc), -      join_info_in(Left, Info1, Info2, NewTree, true); -    {{value, Val}, none} -> -      NewTree = gb_trees:insert(Var, Val, Acc), +  case {Info1, Info2} of +    {#{Var := Val}, #{Var := Val}} -> +      NewTree = Acc#{Var => Val},        join_info_in(Left, Info1, Info2, NewTree, Changed); -    {{value, Val}, {value, Val}} -> -      NewTree = gb_trees:insert(Var, Val, Acc), -      join_info_in(Left, Info1, Info2, NewTree, Changed); -    {{value, Val1}, {value, Val2}} -> -      {NewChanged, NewVal} =  +    {#{Var := Val1}, #{Var := Val2}} -> +      {NewChanged, NewVal} =  	case sup(Val1, Val2) of  	  Val1 ->  	    {Changed, Val1};  	  Val ->  	    {true, Val}  	end, -      NewTree = gb_trees:insert(Var, NewVal, Acc), -      join_info_in(Left, Info1, Info2, NewTree, NewChanged) +      NewTree = Acc#{Var => NewVal}, +      join_info_in(Left, Info1, Info2, NewTree, NewChanged); +    {_, #{Var := Val}} -> +      NewTree = Acc#{Var => Val}, +      join_info_in(Left, Info1, Info2, NewTree, true); +    {#{Var := Val}, _} -> +      NewTree = Acc#{Var => Val}, +      join_info_in(Left, Info1, Info2, NewTree, Changed); +    {_, _} -> +      NewTree = Acc#{Var => none_type()}, +      join_info_in(Left, Info1, Info2, NewTree, true)    end;  join_info_in([], _Info1, _Info2, Acc, NewChanged) ->    {Acc, NewChanged}. @@ -1786,7 +1782,7 @@ enter_defines([], Info) -> Info.  
enter_define({PossibleVar, Range = #range{}}, Info) ->    case hipe_icode:is_var(PossibleVar) of      true ->  -      gb_trees:enter(hipe_icode:var_name(PossibleVar), Range, Info); +      Info#{hipe_icode:var_name(PossibleVar) => Range};      false ->        Info    end; @@ -1795,7 +1791,7 @@ enter_define(PossibleVar, Info) ->      true ->         case hipe_icode:variable_annotation(PossibleVar) of  	{range_anno, #ann{range=Range}, _} -> -	   gb_trees:enter(hipe_icode:var_name(PossibleVar), Range, Info); +	   Info#{hipe_icode:var_name(PossibleVar) => Range};  	_ ->  	  Info        end; @@ -1810,11 +1806,10 @@ enter_vals(Ins, Info) ->  lookup(PossibleVar, Info) ->    case hipe_icode:is_var(PossibleVar) of      true ->  -      case gb_trees:lookup(hipe_icode:var_name(PossibleVar), Info) of -	none -> -	  none_type(); -	{value, Val} -> -	  Val +      PossibleVarName = hipe_icode:var_name(PossibleVar), +      case Info of +	#{PossibleVarName := Val} -> Val; +	_ -> none_type()        end;      false ->        none_type() @@ -1828,10 +1823,10 @@ lookup(PossibleVar, Info) ->  init_work(State) ->    %% Labels = hipe_icode_cfg:reverse_postorder(state__cfg(State)),    Labels = [hipe_icode_cfg:start_label(state__cfg(State))], -  {Labels, [], sets:from_list(Labels)}. +  {Labels, [], set_from_list(Labels)}.  get_work({[Label|Left], List, Set}) -> -  NewWork = {Left, List, sets:del_element(Label, Set)}, +  NewWork = {Left, List, maps:remove(Label, Set)},    {Label, NewWork};  get_work({[], [], _Set}) ->    fixpoint; @@ -1839,12 +1834,12 @@ get_work({[], List, Set}) ->    get_work({lists:reverse(List), [], Set}).  
add_work(Work = {List1, List2, Set}, [Label|Left]) -> -  case sets:is_element(Label, Set) of -    true -> +  case Set of +    #{Label := _} ->        add_work(Work, Left); -    false -> +    _ ->        %% io:format("Adding work: ~w\n", [Label]), -      add_work({List1, [Label|List2], sets:add_element(Label, Set)}, Left) +      add_work({List1, [Label|List2], Set#{Label => []}}, Left)    end;  add_work(Work, []) ->    Work. @@ -1959,3 +1954,21 @@ next_down_limit(X) when is_integer(X), X > -16#8000000 -> -16#8000000;  next_down_limit(X) when is_integer(X), X > -16#80000000 -> -16#80000000;  next_down_limit(X) when is_integer(X), X > -16#800000000000000 -> -16#800000000000000;  next_down_limit(_X) -> neg_inf. + +%%-------------------------------------------------------------------- +%% Sets + +-type set(E) :: #{E => []}. + +set_from_list([]) -> #{}; +set_from_list(L) -> +  maps:from_list([{E, []} || E <- L]). + +not_visited([], _) -> []; +not_visited([E|T], M) -> +  case M of +    #{E := []} -> not_visited(T, M); +    _ -> [E|not_visited(T, M)] +  end. + +set_union(A, B) -> maps:merge(A, B). diff --git a/lib/hipe/icode/hipe_icode_ssa.erl b/lib/hipe/icode/hipe_icode_ssa.erl index b222fbc7d2..aca13a2ff0 100644 --- a/lib/hipe/icode/hipe_icode_ssa.erl +++ b/lib/hipe/icode/hipe_icode_ssa.erl @@ -34,13 +34,16 @@  -define(LIVENESS, hipe_icode_liveness).  -define(LIVENESS_NEEDED, true). +-export_type([liveness/0]). +  -include("hipe_icode.hrl").  -include("../ssa/hipe_ssa.inc").  %% Declarations for exported functions which are Icode-specific. --spec ssa_liveness__analyze(#cfg{}) -> gb_trees:tree(). --spec ssa_liveness__livein(_, icode_lbl()) -> [#icode_variable{}]. -%% -spec ssa_liveness__livein(_, icode_lbl(), _) -> [#icode_var{}]. +-opaque liveness() :: liveness(icode_lbl(), #icode_variable{}). +-spec ssa_liveness__analyze(#cfg{}) -> liveness(). +-spec ssa_liveness__livein(liveness(), icode_lbl()) -> [#icode_variable{}]. 
+%% -spec ssa_liveness__livein(liveness(), icode_lbl(), _) -> [#icode_var{}].  %%----------------------------------------------------------------------  %% Auxiliary operations which seriously differ between Icode and RTL. diff --git a/lib/hipe/icode/hipe_icode_type.erl b/lib/hipe/icode/hipe_icode_type.erl index 794c27ebcc..3f0e2998f1 100644 --- a/lib/hipe/icode/hipe_icode_type.erl +++ b/lib/hipe/icode/hipe_icode_type.erl @@ -100,7 +100,7 @@  -record(state, {info_map  = gb_trees:empty() :: gb_trees:tree(),  		cfg                          :: cfg(), -		liveness  = gb_trees:empty() :: gb_trees:tree(), +		liveness                     :: hipe_icode_ssa:liveness(),  		arg_types                    :: [erl_types:erl_type()],  		ret_type  = [t_none()]       :: [erl_types:erl_type()],  		lookupfun                    :: call_fun(), diff --git a/lib/hipe/main/hipe.app.src b/lib/hipe/main/hipe.app.src index f8487151d7..96bcf7d7e8 100644 --- a/lib/hipe/main/hipe.app.src +++ b/lib/hipe/main/hipe.app.src @@ -49,12 +49,13 @@  	     hipe_amd64_ra_naive,  	     hipe_amd64_ra_postconditions,  	     hipe_amd64_ra_sse2_postconditions, -	     hipe_amd64_ra_x87_ls,  	     hipe_amd64_registers,  	     hipe_amd64_specific,  	     hipe_amd64_specific_sse2,  	     hipe_amd64_specific_x87,  	     hipe_amd64_spill_restore, +	     hipe_amd64_sse2, +	     hipe_amd64_subst,  	     hipe_amd64_x87,  	     hipe_arm,  	     hipe_arm_assemble, @@ -73,6 +74,7 @@  	     hipe_arm_ra_postconditions,  	     hipe_arm_registers,  	     hipe_arm_specific, +	     hipe_arm_subst,  	     hipe_bb,  	     hipe_beam_to_icode,  	     hipe_coalescing_regalloc, @@ -142,9 +144,11 @@  	     hipe_ppc_registers,  	     hipe_ppc_specific,  	     hipe_ppc_specific_fp, +	     hipe_ppc_subst,  	     hipe_profile,  	     hipe_reg_worklists,  	     hipe_regalloc_loop, +	     hipe_regalloc_prepass,  	     hipe_rtl,  	     hipe_rtl_arch,  	     hipe_rtl_arith_32, @@ -171,6 +175,7 @@  	     hipe_rtl_to_sparc,  	     
hipe_rtl_to_x86,  	     hipe_rtl_varmap, +	     hipe_segment_trees,  	     hipe_sdi,  	     hipe_sparc,  	     hipe_sparc_assemble, @@ -193,6 +198,7 @@  	     hipe_sparc_registers,  	     hipe_sparc_specific,  	     hipe_sparc_specific_fp, +	     hipe_sparc_subst,  	     hipe_spillcost,  	     hipe_spillmin,  	     hipe_spillmin_color, @@ -216,11 +222,11 @@  	     hipe_x86_ra_ls,  	     hipe_x86_ra_naive,  	     hipe_x86_ra_postconditions, -	     hipe_x86_ra_x87_ls,  	     hipe_x86_registers,  	     hipe_x86_specific,  	     hipe_x86_specific_x87,  	     hipe_x86_spill_restore, +	     hipe_x86_subst,  	     hipe_x86_x87]},    {registered,[]},    {applications, [kernel,stdlib]}, diff --git a/lib/hipe/main/hipe.erl b/lib/hipe/main/hipe.erl index 6c525dd143..bee5da2195 100644 --- a/lib/hipe/main/hipe.erl +++ b/lib/hipe/main/hipe.erl @@ -1118,9 +1118,10 @@ help_hiper() ->  help_options() ->    HostArch = erlang:system_info(hipe_architecture), -  O1 = expand_options([o1], HostArch), -  O2 = expand_options([o2], HostArch), -  O3 = expand_options([o3], HostArch), +  O0 = expand_options([o0] ++ ?COMPILE_DEFAULTS, HostArch), +  O1 = expand_options([o1] ++ ?COMPILE_DEFAULTS, HostArch), +  O2 = expand_options([o2] ++ ?COMPILE_DEFAULTS, HostArch), +  O3 = expand_options([o3] ++ ?COMPILE_DEFAULTS, HostArch),    io:format("HiPE Compiler Options\n" ++  	    " Boolean-valued options generally have corresponding " ++  	    "aliases `no_...',\n" ++ @@ -1139,15 +1140,16 @@ help_options() ->  	    "   pp_x86 = pp_native,\n" ++  	    "   pp_amd64 = pp_native,\n" ++  	    "   pp_ppc = pp_native,\n" ++ -	    "   o0,\n" ++ -	    "   o1 = ~p,\n" ++ +	    "   o0 = ~p,\n" ++ +	    "   o1 = ~p ++ o0,\n" ++  	    "   o2 = ~p ++ o1,\n" ++  	    "   o3 = ~p ++ o2.\n",  	    [ordsets:from_list([verbose, debug, time, load, pp_beam,  				pp_icode, pp_rtl, pp_native, pp_asm,  				timeout]),  	     expand_options([pp_all], HostArch), -	     O1 -- [o1], +	     O0 -- [o0], +	     (O1 -- O0) -- [o1],  	 
    (O2 -- O1) -- [o2],  	     (O3 -- O2) -- [o3]]),    ok. @@ -1352,6 +1354,8 @@ opt_keys() ->       pp_rtl_lcm,       pp_rtl_ssapre,       pp_rtl_linear, +     ra_partitioned, +     ra_prespill,       regalloc,       remove_comments,       rtl_ssa, @@ -1382,8 +1386,15 @@ opt_keys() ->  %% Definitions: +o0_opts(_TargetArch) -> +  [concurrent_comp, {regalloc,linear_scan}]. +  o1_opts(TargetArch) -> -  Common = [inline_fp, pmatch, peephole], +  Common = [inline_fp, pmatch, peephole, ra_prespill, ra_partitioned, +	    icode_ssa_const_prop, icode_ssa_copy_prop, icode_inline_bifs, +	    rtl_ssa, rtl_ssa_const_prop, rtl_ssapre, +	    spillmin_color, use_indexing, remove_comments, +	    binary_opt, {regalloc,coalescing} | o0_opts(TargetArch)],    case TargetArch of      ultrasparc ->        Common; @@ -1402,11 +1413,8 @@ o1_opts(TargetArch) ->    end.  o2_opts(TargetArch) -> -  Common = [icode_ssa_const_prop, icode_ssa_copy_prop, % icode_ssa_struct_reuse, -	    icode_type, icode_inline_bifs, icode_call_elim, rtl_lcm, -	    rtl_ssa, rtl_ssa_const_prop, -	    spillmin_color, use_indexing, remove_comments, -	    concurrent_comp, binary_opt | o1_opts(TargetArch)], +  Common = [icode_type, icode_call_elim, % icode_ssa_struct_reuse, +	    rtl_lcm | (o1_opts(TargetArch) -- [rtl_ssapre])],    case TargetArch of      T when T =:= amd64 orelse T =:= ppc64 -> % 64-bit targets        [icode_range | Common]; @@ -1416,7 +1424,7 @@ o2_opts(TargetArch) ->  o3_opts(TargetArch) ->    %% no point checking for target architecture since this is checked in 'o1' -  [icode_range, {regalloc,coalescing} | o2_opts(TargetArch)]. +  [icode_range | o2_opts(TargetArch)].  %% Note that in general, the normal form for options should be positive.  
%% This is a good programming convention, so that tests in the code say @@ -1452,6 +1460,8 @@ opt_negations() ->     {no_pp_native, pp_native},     {no_pp_rtl_lcm, pp_rtl_lcm},     {no_pp_rtl_ssapre, pp_rtl_ssapre}, +   {no_ra_partitioned, ra_partitioned}, +   {no_ra_prespill, ra_prespill},     {no_remove_comments, remove_comments},     {no_rtl_ssa, rtl_ssa},     {no_rtl_ssa_const_prop, rtl_ssa_const_prop}, @@ -1481,7 +1491,8 @@ opt_basic_expansions() ->    [{pp_all, [pp_beam, pp_icode, pp_rtl, pp_native]}].  opt_expansions(TargetArch) -> -  [{o1, o1_opts(TargetArch)}, +  [{o0, o0_opts(TargetArch)}, +   {o1, o1_opts(TargetArch)},     {o2, o2_opts(TargetArch)},     {o3, o3_opts(TargetArch)},     {to_llvm, llvm_opts(o3, TargetArch)}, @@ -1528,13 +1539,21 @@ expand_kt2(Opts) ->  -spec expand_options(comp_options(), hipe_architecture()) -> comp_options(). -expand_options(Opts, TargetArch) -> +expand_options(Opts0, TargetArch) -> +  Opts1 = proplists:normalize(Opts0, [{aliases, opt_aliases()}]), +  Opts = normalise_opt_options(Opts1),    proplists:normalize(Opts, [{negations, opt_negations()}, -			     {aliases, opt_aliases()},  			     {expand, opt_basic_expansions()},  			     {expand, opt_expansions(TargetArch)},  			     {negations, opt_negations()}]). +normalise_opt_options([o0|Opts]) -> [o0] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([o1|Opts]) -> [o1] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([o2|Opts]) -> [o2] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([o3|Opts]) -> [o3] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([O|Opts]) -> [O|normalise_opt_options(Opts)]; +normalise_opt_options([]) -> []. +  -spec check_options(comp_options()) -> 'ok'.  
check_options(Opts) -> diff --git a/lib/hipe/misc/Makefile b/lib/hipe/misc/Makefile index 72cfff21a8..e5033e444b 100644 --- a/lib/hipe/misc/Makefile +++ b/lib/hipe/misc/Makefile @@ -44,7 +44,7 @@ RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN)  # Target Specs  # ----------------------------------------------------  ifdef HIPE_ENABLED -HIPE_MODULES = hipe_data_pp hipe_pack_constants hipe_sdi  +HIPE_MODULES = hipe_data_pp hipe_pack_constants hipe_sdi hipe_segment_trees  else  HIPE_MODULES =  endif diff --git a/lib/hipe/misc/hipe_sdi.erl b/lib/hipe/misc/hipe_sdi.erl index fbb4b105f6..5ca64bc669 100644 --- a/lib/hipe/misc/hipe_sdi.erl +++ b/lib/hipe/misc/hipe_sdi.erl @@ -36,10 +36,13 @@  %%------------------------------------------------------------------------  -type hipe_array() :: integer(). % declare this in hipe.hrl or builtin? +-type hipe_vector(E) :: {} | {E} | {E, E} | {E, E, E} | tuple().  -type label()      :: non_neg_integer().  -type address()    :: non_neg_integer(). +-type parents()    :: {hipe_vector(_ :: integer()), hipe_segment_trees:tree()}. +  %%------------------------------------------------------------------------  -record(label_data, {address :: address(), @@ -168,9 +171,11 @@ mk_long(N) ->  %%% - Since the graph is traversed from child to parent nodes in  %%%   Step 3, the edges are represented by a vector PARENTS[0..n-1]  %%%   such that PARENTS[j] = { i | i is a parent of j }. -%%% - An explicit PARENTS graph would have size O(n^2). Instead we -%%%   compute PARENTS[j] from the SDI vector when needed. This -%%%   reduces memory overheads, and may reduce time overheads too. +%%% - An explicit PARENTS graph would have size O(n^2). Instead, we +%%%   observe that (i is a parent of j) iff (j \in range(i)), where +%%%   range(i) is a constant function. We can thus precompute all the +%%%   ranges i and insert them into a data structure built for such +%%%   queries. In this case, we use a segment tree.  
-spec mk_span(non_neg_integer(), tuple()) -> hipe_array().  mk_span(N, SDIS) -> @@ -188,7 +193,29 @@ initSPAN(SdiNr, N, SDIS, SPAN) ->        initSPAN(SdiNr+1, N, SDIS, SPAN)    end. -mk_parents(N, SDIS) -> {N,SDIS}. +-spec mk_parents(non_neg_integer(), tuple()) -> parents(). +mk_parents(N, SDIS) -> +  PrevSDIS = vector_from_list(select_prev_sdis(N-1, SDIS, [])), +  Ranges = parents_generate_ranges(N-1, PrevSDIS, []), +  {PrevSDIS, hipe_segment_trees:build(Ranges)}. + +select_prev_sdis(-1, _SDIS, Acc) -> Acc; +select_prev_sdis(SdiNr, SDIS, Acc) -> +  #sdi_data{prevSdi=PrevSdi} = vector_sub(SDIS, SdiNr), +  select_prev_sdis(SdiNr-1, SDIS, [PrevSdi|Acc]). + +parents_generate_ranges(-1, _PrevSDIS, Acc) -> Acc; +parents_generate_ranges(SdiNr, PrevSDIS, Acc) -> +  %% inclusive +  {LO,HI} = parents_generate_range(SdiNr, PrevSDIS), +  parents_generate_ranges(SdiNr-1, PrevSDIS, [{LO,HI}|Acc]). + +-compile({inline, parents_generate_range/2}). +parents_generate_range(SdiNr, PrevSDIS) -> +  PrevSdi = vector_sub(PrevSDIS, SdiNr), +  if SdiNr =< PrevSdi -> {SdiNr+1, PrevSdi};	% forwards +     true -> {PrevSdi+1, SdiNr-1}		% backwards +  end.  %%% "After the structure is built we process it as follows.  %%% For any node i whose listed span exceeds the architectural @@ -209,7 +236,7 @@ mk_parents(N, SDIS) -> {N,SDIS}.  %%%   and PARENTS are no longer useful.  -spec update_long(non_neg_integer(), tuple(), hipe_array(), -		  {non_neg_integer(),tuple()},hipe_array()) -> 'ok'. +		  parents(),hipe_array()) -> 'ok'.  update_long(N, SDIS, SPAN, PARENTS, LONG) ->    WKL = initWKL(N-1, SDIS, SPAN, []),    processWKL(WKL, SDIS, SPAN, PARENTS, LONG). @@ -225,46 +252,32 @@ initWKL(SdiNr, SDIS, SPAN, WKL) ->    end.  -spec processWKL([non_neg_integer()], tuple(), hipe_array(), -		 {non_neg_integer(), tuple()}, hipe_array()) -> 'ok'. +		 parents(), hipe_array()) -> 'ok'.  
processWKL([], _SDIS, _SPAN, _PARENTS, _LONG) -> ok; -processWKL([Child|WKL], SDIS, SPAN, PARENTS, LONG) -> -  WKL2 = updateChild(Child, WKL, SDIS, SPAN, PARENTS, LONG), +processWKL([Child|WKL], SDIS, SPAN, PARENTS0, LONG) -> +  {WKL2, PARENTS} = +    case array_sub(SPAN, Child) of +      0 -> {WKL, PARENTS0};				% removed +      _ -> +	SdiData = vector_sub(SDIS, Child), +	Incr = sdiLongIncr(SdiData), +	array_update(LONG, Child, Incr), +	array_update(SPAN, Child, 0),			% remove child +	PARENTS1 = deleteParent(PARENTS0, Child), +	PS = parentsOfChild(PARENTS1, Child), +	{updateParents(PS, Child, Incr, SDIS, SPAN, WKL), PARENTS1} +    end,    processWKL(WKL2, SDIS, SPAN, PARENTS, LONG). --spec updateChild(non_neg_integer(), [non_neg_integer()], tuple(), hipe_array(), -		  {non_neg_integer(),tuple()}, hipe_array()) -> [non_neg_integer()]. -updateChild(Child, WKL, SDIS, SPAN, PARENTS, LONG) -> -  case array_sub(SPAN, Child) of -    0 -> WKL;						% removed -    _ -> -      SdiData = vector_sub(SDIS, Child), -      Incr = sdiLongIncr(SdiData), -      array_update(LONG, Child, Incr), -      array_update(SPAN, Child, 0),			% remove child -      PS = parentsOfChild(PARENTS, Child), -      updateParents(PS, Child, Incr, SDIS, SPAN, WKL) -  end. +-spec parentsOfChild(parents(), non_neg_integer()) -> [non_neg_integer()]. +parentsOfChild({_PrevSDIS, SegTree}, Child) -> +  hipe_segment_trees:intersect(Child, SegTree). --spec parentsOfChild({non_neg_integer(),tuple()}, -		     non_neg_integer()) -> [non_neg_integer()]. -parentsOfChild({N,SDIS}, Child) -> -  parentsOfChild(N-1, SDIS, Child, []). - --spec parentsOfChild(integer(), tuple(), non_neg_integer(), -		     [non_neg_integer()]) -> [non_neg_integer()]. 
-parentsOfChild(-1, _SDIS, _Child, PS) -> PS; -parentsOfChild(SdiNr, SDIS, Child, PS) -> -  SdiData = vector_sub(SDIS, SdiNr), -  #sdi_data{prevSdi=PrevSdi} = SdiData, -  {LO,HI} =	% inclusive -    if SdiNr =< PrevSdi -> {SdiNr+1, PrevSdi};	% forwards -       true -> {PrevSdi+1, SdiNr-1}		% backwards -    end, -  NewPS = -    if LO =< Child, Child =< HI -> [SdiNr | PS]; -       true -> PS -    end, -  parentsOfChild(SdiNr-1, SDIS, Child, NewPS). +-spec deleteParent(parents(), non_neg_integer()) -> parents(). +deleteParent({PrevSDIS, SegTree0}, Parent) -> +  {LO,HI} = parents_generate_range(Parent, PrevSDIS), +  SegTree = hipe_segment_trees:delete(Parent, LO, HI, SegTree0), +  {PrevSDIS, SegTree}.  -spec updateParents([non_neg_integer()], non_neg_integer(),  		    byte(), tuple(), hipe_array(), @@ -297,10 +310,12 @@ updateWKL(SdiNr, SDIS, SdiSpan, WKL) ->      false -> [SdiNr|WKL]    end. +-compile({inline, sdiSpanIsShort/2}). %% Only called once  -spec sdiSpanIsShort(#sdi_data{}, integer()) -> boolean().  sdiSpanIsShort(#sdi_data{si = #sdi_info{lb = LB, ub = UB}}, SdiSpan) ->    SdiSpan >= LB andalso SdiSpan =< UB. +-compile({inline, sdiLongIncr/1}). %% Only called once  -spec sdiLongIncr(#sdi_data{}) -> byte().  sdiLongIncr(#sdi_data{si = #sdi_info{incr = Incr}}) -> Incr. @@ -361,9 +376,11 @@ applyIncr([{Label,LabelData}|List], INCREMENT, LabelMap) ->  %%% Currently implemented as tuples.  %%% Used for the 'SDIS' and 'PARENTS' vectors. --spec vector_from_list([#sdi_data{}]) -> tuple(). +-spec vector_from_list([E]) -> hipe_vector(E).  vector_from_list(Values) -> list_to_tuple(Values). +-compile({inline, vector_sub/2}). +-spec vector_sub(hipe_vector(E), non_neg_integer()) -> V when V :: E.  vector_sub(Vec, I) -> element(I+1, Vec).  %%% ADT for mutable integer arrays, indexed from 0 to N-1. @@ -373,8 +390,10 @@ vector_sub(Vec, I) -> element(I+1, Vec).  -spec mk_array_of_zeros(non_neg_integer()) -> hipe_array().  mk_array_of_zeros(N) -> hipe_bifs:array(N, 0). 
+-compile({inline, array_update/3}).  -spec array_update(hipe_array(), non_neg_integer(), integer()) -> hipe_array().  array_update(A, I, V) -> hipe_bifs:array_update(A, I, V). +-compile({inline, array_sub/2}).  -spec array_sub(hipe_array(), non_neg_integer()) -> integer().  array_sub(A, I) -> hipe_bifs:array_sub(A, I). diff --git a/lib/hipe/misc/hipe_segment_trees.erl b/lib/hipe/misc/hipe_segment_trees.erl new file mode 100644 index 0000000000..22146396c3 --- /dev/null +++ b/lib/hipe/misc/hipe_segment_trees.erl @@ -0,0 +1,181 @@ +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2016. All Rights Reserved. +%%% +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%%     http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% Segment trees, with a delete operation. +%%% +%%% Keys are the (0-based) indices into the list passed to build/1. +%%% +%%% Range bounds are inclusive. +%%% + +-module(hipe_segment_trees). + +-export([build/1, intersect/2, delete/4]). + +-record(segment_tree, { +	  lo            :: integer(), +	  hi            :: integer(), +	  root          :: tnode() +	 }). + +%% X =< Mid belongs in Left +-define(NODE(Left, Right, Mid, Segments), {Left, Right, Mid, Segments}). + +-define(POINT_LEAF(Val), Val). +-define(RANGE_LEAF(Lo, Hi), {Lo, Hi}). + +-type segments() :: [non_neg_integer()]. +-type leaf()     :: segments(). +-type tnode()    :: ?NODE(tnode(), tnode(), integer(), segments()) | leaf(). + +-opaque tree() :: #segment_tree{} | nil. 
+-export_type([tree/0]). + +%% @doc Builds a segment tree of the given intervals. +-spec build([{integer(), integer()}]) -> tree(). +build(ListOfIntervals) -> +    case +	lists:usort( +	  lists:append( +	    [[Lo, Hi] || {Lo, Hi} <- ListOfIntervals, Lo =< Hi])) +    of +	[] -> nil; +	Endpoints -> +	    Tree0 = empty_tree_from_endpoints(Endpoints), +	    [Lo|_] = Endpoints, +	    Hi = lists:last(Endpoints), +	    Tree1 = insert_intervals(0, ListOfIntervals, Lo, Hi, Tree0), +	    Tree = squash_empty_subtrees(Tree1), +	    #segment_tree{lo=Lo, hi=Hi, root=Tree} +    end. + +empty_tree_from_endpoints(Endpoints) -> +    Leaves = leaves(Endpoints), +    {T, [], _, _} = balanced_bst(Leaves, length(Leaves)), +    T. + +leaves([Endpoint]) -> [?POINT_LEAF(Endpoint)]; +leaves([A | [B|_] = Tail]) -> +    %% We omit the range leaf if it's empty +    case A<B-1 of +	true  -> [?POINT_LEAF(A),?RANGE_LEAF(A+1,B-1) | leaves(Tail)]; +	false -> [?POINT_LEAF(A) | leaves(Tail)] +    end. + +balanced_bst(L, S) when S > 1 -> +    Sm = S, %% - 1 +    S2 = Sm div 2, +    S1 = Sm - S2, +    {Left, L1, LeftLo, LeftHi} = balanced_bst(L, S1), +    {Right, L2, _, RightHi} = balanced_bst(L1, S2), +    T = ?NODE(Left, Right, LeftHi, []), +    {T, L2, LeftLo, RightHi}; +balanced_bst([?RANGE_LEAF(Lo, Hi) | L], 1) -> +    {[], L, Lo, Hi}; +balanced_bst([?POINT_LEAF(Val) | L], 1) -> +    {[], L, Val, Val}. + +insert_intervals(_Ix, [], _Lo, _Hi, Tree) -> Tree; +insert_intervals(Ix, [Int|Ints], Lo, Hi, Tree) -> +    insert_intervals(Ix + 1, Ints, Lo, Hi, +		     insert_interval(Ix, Int, Lo, Hi, Tree)). 
+ +insert_interval(_, {Lo, Hi}, _, _, Node) when Lo > Hi -> Node; +insert_interval(I, Int={Lo,Hi}, NLo, NHi, +		?NODE(Left0, Right0, Mid, Segments)) -> +    if Lo =< NLo, NHi =< Hi -> +	    ?NODE(Left0, Right0, Mid, [I|Segments]); +       true -> +	    Left = case intervals_intersect(Lo, Hi,    NLo, Mid) of +		       true -> insert_interval(I, Int, NLo, Mid, Left0); +		       false -> Left0 +		   end, +	    Right = case intervals_intersect(Lo, Hi,    Mid+1, NHi) of +			true -> insert_interval(I, Int, Mid+1, NHi, Right0); +			false -> Right0 +		   end, +	    ?NODE(Left, Right, Mid, Segments) +    end; +insert_interval(I, {_Lo,_Hi}, _NLo, _NHi, Leaf) -> [I|Leaf]. + +intervals_intersect(ALo, AHi, BLo, BHi) -> +    (ALo =< AHi) andalso (BLo =< BHi) %% both nonempty +	andalso nonempty_intervals_intersect(ALo, AHi, BLo, BHi). + +%% Purely optional optimisation +squash_empty_subtrees(?NODE(Left0, Right0, Mid, Segs)) -> +    build_squash_node(squash_empty_subtrees(Left0), +		      squash_empty_subtrees(Right0), +		      Mid, Segs); +squash_empty_subtrees(Leaf) -> Leaf. + +build_squash_node([], [], _, Segs) -> Segs; +build_squash_node(Left, Right, Mid, Segs) -> +    ?NODE(Left, Right, Mid, Segs). + +%% @doc Returns the indices of the intervals in the tree that contains Point. +-spec intersect(integer(), tree()) -> [non_neg_integer()]. +intersect(Point, nil) when is_integer(Point) -> []; +intersect(Point, #segment_tree{lo=Lo, hi=Hi, root=Root}) +  when is_integer(Point) -> +    case Lo =< Point andalso Point =< Hi of +	false -> []; +	true -> intersect_1(Point, Root, []) +    end. + +intersect_1(Point, ?NODE(Left, Right, Mid, Segs), Acc0) -> +    Child = if Point =< Mid -> Left; true -> Right end, +    intersect_1(Point, Child, Segs ++ Acc0); +intersect_1(_, LeafSegs, Acc) -> LeafSegs ++ Acc. + +%% @doc Deletes the interval {Lo, Hi}, which had index Index in the list passed +%%      to build/1. +-spec delete(non_neg_integer(), integer(), integer(), tree()) -> tree(). 
+delete(_, _, _, nil) -> nil; +delete(_, Lo, Hi, Tree) when Lo > Hi -> Tree; +delete(_, Lo, Hi, Tree = #segment_tree{lo=TLo, hi=THi}) +  when Hi < TLo; Lo > THi -> Tree; +delete(Index, Lo, Hi, Tree = #segment_tree{lo=TLo, hi=THi, root=Root0}) +  when is_integer(Lo), is_integer(Hi) -> +    Root = delete_1(Index, Lo, Hi, TLo, THi, Root0), +    Tree#segment_tree{root=Root}. + +delete_1(I, Lo, Hi, NLo, NHi, ?NODE(Left0, Right0, Mid, Segments)) -> +    if Lo =< NLo, NHi =< Hi -> +	    ?NODE(Left0, Right0, Mid, delete_2(Segments, I)); +       true -> +	    Left = case nonempty_intervals_intersect(Lo, Hi, NLo, Mid) of +		       true -> delete_1(I, Lo, Hi, NLo, Mid, Left0); +		       false -> Left0 +		   end, +	    Right = case nonempty_intervals_intersect(Lo, Hi, Mid+1, NHi) of +			true -> delete_1(I, Lo, Hi, Mid+1, NHi, Right0); +			false -> Right0 +		   end, +	    %% We could do build_squash_node here, is it worth it? +	    ?NODE(Left, Right, Mid, Segments) +    end; +delete_1(I, _Lo, _Hi, _NLo, _NHi, Leaf) -> delete_2(Leaf, I). + +delete_2([I|Segs], I) -> Segs; +delete_2([S|Segs], I) -> [S|delete_2(Segs,I)]. + +-compile({inline,nonempty_intervals_intersect/4}). +nonempty_intervals_intersect(ALo, AHi, BLo, BHi) -> +    (BLo =< AHi) andalso (ALo =< BHi). diff --git a/lib/hipe/opt/hipe_spillmin.erl b/lib/hipe/opt/hipe_spillmin.erl index 4eeb1d71db..a2efd35d26 100644 --- a/lib/hipe/opt/hipe_spillmin.erl +++ b/lib/hipe/opt/hipe_spillmin.erl @@ -29,7 +29,8 @@  %% ==========================================================================  %% Exported functions (short description):  %% -%%  stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) ->  +%%  stackalloc(CFG, StackSlots, SpillIndex, Options, TgtMod, TgtCtx, +%%             TempMap) ->  %%      {Coloring, NumberOfSpills}  %%    Takes a CFG and the TempMap from register allocation and returns   %%    a coloring of stack slots.   
@@ -49,7 +50,7 @@  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  -module(hipe_spillmin). --export([stackalloc/6, mapmerge/2]). +-export([stackalloc/7, stackalloc/8, mapmerge/2]).  %%-define(DEBUG, 1).  -define(HIPE_INSTRUMENT_COMPILER, true). @@ -59,6 +60,8 @@  -include("../main/hipe.hrl").  -include("../flow/cfg.hrl"). +-type target_context() :: any(). +  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  %%  %% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap)  @@ -68,18 +71,29 @@  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  -spec stackalloc(#cfg{}, [_], non_neg_integer(), -		 comp_options(), module(), hipe_temp_map()) -> -                                {hipe_spill_map(), non_neg_integer()}. +		 comp_options(), module(), target_context(), hipe_temp_map()) -> +		    {hipe_spill_map(), non_neg_integer()}. -stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> +stackalloc(CFG, StackSlots, SpillIndex, Options, TgtMod, TgtCtx, TempMap) -> +  Liveness = TgtMod:analyze(CFG,TgtCtx), +  stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, TgtMod, TgtCtx, TempMap). + +-spec stackalloc(#cfg{}, _, [_], non_neg_integer(), +		 comp_options(), module(), target_context(), hipe_temp_map()) -> +		    {hipe_spill_map(), non_neg_integer()}. 
+ +stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, TgtMod, TgtCtx, +	   TempMap) ->    case proplists:get_bool(spillmin_color, Options) of      false -> -      ?option_time(hipe_spillmin_scan:stackalloc(CFG, StackSlots, SpillIndex, -						 Options, Target, TempMap), +      ?option_time(hipe_spillmin_scan:stackalloc( +		     CFG, Liveness, StackSlots, SpillIndex, Options, TgtMod, +		     TgtCtx, TempMap),  		   "Spill minimize, linear scan", Options);      true -> -      ?option_time(hipe_spillmin_color:stackalloc(CFG, StackSlots, SpillIndex, -						  Options, Target, TempMap), +      ?option_time(hipe_spillmin_color:stackalloc( +		     CFG, Liveness, StackSlots, SpillIndex, Options, TgtMod, +		     TgtCtx, TempMap),  		   "Spill minimize, graph coloring", Options)    end. diff --git a/lib/hipe/opt/hipe_spillmin_color.erl b/lib/hipe/opt/hipe_spillmin_color.erl index 7c23de44b4..a0d6b03503 100644 --- a/lib/hipe/opt/hipe_spillmin_color.erl +++ b/lib/hipe/opt/hipe_spillmin_color.erl @@ -41,7 +41,7 @@  -module(hipe_spillmin_color). --export([stackalloc/6]). +-export([stackalloc/8]).  %%-ifndef(DO_ASSERT).  %%-define(DO_ASSERT, true). @@ -66,13 +66,17 @@  %%  where Location is {spill,M}.  %% {spill,M} denotes the Mth spilled node --spec stackalloc(#cfg{}, [_], non_neg_integer(), -		 comp_options(), module(), hipe_temp_map()) -> +-type target_context() :: any(). + +-spec stackalloc(#cfg{}, _, [_], non_neg_integer(), +		 comp_options(), module(), target_context(), hipe_temp_map()) ->                                  {hipe_spill_map(), non_neg_integer()}. 
-stackalloc(CFG, _StackSlots, SpillIndex, _Options, Target, TempMap) -> +stackalloc(CFG, Live, _StackSlots, SpillIndex, _Options, TargetMod, +	   TargetContext, TempMap) -> +  Target = {TargetMod, TargetContext},    ?report2("building IG~n", []), -  {IG, NumNodes} = build_ig(CFG, Target, TempMap), +  {IG, NumNodes} = build_ig(CFG, Live, Target, TempMap),    {Cols, MaxColors} =       color_heuristic(IG, 0, NumNodes, NumNodes, NumNodes, Target, 1),    SortedCols = lists:sort(Cols), @@ -167,8 +171,8 @@ remap_temp_map0(Cols, [_Y|Ys], SpillIndex) ->  %% Returns {Interference_graph, Number_Of_Nodes}  %% -build_ig(CFG, Target, TempMap) -> -  try build_ig0(CFG, Target, TempMap) +build_ig(CFG, Live, Target, TempMap) -> +  try build_ig0(CFG, Live, Target, TempMap)    catch error:Rsn -> exit({regalloc, build_ig, Rsn})    end. @@ -185,12 +189,11 @@ setup_ets0([X|Xs], Table, N) ->    ets:insert(Table, {X, N}),    setup_ets0(Xs, Table, N+1). -build_ig0(CFG, Target, TempMap) -> -  Live = Target:analyze(CFG), +build_ig0(CFG, Live, Target, TempMap) ->    TempMapping = map_spilled_temporaries(TempMap),    TempMappingTable = setup_ets(TempMapping),    NumSpilled = length(TempMapping), -  IG = build_ig_bbs(Target:labels(CFG), CFG, Live, empty_ig(NumSpilled), +  IG = build_ig_bbs(labels(CFG, Target), CFG, Live, empty_ig(NumSpilled),  		    Target, TempMap, TempMappingTable),    ets:delete(TempMappingTable),    {normalize_ig(IG), NumSpilled}. @@ -540,18 +543,21 @@ is_visited(X, Vis) ->  %% *** INTERFACES TO OTHER MODULES ***  %% -liveout(CFG, L, Target) -> -  ordsets:from_list(reg_names(Target:liveout(CFG, L), Target)). +labels(CFG, {TgtMod,TgtCtx}) -> +  TgtMod:labels(CFG, TgtCtx). + +liveout(CFG, L, Target={TgtMod,TgtCtx}) -> +  ordsets:from_list(reg_names(TgtMod:liveout(CFG, L, TgtCtx), Target)). -bb(CFG, L, Target) -> -   hipe_bb:code(Target:bb(CFG, L)). +bb(CFG, L, {TgtMod,TgtCtx}) -> +   hipe_bb:code(TgtMod:bb(CFG, L, TgtCtx)). 
-def_use(X, Target, TempMap) -> -  Defines = [Y || Y <- reg_names(Target:defines(X), Target),  +def_use(X, Target={TgtMod,TgtCtx}, TempMap) -> +  Defines = [Y || Y <- reg_names(TgtMod:defines(X,TgtCtx), Target),  		  hipe_temp_map:is_spilled(Y, TempMap)], -  Uses = [Z || Z <- reg_names(Target:uses(X), Target),  +  Uses = [Z || Z <- reg_names(TgtMod:uses(X,TgtCtx), Target),  	       hipe_temp_map:is_spilled(Z, TempMap)],    {Defines, Uses}. -reg_names(Regs, Target) -> -  [Target:reg_nr(X) || X <- Regs]. +reg_names(Regs, {TgtMod,TgtCtx}) -> +  [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. diff --git a/lib/hipe/opt/hipe_spillmin_scan.erl b/lib/hipe/opt/hipe_spillmin_scan.erl index 06b68e1934..097a787152 100644 --- a/lib/hipe/opt/hipe_spillmin_scan.erl +++ b/lib/hipe/opt/hipe_spillmin_scan.erl @@ -60,7 +60,7 @@  -module(hipe_spillmin_scan). --export([stackalloc/6]). +-export([stackalloc/8]).  %%-define(DEBUG, 1).  -define(HIPE_INSTRUMENT_COMPILER, true). @@ -70,6 +70,8 @@  -include("../main/hipe.hrl").  -include("../flow/cfg.hrl"). +-type target_context() :: any(). +  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  %%  %% stackalloc(CFG, StackSlots,  SpillIndex, Options, Target, TempMap)  @@ -85,15 +87,14 @@  %%  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% --spec stackalloc(#cfg{}, [_], non_neg_integer(), -		 comp_options(), module(), hipe_temp_map()) -> +-spec stackalloc(#cfg{}, _, [_], non_neg_integer(), +		 comp_options(), module(), target_context(), hipe_temp_map()) ->                                  {hipe_spill_map(), non_neg_integer()}. -stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> +stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, TargetMod, +	   TargetContext, TempMap) -> +  Target = {TargetMod, TargetContext},    ?debug_msg("LinearScan: ~w\n", [erlang:statistics(runtime)]), -  %% Step 1: Calculate liveness (Call external implementation.) 
-  Liveness = liveness(CFG, Target), -  ?debug_msg("liveness (done)~w\n", [erlang:statistics(runtime)]),    USIntervals = calculate_intervals(CFG, Liveness, Options,  				    Target, TempMap),    %% ?debug_msg("intervals (done) ~w\n", [erlang:statistics(runtime)]), @@ -124,8 +125,8 @@ stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) ->  %%  all other.  %%-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  calculate_intervals(CFG, Liveness, _Options, Target, TempMap) -> -  Interval = empty_interval(Target:number_of_temporaries(CFG)), -  Worklist = Target:reverse_postorder(CFG), +  Interval = empty_interval(number_of_temporaries(CFG, Target)), +  Worklist = reverse_postorder(CFG, Target),    intervals(Worklist, Interval, 1, CFG, Liveness, Target, TempMap).  %%-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  - @@ -538,23 +539,26 @@ extend_interval(Pos, {Beginning, End})  %%   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -liveness(CFG, Target) -> -  Target:analyze(CFG). +bb(CFG, L, {TgtMod,TgtCtx}) -> +  TgtMod:bb(CFG, L, TgtCtx). + +livein(Liveness, L, Target={TgtMod,TgtCtx}) -> +  regnames(TgtMod:livein(Liveness, L, TgtCtx), Target). -bb(CFG, L, Target) -> -  Target:bb(CFG, L). +liveout(Liveness, L, Target={TgtMod,TgtCtx}) -> +  regnames(TgtMod:liveout(Liveness, L, TgtCtx), Target). -livein(Liveness, L, Target) -> -  regnames(Target:livein(Liveness, L), Target). +number_of_temporaries(CFG, {TgtMod,TgtCtx}) -> +  TgtMod:number_of_temporaries(CFG, TgtCtx). -liveout(Liveness, L, Target) -> -  regnames(Target:liveout(Liveness, L), Target). +uses(I, Target={TgtMod,TgtCtx}) -> +  regnames(TgtMod:uses(I,TgtCtx), Target). -uses(I, Target) -> -  regnames(Target:uses(I), Target). +defines(I, Target={TgtMod,TgtCtx}) -> +  regnames(TgtMod:defines(I,TgtCtx), Target). -defines(I, Target) -> -  regnames(Target:defines(I), Target). 
+regnames(Regs, {TgtMod,TgtCtx}) -> +  [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. -regnames(Regs, Target) -> -  [Target:reg_nr(X) || X <- Regs].  +reverse_postorder(CFG, {TgtMod,TgtCtx}) -> +  TgtMod:reverse_postorder(CFG, TgtCtx). diff --git a/lib/hipe/ppc/Makefile b/lib/hipe/ppc/Makefile index 1901dfa671..1ca1d51846 100644 --- a/lib/hipe/ppc/Makefile +++ b/lib/hipe/ppc/Makefile @@ -63,6 +63,7 @@ MODULES=hipe_ppc \  	hipe_ppc_ra_postconditions \  	hipe_ppc_ra_postconditions_fp \  	hipe_ppc_registers \ +	hipe_ppc_subst \  	hipe_rtl_to_ppc  HRL_FILES=hipe_ppc.hrl diff --git a/lib/hipe/ppc/hipe_ppc_cfg.erl b/lib/hipe/ppc/hipe_ppc_cfg.erl index 34d4bf54c5..ee9b4432e0 100644 --- a/lib/hipe/ppc/hipe_ppc_cfg.erl +++ b/lib/hipe/ppc/hipe_ppc_cfg.erl @@ -24,6 +24,7 @@  -export([init/1,           labels/1, start_label/1,           succ/2, +         map_bbs/2, fold_bbs/3,           bb/2, bb_add/3]).  -export([postorder/1]).  -export([linearise/1, params/1, reverse_postorder/1]). @@ -34,6 +35,7 @@  -define(BREADTH_ORDER,true).  -define(PARAMS_NEEDED,true).  -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true).  -include("hipe_ppc.hrl").  -include("../flow/cfg.hrl"). diff --git a/lib/hipe/ppc/hipe_ppc_defuse.erl b/lib/hipe/ppc/hipe_ppc_defuse.erl index 77b84dc574..305e88488d 100644 --- a/lib/hipe/ppc/hipe_ppc_defuse.erl +++ b/lib/hipe/ppc/hipe_ppc_defuse.erl @@ -22,6 +22,7 @@  -module(hipe_ppc_defuse).  -export([insn_def_all/1, insn_use_all/1]).  -export([insn_def_gpr/1, insn_use_gpr/1]). +-export([insn_defs_all_gpr/1, insn_defs_all_fpr/1]).  -export([insn_def_fpr/1, insn_use_fpr/1]).  -include("hipe_ppc.hrl"). @@ -52,6 +53,9 @@ insn_def_gpr(I) ->      _ -> []    end. +insn_defs_all_gpr(#pseudo_call{}) -> true; +insn_defs_all_gpr(_) -> false. +  call_clobbered_gpr() ->    [hipe_ppc:mk_temp(R, T)     || {R,T} <- hipe_ppc_registers:call_clobbered() ++ all_fp_pseudos()]. @@ -116,6 +120,9 @@ insn_def_fpr(I) ->      _ -> []    end. 
+insn_defs_all_fpr(#pseudo_call{}) -> true; +insn_defs_all_fpr(_) -> false. +  call_clobbered_fpr() ->    [hipe_ppc:mk_temp(R, 'double') || R <- hipe_ppc_registers:allocatable_fpr()]. diff --git a/lib/hipe/ppc/hipe_ppc_frame.erl b/lib/hipe/ppc/hipe_ppc_frame.erl index ff0450270f..8d37159ad8 100644 --- a/lib/hipe/ppc/hipe_ppc_frame.erl +++ b/lib/hipe/ppc/hipe_ppc_frame.erl @@ -24,16 +24,14 @@  -include("hipe_ppc.hrl").  -include("../rtl/hipe_literals.hrl"). -frame(Defun) -> -  Formals = fix_formals(hipe_ppc:defun_formals(Defun)), -  Temps0 = all_temps(hipe_ppc:defun_code(Defun), Formals), -  MinFrame = defun_minframe(Defun), +frame(CFG) -> +  Formals = fix_formals(hipe_ppc_cfg:params(CFG)), +  Temps0 = all_temps(CFG, Formals), +  MinFrame = defun_minframe(CFG),    Temps = ensure_minframe(MinFrame, Temps0), -  ClobbersLR = clobbers_lr(hipe_ppc:defun_code(Defun)), -  CFG0 = hipe_ppc_cfg:init(Defun), -  Liveness = hipe_ppc_liveness_all:analyse(CFG0), -  CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersLR), -  hipe_ppc_cfg:linearise(CFG1). +  ClobbersLR = clobbers_lr(CFG), +  Liveness = hipe_ppc_liveness_all:analyse(CFG), +  do_body(CFG, Liveness, Formals, Temps, ClobbersLR).  fix_formals(Formals) ->    fix_formals(hipe_ppc_registers:nr_args(), Formals). @@ -44,27 +42,16 @@ fix_formals(_, []) -> [].  do_body(CFG0, Liveness, Formals, Temps, ClobbersLR) ->    Context = mk_context(Liveness, Formals, Temps, ClobbersLR), -  CFG1 = do_blocks(CFG0, Context), +  CFG1 = hipe_ppc_cfg:map_bbs( +	   fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG0),    do_prologue(CFG1, Context). -do_blocks(CFG, Context) -> -  Labels = hipe_ppc_cfg:labels(CFG), -  do_blocks(Labels, CFG, Context). 
- -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) ->    Liveness = context_liveness(Context),    LiveOut = hipe_ppc_liveness_all:liveout(Liveness, Label), -  Block = hipe_ppc_cfg:bb(CFG, Label),    Code = hipe_bb:code(Block), -  NewCode = do_block(Code, LiveOut, Context), -  NewBlock = hipe_bb:code_update(Block, NewCode), -  NewCFG = hipe_ppc_cfg:bb_add(CFG, Label, NewBlock), -  do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> -  CFG. - -do_block(Insns, LiveOut, Context) -> -  do_block(Insns, LiveOut, Context, context_framesize(Context), []). +  NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), +  hipe_bb:code_update(Block, NewCode).  do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) ->    {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), @@ -573,29 +560,41 @@ temp_is_pseudo(Temp) ->  %%% Detect if a Defun's body clobbers LR.  %%% -clobbers_lr([I|Insns]) -> -  case I of -    #pseudo_call{} -> true; -    %% mtspr to lr cannot occur yet -    _ -> clobbers_lr(Insns) -  end; -clobbers_lr([]) -> false. +clobbers_lr(CFG) -> +  any_insn(fun(#pseudo_call{}) -> true; +	      (_) -> false +	   end, CFG). + +any_insn(Pred, CFG) -> +  %% Abuse fold to do an efficient "any"-operation using nonlocal control flow +  FoundSatisfying = make_ref(), +  try fold_insns(fun (I, _) -> +		     case Pred(I) of +		       true -> throw(FoundSatisfying); +		       false -> false +		     end +		 end, false, CFG) +  of _ -> false +  catch FoundSatisfying -> true +  end.  %%%  %%% Build the set of all temps used in a Defun's body.  %%% -all_temps(Code, Formals) -> -  S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> +  S0 = fold_insns(fun find_temps/2, tset_empty(), CFG),    S1 = tset_del_list(S0, Formals),    tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). 
-find_temps([I|Insns], S0) -> +find_temps(I, S0) ->    S1 = tset_add_list(S0, hipe_ppc_defuse:insn_def_all(I)), -  S2 = tset_add_list(S1, hipe_ppc_defuse:insn_use_all(I)), -  find_temps(Insns, S2); -find_temps([], S) -> -  S. +  tset_add_list(S1, hipe_ppc_defuse:insn_use_all(I)). + +fold_insns(Fun, InitAcc, CFG) -> +  hipe_ppc_cfg:fold_bbs( +    fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, +    InitAcc, CFG).  tset_empty() ->    gb_sets:new(). @@ -624,16 +623,11 @@ tset_to_list(S) ->  %%% in the middle of a tailcall.  %%% -defun_minframe(Defun) -> -  MaxTailArity = body_mta(hipe_ppc:defun_code(Defun), 0), -  MyArity = length(fix_formals(hipe_ppc:defun_formals(Defun))), +defun_minframe(CFG) -> +  MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), +  MyArity = length(fix_formals(hipe_ppc_cfg:params(CFG))),    erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> -  body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> -  MTA. -  insn_mta(I, MTA) ->    case I of      #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/ppc/hipe_ppc_main.erl b/lib/hipe/ppc/hipe_ppc_main.erl index fd5cc2befb..5d1b0d0305 100644 --- a/lib/hipe/ppc/hipe_ppc_main.erl +++ b/lib/hipe/ppc/hipe_ppc_main.erl @@ -24,8 +24,10 @@  rtl_to_ppc(MFA, RTL, Options) ->    PPC1 = hipe_rtl_to_ppc:translate(RTL), -  PPC2 = hipe_ppc_ra:ra(PPC1, Options), -  PPC3 = hipe_ppc_frame:frame(PPC2), +  PPC1CFG = hipe_ppc_cfg:init(PPC1), +  PPC2CFG = hipe_ppc_ra:ra(PPC1CFG, Options), +  PPC3CFG = hipe_ppc_frame:frame(PPC2CFG), +  PPC3 = hipe_ppc_cfg:linearise(PPC3CFG),    PPC4 = hipe_ppc_finalise:finalise(PPC3),    ppc_pp(PPC4, MFA, Options),    {native, powerpc, {unprofiled, PPC4}}. diff --git a/lib/hipe/ppc/hipe_ppc_ra.erl b/lib/hipe/ppc/hipe_ppc_ra.erl index 87c776f5d1..f8614db4ef 100644 --- a/lib/hipe/ppc/hipe_ppc_ra.erl +++ b/lib/hipe/ppc/hipe_ppc_ra.erl @@ -22,36 +22,40 @@  -module(hipe_ppc_ra).  -export([ra/2]). 
-ra(Defun0, Options) -> -  %% hipe_ppc_pp:pp(Defun0), -  {Defun1, Coloring_fp, SpillIndex} +ra(CFG0, Options) -> +  %% hipe_ppc_pp:pp(hipe_ppc_cfg:linearise(CFG0)), +  {CFG1, _FPLiveness1, Coloring_fp, SpillIndex}      = case proplists:get_bool(inline_fp, Options) of  	true -> -	  hipe_regalloc_loop:ra_fp(Defun0, Options, +	  FPLiveness0 = hipe_ppc_specific_fp:analyze(CFG0, no_context), +	  hipe_regalloc_loop:ra_fp(CFG0, FPLiveness0, Options,  				   hipe_coalescing_regalloc, -				   hipe_ppc_specific_fp); +				   hipe_ppc_specific_fp, no_context);  	false -> -	  {Defun0,[],0} +	  {CFG0,undefined,[],0}        end, -  %% hipe_ppc_pp:pp(Defun1), -  {Defun2, Coloring} +  %% hipe_ppc_pp:pp(hipe_ppc_cfg:linearise(CFG1)), +  GPLiveness1 = hipe_ppc_specific:analyze(CFG1, no_context), +  {CFG2, _GPLiveness2, Coloring}      = case proplists:get_value(regalloc, Options, coalescing) of  	coalescing -> -	  ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); +	  ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_coalescing_regalloc);  	optimistic -> -	  ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); +	  ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_optimistic_regalloc);  	graph_color -> -	  ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); +	  ra(CFG1, GPLiveness1, SpillIndex, Options, +	     hipe_graph_coloring_regalloc);  	linear_scan -> -	  hipe_ppc_ra_ls:ra(Defun1, SpillIndex, Options); +	  hipe_ppc_ra_ls:ra(CFG1, GPLiveness1, SpillIndex, Options);  	naive -> -	  hipe_ppc_ra_naive:ra(Defun1, Coloring_fp, Options); +	  hipe_ppc_ra_naive:ra(CFG1, GPLiveness1, Coloring_fp, Options);          _ ->  	  exit({unknown_regalloc_compiler_option,  		proplists:get_value(regalloc,Options)})        end, -  %% hipe_ppc_pp:pp(Defun2), -  hipe_ppc_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). +  %% hipe_ppc_pp:pp(hipe_ppc_cfg:linearise(CFG2)), +  hipe_ppc_ra_finalise:finalise(CFG2, Coloring, Coloring_fp). 
-ra(Defun, SpillIndex, Options, RegAllocMod) -> -  hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_ppc_specific). +ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> +  hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, +			hipe_ppc_specific, no_context). diff --git a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl index ea163221c2..78f123116e 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl @@ -23,13 +23,14 @@  -export([finalise/3]).  -include("hipe_ppc.hrl"). -finalise(Defun, TempMap, FPMap0) -> -  Code = hipe_ppc:defun_code(Defun), -  {_, SpillLimit} = hipe_ppc:defun_var_range(Defun), +finalise(CFG, TempMap, FPMap0) -> +  {_, SpillLimit} = hipe_gensym:var_range(ppc),    Map = mk_ra_map(TempMap, SpillLimit),    FPMap1 = mk_ra_map_fp(FPMap0, SpillLimit), -  NewCode = ra_code(Code, Map, FPMap1, []), -  Defun#defun{code=NewCode}. +  hipe_ppc_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FPMap1) end, CFG). + +ra_bb(BB, Map, FpMap) -> +  hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap, [])).  ra_code([I|Insns], Map, FPMap, Accum) ->    ra_code(Insns, Map, FPMap, [ra_insn(I, Map, FPMap) | Accum]); diff --git a/lib/hipe/ppc/hipe_ppc_ra_ls.erl b/lib/hipe/ppc/hipe_ppc_ra_ls.erl index 6e8304467e..5f331542e8 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_ls.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_ls.erl @@ -21,37 +21,35 @@  %%% Linear Scan register allocator for PowerPC  -module(hipe_ppc_ra_ls). --export([ra/3]). +-export([ra/4]). -ra(Defun, SpillIndex, Options) -> -  NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), -  CFG = hipe_ppc_cfg:init(NewDefun), -  SpillLimit = hipe_ppc_specific:number_of_temporaries(CFG), -  alloc(NewDefun, SpillIndex, SpillLimit, Options). +ra(CFG, Liveness, SpillIndex, Options) -> +  SpillLimit = hipe_ppc_specific:number_of_temporaries(CFG, no_context), +  alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). 
-alloc(Defun, SpillIndex, SpillLimit, Options) -> -  CFG = hipe_ppc_cfg:init(Defun), +alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) ->    {Coloring, _NewSpillIndex} =      regalloc( -      CFG, +      CFG, Liveness,        hipe_ppc_registers:allocatable_gpr()--        [hipe_ppc_registers:temp3(),         hipe_ppc_registers:temp2(),         hipe_ppc_registers:temp1()],        [hipe_ppc_cfg:start_label(CFG)],        SpillIndex, SpillLimit, Options, -      hipe_ppc_specific), -  {NewDefun, _DidSpill} = +      hipe_ppc_specific, no_context), +  {NewCFG, _DidSpill} =      hipe_ppc_ra_postconditions:check_and_rewrite( -      Defun, Coloring, 'linearscan'), -  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific), +      CFG, Coloring, 'linearscan'), +  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific, no_context),    {TempMap2,_NewSpillIndex2} = -    hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, -			     hipe_ppc_specific, TempMap), +    hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, +			     hipe_ppc_specific, no_context, TempMap),    Coloring2 =      hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), -  {NewDefun, Coloring2}. +  {NewCFG, Liveness, Coloring2}. -regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> -  hipe_ls_regalloc:regalloc( -    CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target). +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, +	 TgtMod, TgtCtx) -> +  hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, +			    DontSpill, Options, TgtMod, TgtCtx). diff --git a/lib/hipe/ppc/hipe_ppc_ra_naive.erl b/lib/hipe/ppc/hipe_ppc_ra_naive.erl index 24995be252..322fb1a171 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_naive.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_naive.erl @@ -20,11 +20,11 @@  %%  -module(hipe_ppc_ra_naive). --export([ra/3]). +-export([ra/4]).  -include("hipe_ppc.hrl"). 
-ra(Defun, _Coloring_fp, _Options) ->	% -> {Defun, Coloring} -  {NewDefun,_DidSpill} = -    hipe_ppc_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), -  {NewDefun, []}. +ra(CFG, Liveness, _Coloring_fp, _Options) ->	% -> {CFG, Liveness, Coloring} +  {NewCFG,_DidSpill} = +    hipe_ppc_ra_postconditions:check_and_rewrite2(CFG, [], 'naive'), +  {NewCFG, Liveness, []}. diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl index 0b16ec3891..f084a30e63 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl @@ -25,17 +25,13 @@  -include("hipe_ppc.hrl"). -check_and_rewrite(Defun, Coloring, Allocator) -> -  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific), -  check_and_rewrite2(Defun, TempMap, Allocator). +check_and_rewrite(CFG, Coloring, Allocator) -> +  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific, no_context), +  check_and_rewrite2(CFG, TempMap, Allocator). -check_and_rewrite2(Defun, TempMap, Allocator) -> +check_and_rewrite2(CFG, TempMap, Allocator) ->    Strategy = strategy(Allocator), -  #defun{code=Code0} = Defun, -  {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), -  VarRange = {0, hipe_gensym:get_var(ppc)}, -  {Defun#defun{code=Code1, var_range=VarRange}, -   DidSpill}. +  do_bbs(hipe_ppc_cfg:labels(CFG), TempMap, Strategy, CFG, false).  strategy(Allocator) ->    case Allocator of @@ -44,6 +40,13 @@ strategy(Allocator) ->      'naive' -> 'fixed'    end. +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> +  Code0 = hipe_bb:code(BB = hipe_ppc_cfg:bb(CFG0, Lbl)), +  {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), +  CFG = hipe_ppc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), +  do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). 
+  do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) ->    {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy),    do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl index 821aa66c11..81064079aa 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl @@ -23,13 +23,16 @@  -export([check_and_rewrite/2]).  -include("hipe_ppc.hrl"). -check_and_rewrite(Defun, Coloring) -> -  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific_fp), -  #defun{code=Code0} = Defun, -  {Code1,DidSpill} = do_insns(Code0, TempMap, [], false), -  VarRange = {0, hipe_gensym:get_var(ppc)}, -  {Defun#defun{code=Code1, var_range=VarRange}, -   DidSpill}. +check_and_rewrite(CFG, Coloring) -> +  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific_fp, no_context), +  do_bbs(hipe_ppc_cfg:labels(CFG), TempMap, CFG, false). + +do_bbs([], _TempMap, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, CFG0, DidSpill0) -> +  Code0 = hipe_bb:code(BB = hipe_ppc_cfg:bb(CFG0, Lbl)), +  {Code, DidSpill} = do_insns(Code0, TempMap, [], DidSpill0), +  CFG = hipe_ppc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), +  do_bbs(Lbls, TempMap, CFG, DidSpill).  do_insns([I|Insns], TempMap, Accum, DidSpill0) ->    {NewIs, DidSpill1} = do_insn(I, TempMap), diff --git a/lib/hipe/ppc/hipe_ppc_registers.erl b/lib/hipe/ppc/hipe_ppc_registers.erl index f4781d5ed7..8f6d9779fc 100644 --- a/lib/hipe/ppc/hipe_ppc_registers.erl +++ b/lib/hipe/ppc/hipe_ppc_registers.erl @@ -201,6 +201,8 @@ is_arg(R) ->      _ -> false    end. +%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is +%% hard-coded in hipe_ppc_defuse:insn_defs_all_gpr/1  call_clobbered() ->		% does the RA strip the type or not?    
[{?R0,tagged},{?R0,untagged},     %% R1 is reserved for C diff --git a/lib/hipe/ppc/hipe_ppc_subst.erl b/lib/hipe/ppc/hipe_ppc_subst.erl new file mode 100644 index 0000000000..5e43fd6471 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_subst.erl @@ -0,0 +1,82 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%%  +%% Copyright Ericsson AB 2016. All Rights Reserved. +%%  +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%%     http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%  +%% %CopyrightEnd% +%% + +-module(hipe_ppc_subst). +-export([insn_temps/2]). +-include("hipe_ppc.hrl"). + +%% These should be moved to hipe_ppc and exported +-type temp()    :: #ppc_temp{}. +-type oper()    :: temp() | #ppc_simm16{} | #ppc_uimm16{}. +-type arg()     :: temp() | integer(). +-type insn()    :: tuple(). % for now + +-type subst_fun() :: fun((temp()) -> temp()). + +%% @doc Maps over the temporaries in an instruction +-spec insn_temps(subst_fun(), insn()) -> insn(). 
+insn_temps(T, I) -> +  A = fun(O) -> arg_temps(T, O) end, +  O = fun(O) -> oper_temps(T, O) end, +  case I of +      #alu{dst=D,src1=L,src2=R} -> I#alu{dst=T(D),src1=T(L),src2=O(R)}; +      #b_label{} -> I; +      %% #bc{} -> I; +      #bctr{} -> I; +      #blr{} -> I; +      #cmp{src1=L,src2=R} -> I#cmp{src1=T(L),src2=O(R)}; +      #comment{} -> I; +      #label{} -> I; +      #load{dst=D,base=B} -> I#load{dst=T(D),base=T(B)}; +      #loadx{dst=D,base1=L,base2=R} -> I#loadx{dst=T(D),base1=T(L),base2=T(R)}; +      #mfspr{dst=D} -> I#mfspr{dst=T(D)}; +      #mtcr{src=S} -> I#mtcr{src=T(S)}; +      #mtspr{src=S} -> I#mtspr{src=T(S)}; +      #pseudo_bc{} -> I; +      #pseudo_call{func=F} when not is_record(F, ppc_temp) -> I; +      #pseudo_call_prepare{} -> I; +      #pseudo_li{dst=D} -> I#pseudo_li{dst=T(D)}; +      #pseudo_move{dst=D,src=S} -> I#pseudo_move{dst=T(D),src=T(S)}; +      #pseudo_tailcall{func=F,stkargs=Stk} when not is_record(F, ppc_temp) -> +	  I#pseudo_tailcall{stkargs=lists:map(A,Stk)}; +      #pseudo_tailcall_prepare{} -> I; +      #store{src=S,base=B} -> I#store{src=T(S),base=T(B)}; +      #storex{src=S,base1=L,base2=R} -> +	  I#storex{src=T(S),base1=T(L),base2=T(R)}; +      #unary{dst=D,src=S} -> I#unary{dst=T(D),src=T(S)}; +      #lfd{dst=D,base=B} -> I#lfd{dst=T(D),base=T(B)}; +      #lfdx{dst=D,base1=L,base2=R} -> I#lfdx{dst=T(D),base1=T(L),base2=T(R)}; +      #stfd{src=S,base=B} -> I#stfd{src=T(S),base=T(B)}; +      #stfdx{src=S,base1=L,base2=R} -> I#stfdx{src=T(S),base1=T(L),base2=T(R)}; +      #fp_binary{dst=D,src1=L,src2=R} -> +	  I#fp_binary{dst=T(D),src1=T(L),src2=T(R)}; +      #fp_unary{dst=D,src=S} -> I#fp_unary{dst=T(D),src=T(S)}; +      #pseudo_fmove{dst=D,src=S} -> I#pseudo_fmove{dst=T(D),src=T(S)} +  end. + +-spec oper_temps(subst_fun(), oper()) -> oper(). +oper_temps(SubstTemp,  T=#ppc_temp{}) -> SubstTemp(T); +oper_temps(_SubstTemp, I=#ppc_simm16{}) -> I; +oper_temps(_SubstTemp, I=#ppc_uimm16{}) -> I. 
+ +-spec arg_temps(subst_fun(), arg()) -> arg(). +arg_temps(_SubstTemp, Imm) when is_integer(Imm) -> Imm; +arg_temps(SubstTemp,  T=#ppc_temp{}) -> SubstTemp(T). diff --git a/lib/hipe/ppc/hipe_rtl_to_ppc.erl b/lib/hipe/ppc/hipe_rtl_to_ppc.erl index a994659616..a01e67a789 100644 --- a/lib/hipe/ppc/hipe_rtl_to_ppc.erl +++ b/lib/hipe/ppc/hipe_rtl_to_ppc.erl @@ -1031,7 +1031,7 @@ conv_return(I, Map, Data) ->    {I2, Map0, Data}.  conv_store(I, Map, Data) -> -  {Base1, Map0} = conv_dst(hipe_rtl:store_base(I), Map), +  {Base1, Map0} = conv_src(hipe_rtl:store_base(I), Map),    {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0),    {Base2, Map2} = conv_src(hipe_rtl:store_offset(I), Map1),    StoreSize = hipe_rtl:store_size(I), @@ -1056,13 +1056,28 @@ mk_store(Src, Base1, Base2, StoreSize) ->    end.  mk_store2(Src, Base1, Base2, StOp) -> -  case hipe_ppc:is_temp(Base2) of +  case hipe_ppc:is_temp(Base1) of      true -> -      mk_store_rr(Src, Base1, Base2, StOp); +      case hipe_ppc:is_temp(Base2) of +	true -> +	  mk_store_rr(Src, Base1, Base2, StOp); +	_ -> +	  mk_store_ri(Src, Base1, Base2, StOp) +      end;      _ -> -      mk_store_ri(Src, Base1, Base2, StOp) +      case hipe_ppc:is_temp(Base2) of +	true -> +	  mk_store_ri(Src, Base2, Base1, StOp); +	_ -> +	  mk_store_ii(Src, Base1, Base2, StOp) +      end    end. -   + +mk_store_ii(Src, Base, Disp, StOp) -> +  Tmp = new_untagged_temp(), +  mk_li(Tmp, Base, +	mk_store_ri(Src, Tmp, Disp, StOp)). +  mk_store_ri(Src, Base, Disp, StOp) ->    hipe_ppc:mk_store(StOp, Src, Disp, Base, 'new', []). 
diff --git a/lib/hipe/regalloc/Makefile b/lib/hipe/regalloc/Makefile index aaa4418f37..209f230a9b 100644 --- a/lib/hipe/regalloc/Makefile +++ b/lib/hipe/regalloc/Makefile @@ -51,6 +51,7 @@ MODULES = hipe_ig hipe_ig_moves hipe_moves \  	  hipe_coalescing_regalloc \  	  hipe_graph_coloring_regalloc \  	  hipe_regalloc_loop \ +	  hipe_regalloc_prepass \  	  hipe_ls_regalloc \  	  hipe_ppc_specific hipe_ppc_specific_fp \  	  hipe_sparc_specific hipe_sparc_specific_fp \ @@ -123,7 +124,6 @@ $(EBIN)/hipe_amd64_specific_x87.beam: hipe_x86_specific_x87.erl  $(EBIN)/hipe_coalescing_regalloc.beam: ../main/hipe.hrl  $(EBIN)/hipe_graph_coloring_regalloc.beam: ../main/hipe.hrl  $(EBIN)/hipe_ig.beam: ../main/hipe.hrl ../flow/cfg.hrl hipe_spillcost.hrl -$(EBIN)/hipe_ig_moves.beam: ../util/hipe_vectors.hrl  $(EBIN)/hipe_ls_regalloc.beam: ../main/hipe.hrl  $(EBIN)/hipe_optimistic_regalloc.beam: ../main/hipe.hrl  $(EBIN)/hipe_regalloc_loop.beam: ../main/hipe.hrl diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl index 50e5869d45..890df1b81a 100644 --- a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl +++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl @@ -21,38 +21,47 @@  -module(hipe_amd64_specific_sse2). --export([number_of_temporaries/1]). +-export([number_of_temporaries/2]).  % The following exports are used as M:F(...) calls from other modules;  %% e.g. hipe_amd64_ra_ls. 
--export([analyze/1, -         bb/2, -         args/1, -         labels/1, -         livein/2, -         liveout/2, -         uses/1, -         defines/1, -	 def_use/1, -	 is_arg/1,	%% used by hipe_ls_regalloc -	 is_move/1, -	 is_fixed/1,	%% used by hipe_graph_coloring_regalloc -         is_global/1, -	 is_precoloured/1, -         reg_nr/1, -	 non_alloc/1, -	 allocatable/0, -         physical_name/1, -	 all_precoloured/0, -	 new_spill_index/1,	%% used by hipe_ls_regalloc -	 var_range/1, -         breadthorder/1, -         postorder/1, -         reverse_postorder/1]). +-export([analyze/2, +	 bb/3, +	 args/2, +	 labels/2, +	 livein/3, +	 liveout/3, +	 uses/2, +	 defines/2, +	 defines_all_alloc/2, +	 def_use/2, +	 is_arg/2,	%% used by hipe_ls_regalloc +	 is_move/2, +	 is_fixed/2,	%% used by hipe_graph_coloring_regalloc +	 is_global/2, +	 is_precoloured/2, +	 reg_nr/2, +	 non_alloc/2, +	 allocatable/1, +	 allocatable/2, +	 temp0/1, +	 physical_name/2, +	 all_precoloured/1, +	 new_spill_index/2,	%% used by hipe_ls_regalloc +	 var_range/2, +	 breadthorder/2, +	 postorder/2, +	 reverse_postorder/2]).  %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, -	 check_and_rewrite/2]). +-export([check_and_rewrite/3, +	 check_and_rewrite/4]). + +%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, +	 update_reg_nr/3, +	 update_bb/4, +	 subst_temps/3]).  %%---------------------------------------------------------------------------- @@ -60,86 +69,99 @@  %%---------------------------------------------------------------------------- -defun_to_cfg(Defun) -> -  hipe_x86_cfg:init(Defun). +check_and_rewrite(CFG, Coloring, no_context) -> +  hipe_amd64_ra_sse2_postconditions:check_and_rewrite(CFG, Coloring). -check_and_rewrite(Defun, Coloring) -> -  hipe_amd64_ra_sse2_postconditions:check_and_rewrite(Defun, Coloring). +check_and_rewrite(CFG, Coloring, Strategy, no_context) -> +  hipe_amd64_ra_sse2_postconditions:check_and_rewrite( +    CFG, Coloring, Strategy). 
-reverse_postorder(CFG) -> +reverse_postorder(CFG, _) ->    hipe_x86_cfg:reverse_postorder(CFG). -breadthorder(CFG) -> +breadthorder(CFG, _) ->    hipe_x86_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) ->    hipe_x86_cfg:postorder(CFG). -is_global(_Reg) -> -  false. +is_global(Reg, _) -> +  hipe_amd64_registers:sse2_temp0() =:= Reg. -is_fixed(_Reg) -> +is_fixed(_Reg, _) ->    false. -is_arg(_Reg) -> +is_arg(_Reg, _) ->    false. --spec args(#cfg{}) -> []. -args(_CFG) -> +-spec args(#cfg{}, no_context) -> []. +args(_CFG, _) ->    []. -non_alloc(_) -> +non_alloc(_, _) ->    [].  %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) ->    hipe_amd64_liveness:analyze(CFG). -livein(Liveness, L) -> +livein(Liveness, L, _) ->    [X || X <- hipe_amd64_liveness:livein(Liveness, L),   	     hipe_x86:temp_is_allocatable(X),  	     hipe_x86:temp_type(X) =:= 'double']. -liveout(BB_in_out_liveness, Label) -> +liveout(BB_in_out_liveness, Label, _) ->    [X || X <- hipe_amd64_liveness:liveout(BB_in_out_liveness, Label),   	     hipe_x86:temp_is_allocatable(X),  	     hipe_x86:temp_type(X) =:= 'double'].  %% Registers stuff -allocatable() -> -  hipe_amd64_registers:allocatable_sse2(). +allocatable(Ctx) -> +  allocatable('normal', Ctx). + +allocatable('normal', _) -> +  hipe_amd64_registers:allocatable_sse2(); +allocatable('linearscan', _) -> +  hipe_amd64_registers:allocatable_sse2() -- +    [hipe_amd64_registers:sse2_temp0()]. -all_precoloured() -> -  allocatable(). +temp0(_) -> +  hipe_amd64_registers:sse2_temp0(). -is_precoloured(Reg) -> -  lists:member(Reg,all_precoloured()). +all_precoloured(Ctx) -> +  allocatable(Ctx). -physical_name(Reg) -> +is_precoloured(Reg, Ctx) -> +  lists:member(Reg,all_precoloured(Ctx)). + +physical_name(Reg, _) ->    Reg.  %% CFG stuff -labels(CFG) -> +labels(CFG, _) ->    hipe_x86_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) ->    hipe_gensym:var_range(x86). --spec number_of_temporaries(#cfg{}) -> non_neg_integer(). 
-number_of_temporaries(_CFG) -> +-spec number_of_temporaries(#cfg{}, no_context) -> non_neg_integer(). +number_of_temporaries(_CFG, _) ->    Highest_temporary = hipe_gensym:get_var(x86),    %% Since we can have temps from 0 to Max adjust by +1.    Highest_temporary + 1. -bb(CFG, L) -> +bb(CFG, L, _) ->    hipe_x86_cfg:bb(CFG, L). +update_bb(CFG,L,BB,_) -> +  hipe_x86_cfg:bb_add(CFG,L,BB). +  %% AMD64 stuff -def_use(Instruction) -> +def_use(Instruction, _) ->    {[X || X <- hipe_amd64_defuse:insn_def(Instruction),    	   hipe_x86:temp_is_allocatable(X),   	   hipe_x86:temp_type(X) =:= 'double'], @@ -148,17 +170,19 @@ def_use(Instruction) ->  	   hipe_x86:temp_type(X) =:= 'double']    }. -uses(I) -> +uses(I, _) ->    [X || X <- hipe_amd64_defuse:insn_use(I),   	     hipe_x86:temp_is_allocatable(X),   	     hipe_x86:temp_type(X) =:= 'double']. -defines(I) -> +defines(I, _) ->    [X || X <- hipe_amd64_defuse:insn_def(I),  	     hipe_x86:temp_is_allocatable(X),  	     hipe_x86:temp_type(X) =:= 'double']. -is_move(Instruction) -> +defines_all_alloc(I, _) -> hipe_amd64_defuse:insn_defs_all(I). + +is_move(Instruction, _) ->    case hipe_x86:is_fmove(Instruction) of      true ->        Src = hipe_x86:fmove_src(Instruction), @@ -168,9 +192,26 @@ is_move(Instruction) ->      false -> false    end. -reg_nr(Reg) -> +reg_nr(Reg, _) ->    hipe_x86:temp_reg(Reg). --spec new_spill_index(non_neg_integer()) -> pos_integer(). -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +new_reg_nr(_) -> +  hipe_gensym:get_next_var(x86). + +update_reg_nr(Nr, _Temp, _) -> +  hipe_x86:mk_temp(Nr, 'double'). + +subst_temps(SubstFun, Instr, _) -> +  hipe_amd64_subst:insn_temps( +    fun(Op) -> +	case hipe_x86:temp_is_allocatable(Op) +	  andalso hipe_x86:temp_type(Op) =:= 'double' +	of +	  true -> SubstFun(Op); +	  false -> Op +	end +    end, Instr). + +-spec new_spill_index(non_neg_integer(), no_context) -> pos_integer(). 
+new_spill_index(SpillIndex, _) when is_integer(SpillIndex) ->    SpillIndex + 1. diff --git a/lib/hipe/regalloc/hipe_arm_specific.erl b/lib/hipe/regalloc/hipe_arm_specific.erl index 4e34cb1d99..06ab17b0e9 100644 --- a/lib/hipe/regalloc/hipe_arm_specific.erl +++ b/lib/hipe/regalloc/hipe_arm_specific.erl @@ -22,114 +22,123 @@  -module(hipe_arm_specific).  %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 -	 ,analyze/1 -	 ,labels/1 -	 ,all_precoloured/0 -	 ,bb/2 -	 ,liveout/2 -	 ,reg_nr/1 -	 ,def_use/1 -	 ,is_move/1 -	 ,is_precoloured/1 -	 ,var_range/1 -	 ,allocatable/0 -	 ,non_alloc/1 -	 ,physical_name/1 -	 ,reverse_postorder/1 -	 ,livein/2 -	 ,uses/1 -	 ,defines/1 +-export([number_of_temporaries/2 +	 ,analyze/2 +	 ,labels/2 +	 ,all_precoloured/1 +	 ,bb/3 +	 ,liveout/3 +	 ,reg_nr/2 +	 ,def_use/2 +	 ,is_move/2 +	 ,is_precoloured/2 +	 ,var_range/2 +	 ,allocatable/1 +	 ,non_alloc/2 +	 ,physical_name/2 +	 ,reverse_postorder/2 +	 ,livein/3 +	 ,uses/2 +	 ,defines/2 +	 ,defines_all_alloc/2  	]).  %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]).  %% for hipe_ls_regalloc: --export([args/1, is_arg/1, is_global/1, new_spill_index/1]). --export([breadthorder/1, postorder/1]). +-export([args/2, is_arg/2, is_global/2, new_spill_index/2]). +-export([breadthorder/2, postorder/2]).  %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, -	 check_and_rewrite/2]). +-export([check_and_rewrite/3]). -defun_to_cfg(Defun) -> -  hipe_arm_cfg:init(Defun). +%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, +	 update_reg_nr/3, +	 update_bb/4, +	 subst_temps/3]). -check_and_rewrite(Defun, Coloring) -> -  hipe_arm_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring, no_context) -> +  hipe_arm_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) ->    hipe_arm_cfg:reverse_postorder(CFG). 
-non_alloc(CFG) -> -  non_alloc(hipe_arm_registers:nr_args(), hipe_arm_cfg:params(CFG)). +non_alloc(CFG, no_context) -> +  non_alloc_1(hipe_arm_registers:nr_args(), hipe_arm_cfg:params(CFG)).  %% same as hipe_arm_frame:fix_formals/2 -non_alloc(0, Rest) -> Rest; -non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); -non_alloc(_, []) -> []. +non_alloc_1(0, Rest) -> Rest; +non_alloc_1(N, [_|Rest]) -> non_alloc_1(N-1, Rest); +non_alloc_1(_, []) -> [].  %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) ->    hipe_arm_liveness_gpr:analyse(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) ->    [X || X <- hipe_arm_liveness_gpr:livein(Liveness,L),  	hipe_arm:temp_is_allocatable(X)]. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) ->    [X || X <- hipe_arm_liveness_gpr:liveout(BB_in_out_liveness,Label),  	hipe_arm:temp_is_allocatable(X)].  %% Registers stuff -allocatable() -> +allocatable(no_context) ->    hipe_arm_registers:allocatable_gpr(). -all_precoloured() -> +all_precoloured(no_context) ->    hipe_arm_registers:all_precoloured(). -is_precoloured(Reg) -> +is_precoloured(Reg, _) ->    hipe_arm_registers:is_precoloured_gpr(Reg). -is_fixed(R) -> +is_fixed(R, _) ->    hipe_arm_registers:is_fixed(R). -physical_name(Reg) -> +physical_name(Reg, _) ->    Reg.  %% CFG stuff -labels(CFG) -> +labels(CFG, _) ->    hipe_arm_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) ->    hipe_gensym:var_range(arm). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) ->    Highest_temporary = hipe_gensym:get_var(arm),    %% Since we can have temps from 0 to Max adjust by +1.    Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) ->    hipe_arm_cfg:bb(CFG,L). +update_bb(CFG,L,BB,_) -> +  hipe_arm_cfg:bb_add(CFG,L,BB). +  %% ARM stuff -def_use(Instruction) -> -  {defines(Instruction), uses(Instruction)}. +def_use(Instruction, Ctx) -> +  {defines(Instruction, Ctx), uses(Instruction, Ctx)}. 
-uses(I) -> +uses(I, _) ->    [X || X <- hipe_arm_defuse:insn_use_gpr(I),  	hipe_arm:temp_is_allocatable(X)]. -defines(I) -> +defines(I, _) ->    [X || X <- hipe_arm_defuse:insn_def_gpr(I),  	hipe_arm:temp_is_allocatable(X)]. -is_move(Instruction) -> +defines_all_alloc(I, _) -> +  hipe_arm_defuse:insn_defs_all_gpr(I). + +is_move(Instruction, _) ->    case hipe_arm:is_pseudo_move(Instruction) of      true ->        Dst = hipe_arm:pseudo_move_dst(Instruction), @@ -142,28 +151,43 @@ is_move(Instruction) ->      false -> false    end. -reg_nr(Reg) -> +reg_nr(Reg, _) ->    hipe_arm:temp_reg(Reg). +new_reg_nr(_) -> +  hipe_gensym:get_next_var(arm). + +update_reg_nr(Nr, Temp, _) -> +  hipe_arm:mk_temp(Nr, hipe_arm:temp_type(Temp)). + +subst_temps(SubstFun, Instr, _) -> +  hipe_arm_subst:insn_temps( +    fun(Op) -> +	case hipe_arm:temp_is_allocatable(Op) of +	  true -> SubstFun(Op); +	  false -> Op +	end +    end, Instr). +  %%% Linear Scan stuff -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) ->    SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) ->    hipe_arm_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) ->    hipe_arm_cfg:postorder(CFG). -is_global(R) -> +is_global(R, _) ->    R =:= hipe_arm_registers:temp1() orelse    R =:= hipe_arm_registers:temp2() orelse    R =:= hipe_arm_registers:temp3() orelse    hipe_arm_registers:is_fixed(R). -is_arg(R) -> +is_arg(R, _) ->    hipe_arm_registers:is_arg(R). -args(CFG) -> +args(CFG, _) ->    hipe_arm_registers:args(hipe_arm_cfg:arity(CFG)). diff --git a/lib/hipe/regalloc/hipe_coalescing_regalloc.erl b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl index e2f817d369..00bfbaa1b6 100644 --- a/lib/hipe/regalloc/hipe_coalescing_regalloc.erl +++ b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl @@ -30,7 +30,7 @@  %%-----------------------------------------------------------------------  -module(hipe_coalescing_regalloc). 
--export([regalloc/5]). +-export([regalloc/7]).  %%-ifndef(DEBUG).  %%-define(DEBUG,true). @@ -51,19 +51,21 @@  %%  %% Returns:  %%   Coloring    -- A coloring for specified CFG -%%   SpillIndex0 -- A new spill index +%%   SpillIndex2 -- A new spill index  %%----------------------------------------------------------------------- -regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> +regalloc(CFG, Liveness, SpillIndex, SpillLimit, TargetMod, TargetContext, +	 _Options) -> +  Target = {TargetMod, TargetContext},    %% Build interference graph    ?debug_msg("Build IG\n", []), -  IG = hipe_ig:build(CFG, Target), +  IG = hipe_ig:build(CFG, Liveness, TargetMod, TargetContext),    %% io:format("IG: ~p\n", [IG]),    ?debug_msg("Init\n", []), -  Num_Temps = Target:number_of_temporaries(CFG), +  Num_Temps = TargetMod:number_of_temporaries(CFG,TargetContext),    ?debug_msg("Coalescing RA: num_temps = ~p~n", [Num_Temps]), -  Allocatable = Target:allocatable(), +  Allocatable = TargetMod:allocatable(TargetContext),    K = length(Allocatable),    All_colors = colset_from_list(Allocatable), @@ -72,7 +74,8 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->    Move_sets = hipe_moves:new(IG),    ?debug_msg("Build Worklist\n", []), -  Worklists = hipe_reg_worklists:new(IG, Target, CFG, Move_sets, K, Num_Temps), +  Worklists = hipe_reg_worklists:new(IG, TargetMod, TargetContext, CFG, +				     Move_sets, K, Num_Temps),    Alias = initAlias(Num_Temps),    ?debug_msg("Do coloring\n~p~n", [Worklists]), @@ -81,10 +84,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->    %% io:format("SelStk0 ~w\n",[SelStk0]),    ?debug_msg("Init node sets\n", []),    Node_sets = hipe_node_sets:new(), -  %% io:format("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,Target:non_alloc(CFG)]), +  %% io:format("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,non_alloc(CFG,Target)]),    ?debug_msg("Default coloring\n", []),    {Color0,Node_sets1} =  -    defaultColoring(Target:all_precoloured(), + 
   defaultColoring(TargetMod:all_precoloured(TargetContext),  		    initColor(Num_Temps), Node_sets, Target),    ?debug_msg("Assign colors\n", []), @@ -94,9 +97,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->    %% io:format("color0:~w\nColor1:~w\nNodes:~w\nNodes2:~w\nNum_Temps:~w\n",[Color0,Color1,Node_sets,Node_sets2,Num_Temps]),    ?debug_msg("Build mapping ~p\n", [Node_sets2]), -  Coloring = build_namelist(Node_sets2, SpillIndex, Alias0, Color1), +  {Coloring, SpillIndex2} = +    build_namelist(Node_sets2, SpillIndex, Alias0, Color1),    ?debug_msg("Coloring ~p\n", [Coloring]), -  Coloring. +  {Coloring, SpillIndex2}.  %%----------------------------------------------------------------------  %% Function:    do_coloring @@ -379,7 +383,7 @@ assignColors(Stack, NodeSets, Color, Alias, AllColors, Target) ->  	false -> % Colour case  	  Col = colset_smallest(OkColors),  	  NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), -	  Color1 = setColor(Node, Target:physical_name(Col), Color), +	  Color1 = setColor(Node, physical_name(Col,Target), Color),  	  assignColors(Stack1, NodeSets1, Color1, Alias, AllColors, Target)        end    end. @@ -402,7 +406,7 @@ assignColors(Stack, NodeSets, Color, Alias, AllColors, Target) ->  defaultColoring([], Color, NodeSets, _Target) ->    {Color,NodeSets};  defaultColoring([Reg|Regs], Color, NodeSets, Target) -> -  Color1 = setColor(Reg,Target:physical_name(Reg), Color), +  Color1 = setColor(Reg,physical_name(Reg,Target), Color),    NodeSets1 = hipe_node_sets:add_colored(Reg, NodeSets),    defaultColoring(Regs, Color1, NodeSets1, Target). 
@@ -567,7 +571,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) ->        ?debug_msg("Testing nodes ~p and ~p for coalescing~n",[Dest,Source]),        Alias_src = getAlias(Source, Alias),        Alias_dst = getAlias(Dest, Alias), -      {U,V} = case Target:is_precoloured(Alias_dst) of +      {U,V} = case is_precoloured(Alias_dst,Target) of  		true -> {Alias_dst, Alias_src};  		false -> {Alias_src, Alias_dst}  	      end, @@ -577,7 +581,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) ->  	  Worklists1 = add_worklist(Worklists, U, K, Moves1, IG, Target),  	  {Moves1, IG, Worklists1, Alias};  	 true -> -	  case (Target:is_precoloured(V) orelse +	  case (is_precoloured(V,Target) orelse  		hipe_ig:nodes_are_adjacent(U, V, IG)) of   	    true ->  	      Moves1 = Moves0, % drop constrained move Move @@ -585,7 +589,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) ->  	      Worklists2 = add_worklist(Worklists1, V, K, Moves1, IG, Target),  	      {Moves1, IG, Worklists2, Alias};  	    false -> -	      case (case Target:is_precoloured(U) of +	      case (case is_precoloured(U,Target) of  		      true ->  			AdjV = hipe_ig:node_adj_list(V, IG),  			all_adjacent_ok(AdjV, U, Worklists, IG, K, Target); @@ -627,7 +631,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) ->  %%----------------------------------------------------------------------  add_worklist(Worklists, U, K, Moves, IG, Target) -> -  case (not(Target:is_precoloured(U)) +  case (not(is_precoloured(U,Target))  	andalso not(hipe_moves:move_related(U, Moves))  	andalso (hipe_ig:is_trivially_colourable(U, K, IG))) of      true -> @@ -711,7 +715,7 @@ combine(U, V, IG, Worklists, Moves, Alias, K, Target) ->  combine_edges([], _U, IG, Worklists, Moves, _K, _Target) ->    {IG, Worklists, Moves}; -combine_edges([T|Ts], U, IG, Worklists, Moves, K, Target) -> +combine_edges([T|Ts], U, IG, Worklists, Moves, K, Target={TgtMod,TgtCtx}) ->    case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of     
 true -> combine_edges(Ts, U, IG, Worklists, Moves, K, Target);      _ -> @@ -728,7 +732,7 @@ combine_edges([T|Ts], U, IG, Worklists, Moves, K, Target) ->        %% worklist, and that's where decrement_degree() expects to find it.        %% This issue is not covered in the published algorithm.        OldDegree = hipe_ig:get_node_degree(T, IG), -      IG1 = hipe_ig:add_edge(T, U, IG, Target), +      IG1 = hipe_ig:add_edge(T, U, IG, TgtMod, TgtCtx),        NewDegree = hipe_ig:get_node_degree(T, IG1),        Worklists0 =  	if NewDegree =:= K, OldDegree =:= K-1 -> @@ -767,7 +771,7 @@ combine_edges([T|Ts], U, IG, Worklists, Moves, K, Target) ->  ok(T, R, IG, K, Target) ->    ((hipe_ig:is_trivially_colourable(T, K, IG)) -   orelse Target:is_precoloured(T) +   orelse is_precoloured(T,Target)     orelse hipe_ig:nodes_are_adjacent(T, R, IG)).  %%---------------------------------------------------------------------- @@ -1028,3 +1032,15 @@ freezeEm3(_U, V, _M, K, WorkLists, Moves, IG, _Alias) ->      false ->        {WorkLists, Moves1}    end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to external functions. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +is_precoloured(R, {TgtMod,TgtCtx}) -> +  TgtMod:is_precoloured(R,TgtCtx). + +physical_name(R, {TgtMod,TgtCtx}) -> +  TgtMod:physical_name(R,TgtCtx). diff --git a/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl index bc6e442236..e91734d8be 100644 --- a/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl +++ b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl @@ -51,7 +51,7 @@  %%   -module(hipe_graph_coloring_regalloc). --export([regalloc/5]). +-export([regalloc/7]).  %%-ifndef(DO_ASSERT).  %%-define(DO_ASSERT, true). @@ -77,18 +77,21 @@  %% that the coloring agrees with the interference graph (that is, that  %% no neighbors have the same register or spill location). 
-%% @spec regalloc(#cfg{}, non_neg_fixnum(), non_neg_fixnum(), atom(), list()) -> {, non_neg_fixnum()} +%% @spec regalloc(#cfg{}, liveness(), non_neg_fixnum(), non_neg_fixnum(), +%%                module(), tgt_ctx(), list()) -> {, non_neg_fixnum()} -regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> -  PhysRegs = Target:allocatable(), +regalloc(CFG, Live, SpillIndex, SpillLimit, TargetMod, TargetContext, +	 _Options) -> +  Target = {TargetMod, TargetContext}, +  PhysRegs = allocatable(Target),    ?report2("building IG~n", []), -  {IG, Spill} = build_ig(CFG, Target), +  {IG, Spill} = build_ig(CFG, Live, Target),    %% check_ig(IG),    ?report3("graph: ~p~nphysical regs: ~p~n", [list_ig(IG), PhysRegs]),    %% These nodes *can't* be allocated to registers.  -  NotAllocatable = [Target:reg_nr(X) || X <- Target:non_alloc(CFG)], +  NotAllocatable = non_alloc(CFG, Target),    %% i.e. Arguments on x86    ?report2("Nonalloc ~w~n", [NotAllocatable]), @@ -97,7 +100,7 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->  	  ordsets:from_list(PhysRegs),   	  SpillIndex,  	  SpillLimit, -	  Target:number_of_temporaries(CFG), +	  number_of_temporaries(CFG, Target),  	  Target, NotAllocatable),    Coloring = [{X, {reg, X}} || X <- NotAllocatable] ++ Cols,    ?ASSERT(check_coloring(Coloring, IG, Target)), @@ -112,15 +115,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->  %% Returns {Interference_graph, Spill_cost_dictionary}  %% -build_ig(CFG, Target) -> -  try build_ig0(CFG, Target) -  catch error:Rsn -> exit({?MODULE, build_ig, Rsn}) -  end. - -build_ig0(CFG, Target) -> -  Live = Target:analyze(CFG), -  NumN = Target:number_of_temporaries(CFG),  % poss. N-1? -  {IG, Spill} = build_ig_bbs(Target:labels(CFG),  +build_ig(CFG, Live, Target) -> +  NumN = number_of_temporaries(CFG, Target),  % poss. N-1? 
+  {IG, Spill} = build_ig_bbs(labels(CFG, Target),  			     CFG,   			     Live,  			     empty_ig(NumN),  @@ -208,17 +205,8 @@ set_spill_cost(X, N, Spill) ->  %%     * add low-degree neighbors of z to low  %%     * restart the while-loop above -color(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, NotAllocatable) -> -   try color_0(IG, Spill, PhysRegs, SpillIx, SpillLimit, -	       NumNodes, Target, NotAllocatable) -   catch -     error:Rsn -> -       ?error_msg("Coloring failed with ~p~n", [Rsn]), -       ?EXIT(Rsn) -   end. - -color_0(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, -	NotAllocatable) ->  +color(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, +      NotAllocatable) ->    ?report("simplification of IG~n", []),    K = ordsets:size(PhysRegs),    Nodes = list_ig(IG), @@ -234,7 +222,7 @@ color_0(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target,    ?report(" starting with low degree nodes ~p~n",[Low]),    EmptyStk = [], -  Precolored = Target:all_precoloured(), +  Precolored = all_precoloured(Target),    {Stk, NewSpillIx} =       simplify(Low, NumNodes, Precolored,  	     IG, Spill, K, SpillIx, EmptyStk, @@ -415,7 +403,7 @@ spill_costs([{N,Info}|Ns], IG, Vis, Spill, SpillLimit, Target) ->  	true ->  	  spill_costs(Ns,IG,Vis,Spill, SpillLimit, Target);  	_ -> -	  case Target:is_fixed(N) of +	  case is_fixed(N, Target) of  	    true ->  	      spill_costs(Ns, IG, Vis, Spill, SpillLimit, Target);  	    false -> @@ -772,18 +760,36 @@ valid_coloring(X, C, [_|Ys]) ->  %% *** INTERFACES TO OTHER MODULES ***  %% -liveout(CFG, L, Target) -> -  ordsets:from_list(reg_names(Target:liveout(CFG, L), Target)). +all_precoloured({TgtMod,TgtCtx}) -> +  TgtMod:all_precoloured(TgtCtx). + +allocatable({TgtMod,TgtCtx}) -> +  TgtMod:allocatable(TgtCtx). + +is_fixed(Reg, {TgtMod,TgtCtx}) -> +  TgtMod:is_fixed(Reg, TgtCtx). + +labels(CFG, {TgtMod,TgtCtx}) -> +  TgtMod:labels(CFG, TgtCtx). 
+ +liveout(CFG, L, Target={TgtMod,TgtCtx}) -> +  ordsets:from_list(reg_names(TgtMod:liveout(CFG, L, TgtCtx), Target)). + +bb(CFG, L, {TgtMod,TgtCtx}) -> +  hipe_bb:code(TgtMod:bb(CFG, L, TgtCtx)). + +def_use(X, Target={TgtMod,TgtCtx}) -> +  {ordsets:from_list(reg_names(TgtMod:defines(X,TgtCtx), Target)), +   ordsets:from_list(reg_names(TgtMod:uses(X,TgtCtx), Target))}. -bb(CFG, L, Target) -> -  hipe_bb:code(Target:bb(CFG, L)). +non_alloc(CFG, Target={TgtMod,TgtCtx}) -> +  reg_names(TgtMod:non_alloc(CFG, TgtCtx), Target). -def_use(X, Target) -> -  {ordsets:from_list(reg_names(Target:defines(X), Target)),  -   ordsets:from_list(reg_names(Target:uses(X), Target))}. +number_of_temporaries(CFG, {TgtMod,TgtCtx}) -> +  TgtMod:number_of_temporaries(CFG, TgtCtx). -reg_names(Regs, Target) -> -  [Target:reg_nr(X) || X <- Regs]. +reg_names(Regs, {TgtMod,TgtCtx}) -> +  [TgtMod:reg_nr(X,TgtCtx) || X <- Regs].  %%  %% Precoloring: use this version when a proper implementation of @@ -803,5 +809,5 @@ precolor0([R|Rs], Cols, Target) ->    {[{R, {reg, physical_name(R, Target)}}|Cs],      set_color(R, physical_name(R, Target), Cols1)}. -physical_name(X, Target) -> -  Target:physical_name(X). +physical_name(X, {TgtMod,TgtCtx}) -> +  TgtMod:physical_name(X, TgtCtx). diff --git a/lib/hipe/regalloc/hipe_ig.erl b/lib/hipe/regalloc/hipe_ig.erl index 8fd5d0df1f..81eee2e03c 100644 --- a/lib/hipe/regalloc/hipe_ig.erl +++ b/lib/hipe/regalloc/hipe_ig.erl @@ -28,7 +28,7 @@  -module(hipe_ig). --export([build/2,  +-export([build/4,  	 nodes_are_adjacent/3,  	 node_spill_cost/2,  	 node_adj_list/2, @@ -38,8 +38,8 @@  	 spill_costs/1,  	 adj_list/1,  	 %% adj_set/1, -	 add_edge/4, -	 remove_edge/4, +	 add_edge/5, +	 remove_edge/5,  	 %% set_adj_set/2,  	 %% set_adj_list/2,  	 %% set_ig_moves/2, @@ -64,6 +64,9 @@  -include("../flow/cfg.hrl").  -include("hipe_spillcost.hrl"). +-type target_context() :: any(). +-type target() :: {TargetMod :: module(), TargetContext :: target_context()}. 
+  %%----------------------------------------------------------------------  -record(igraph, {adj_set, adj_list, ig_moves, degree, @@ -78,11 +81,11 @@  %% degree, and testing for trivial colourability (degree < K).  %%---------------------------------------------------------------------- -degree_new(No_temporaries, Target) -> +degree_new(No_temporaries, {TargetMod, TargetCtx}) ->    Degree = hipe_bifs:array(No_temporaries, 0), -  K = length(Target:allocatable()), +  K = length(TargetMod:allocatable(TargetCtx)),    Inf = K + No_temporaries, -  precoloured_to_inf_degree(Target:all_precoloured(), Inf, Degree). +  precoloured_to_inf_degree(TargetMod:all_precoloured(TargetCtx), Inf, Degree).  precoloured_to_inf_degree([], _Inf, Degree) -> Degree;  precoloured_to_inf_degree([P|Ps], Inf, Degree) -> @@ -344,7 +347,7 @@ set_spill_costs(Spill_costs, IG) -> IG#igraph{spill_costs = Spill_costs}.  %%   A new interference record  %%---------------------------------------------------------------------- --spec initial_ig(non_neg_integer(), atom()) -> #igraph{}. +-spec initial_ig(non_neg_integer(), target()) -> #igraph{}.  initial_ig(NumTemps, Target) ->    #igraph{adj_set     = adjset_new(NumTemps), @@ -361,20 +364,21 @@ initial_ig(NumTemps, Target) ->  %% Description: Constructs an interference graph for the specifyed CFG.  %%  %% Parameters: -%%   CFG     -- A Control Flow Graph -%%   Target  -- The module that contains the target-specific functions +%%   CFG       -- A Control Flow Graph +%%   TargetMod -- The module that contains the target-specific functions +%%   TargetCtx -- Context data to pass to TargetMod  %%  %% Returns:   %%   An interference graph for the given CFG.  %%---------------------------------------------------------------------- --spec build(#cfg{}, atom()) -> #igraph{}. +-spec build(#cfg{}, Liveness::_, module(), target_context()) -> #igraph{}. 
-build(CFG, Target) -> -  BBs_in_out_liveness = Target:analyze(CFG), -  Labels = Target:labels(CFG), +build(CFG, BBs_in_out_liveness, TargetMod, TargetCtx) -> +  Target = {TargetMod, TargetCtx}, +  Labels = TargetMod:labels(CFG, TargetCtx),    %% How many temporaries exist? -  NumTemps = Target:number_of_temporaries(CFG), +  NumTemps = TargetMod:number_of_temporaries(CFG, TargetCtx),    IG0 = initial_ig(NumTemps, Target),    %%?debug_msg("initial adjset: ~p\n",[element(2, IG0)]),    %%?debug_msg("initial adjset array: ~.16b\n",[element(3, element(2, IG0))]), @@ -395,7 +399,7 @@ build(CFG, Target) ->  %%   CFG                  --  The Control Flow Graph that we constructs   %%                            the interference graph from.  %%   Target               --  The module containing the target-specific -%%                            functions +%%                            functions, along with its context data  %%  %% Returns:   %%   An interference graph for the given CFG. @@ -404,13 +408,11 @@ build(CFG, Target) ->  analyze_bbs([], _, IG, _, _) -> IG;  analyze_bbs([L|Ls], BBs_in_out_liveness, IG, CFG, Target) ->      % Get basic block associated with label L -    BB = Target:bb(CFG, L), +    BB = bb(CFG, L, Target),      % Get basic block code      BB_code = hipe_bb:code(BB), -    % Temporaries that are live out from this basic block -    BB_liveout = Target:liveout(BBs_in_out_liveness, L), -    % Only temporary numbers -    BB_liveout_numbers = reg_numbers(BB_liveout, Target), +    % Temporaries that are live out from this basic block, only numbers +    BB_liveout_numbers = liveout(BBs_in_out_liveness, L, Target),      % {Liveness, New Interference Graph}      {_, New_ig, Ref} = analyze_bb_instructions(BB_code,  					       ordsets:from_list(BB_liveout_numbers), @@ -433,7 +435,8 @@ analyze_bbs([L|Ls], BBs_in_out_liveness, IG, CFG, Target) ->  %%   Live          --  All temporaries that are live at the time.  
%%                     Live is a set of temporary "numbers only".  %%   IG            --  The interference graph in it's current state -%%   Target        --  The mopdule containing the target-specific functions +%%   Target        --  The mopdule containing the target-specific functions, +%%                     along with its context data.  %%  %% Returns:   %%   Live  --  Temporaries that are live at entery of basic block @@ -449,7 +452,7 @@ analyze_bb_instructions([Instruction|Instructions], Live, IG, Target) ->    {Live0, IG0, Ref} = analyze_bb_instructions(Instructions, Live,   					      IG, Target),    %% Check for temporaries that are defined and used in instruction -  {Def, Use} = Target:def_use(Instruction), +  {Def, Use} = def_use(Instruction, Target),    %% Convert to register numbers    Def_numbers = ordsets:from_list(reg_numbers(Def, Target)),    Use_numbers = ordsets:from_list(reg_numbers(Use, Target)), @@ -501,14 +504,15 @@ analyze_bb_instructions([Instruction|Instructions], Live, IG, Target) ->  %%   Def_numbers  --  Temporaries that are defined at this instruction  %%   Use_numbers  --  Temporaries that are used at this instruction  %%   IG           --  The interference graph in its current state -%%   Target       --  The module containing the target-specific functions +%%   Target       --  The module containing the target-specific functions, along +%%                    with its context data  %% Returns:  %%   Live  --  An updated live set  %%   IG    --  An updated interference graph  %%----------------------------------------------------------------------  analyze_move(Instruction, Live, Def_numbers, Use_numbers, IG, Target) -> -  case Target:is_move(Instruction) of +  case is_move(Instruction,Target) of      true ->        case {Def_numbers, Use_numbers} of  	{[Dst], [Src]} -> @@ -554,8 +558,9 @@ interfere([Define|Defines], Living, IG, Target) ->  %%   Live       --  Current live set  %%   Lives      --  Rest of living temporaries.  
%%   IG         --  An interference graph -%%   Target     --  The module containing the target-specific functions -%% Returns:  +%%   Target     --  The module containing the target-specific functions, along +%%                  with its context data. +%% Returns:  %%   An updated interference graph  %%---------------------------------------------------------------------- @@ -623,11 +628,15 @@ get_moves(IG) ->  %% Parameters:  %%   U          --  A temporary number  %%   V          --  A temporary number -%%   Target     --  The module containing the target-specific functions +%%   TargetMod  --  The module containing the target-specific functions. +%%   TargetCtx  --  Context data to pass to TargetMod  %% Returns:   %%   An updated interference graph.  %%---------------------------------------------------------------------- +add_edge(U, V, IG, TargetMod, TargetCtx) -> +  add_edge(U, V, IG, {TargetMod, TargetCtx}). +  add_edge(U, U, IG, _) -> IG;  add_edge(U, V, IG, Target) ->    case nodes_are_adjacent(U, V, IG) of @@ -652,11 +661,15 @@ add_edge(U, V, IG, Target) ->  %% Parameters:  %%   U          --  A temporary number  %%   V          --  A temporary number -%%   Target     --  The module containing the target-specific functions +%%   TargetMod  --  The module containing the target-specific functions. +%%   TargetCtx  --  Context data for TargetMod.  %% Returns:   %%   An updated interference graph.  %%---------------------------------------------------------------------- +remove_edge(U, V, IG, TargetMod, TargetCtx) -> +  remove_edge(U, V, IG, {TargetMod, TargetCtx}). +  remove_edge(U, U, IG, _) -> IG;  remove_edge(U, V, IG, Target) ->    case nodes_are_adjacent(U, V, IG) of @@ -683,8 +696,8 @@ remove_edge(U, V, IG, Target) ->  %%                             precoloured.  
%%   Adj_list             --  An adj_list  %%   Degree               --  The degree that all nodes currently have -%%   Target               --  The module containing the target-specific  -%%                            functions +%%   Target               --  The module containing the target-specific +%%                            functions, along with its context data.  %%  %% Returns:   %%   Adj_list  --  An updated adj_list data structure @@ -692,7 +705,7 @@ remove_edge(U, V, IG, Target) ->  %%----------------------------------------------------------------------  remove_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> -  case Target:is_precoloured(Temp) of +  case is_precoloured(Temp,Target) of      false ->        New_adj_list = hipe_adj_list:remove_edge(Temp, InterfereTemp, Adj_list),        degree_dec(Temp, Degree), @@ -714,8 +727,8 @@ remove_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) ->  %%                             precoloured.  %%   Adj_list             --  An adj_list  %%   Degree               --  The degree that all nodes currently have -%%   Target               --  The module containing the target-specific  -%%                            functions +%%   Target               --  The module containing the target-specific +%%                            functions, along with its context data.  
%%  %% Returns:   %%   Adj_list  --  An updated adj_list data structure @@ -723,7 +736,7 @@ remove_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) ->  %%----------------------------------------------------------------------  interfere_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> -  case Target:is_precoloured(Temp) of +  case is_precoloured(Temp, Target) of      false ->        New_adj_list = hipe_adj_list:add_edge(Temp, InterfereTemp, Adj_list),        degree_inc(Temp, Degree), @@ -740,13 +753,14 @@ interfere_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) ->  %%  %% Parameters:  %%   TRs     -- A list of temporary registers -%%   Target  -- The module containing the target-specific functions +%%   Target  -- The module containing the target-specific functions, along with +%%              its context data.  %% Returns:   %%   A list of register numbers.  %%---------------------------------------------------------------------- -reg_numbers(Regs, Target) ->  -  [Target:reg_nr(X) || X <- Regs]. +reg_numbers(Regs, {TgtMod, TgtCtx}) -> +  [TgtMod:reg_nr(X,TgtCtx) || X <- Regs].  %%---------------------------------------------------------------------  %% Print functions - only used for debugging @@ -775,3 +789,24 @@ dec_node_degree(Node, IG) ->  is_trivially_colourable(Node, K, IG) ->    degree_is_trivially_colourable(Node, K, degree(IG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to external functions. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +bb(CFG, L, {TgtMod,TgtCtx}) -> +  TgtMod:bb(CFG,L,TgtCtx). + +def_use(Instruction, {TgtMod,TgtCtx}) -> +  TgtMod:def_use(Instruction, TgtCtx). + +is_move(Instruction, {TgtMod,TgtCtx}) -> +  TgtMod:is_move(Instruction, TgtCtx). + +is_precoloured(R, {TgtMod,TgtCtx}) -> +  TgtMod:is_precoloured(R,TgtCtx). 
+ +liveout(Liveness,L, Target={TgtMod,TgtCtx}) -> +  reg_numbers(TgtMod:liveout(Liveness,L,TgtCtx), Target). diff --git a/lib/hipe/regalloc/hipe_ig_moves.erl b/lib/hipe/regalloc/hipe_ig_moves.erl index b679453de0..2a70606dab 100644 --- a/lib/hipe/regalloc/hipe_ig_moves.erl +++ b/lib/hipe/regalloc/hipe_ig_moves.erl @@ -25,8 +25,6 @@  	 new_move/3,  	 get_moves/1]). --include("../util/hipe_vectors.hrl"). -  %%-----------------------------------------------------------------------------  %% The main data structure; its fields are:  %%  - movelist  : mapping from temp to set of associated move numbers @@ -34,11 +32,13 @@  %%  - moveinsns : list of move instructions, in descending move number order  %%  - moveset   : set of move instructions --record(ig_moves, {movelist                    :: hipe_vector(),	 +-record(ig_moves, {movelist                    :: movelist(),  		   nrmoves   = 0               :: non_neg_integer(),  		   moveinsns = []              :: [{_,_}],  		   moveset   = gb_sets:empty() :: gb_sets:set()}). +-type movelist() :: hipe_vectors:vector(ordsets:ordset(non_neg_integer())). +  %%-----------------------------------------------------------------------------  -spec new(non_neg_integer()) -> #ig_moves{}. @@ -66,7 +66,8 @@ new_move(Dst, Src, IG_moves) ->  			moveset = gb_sets:insert(MoveInsn, MoveSet)}    end. --spec add_movelist(non_neg_integer(), non_neg_integer(), hipe_vector()) -> hipe_vector(). +-spec add_movelist(non_neg_integer(), non_neg_integer(), movelist()) +		  -> movelist().  add_movelist(MoveNr, Temp, MoveList) ->    AssocMoves = hipe_vectors:get(MoveList, Temp), @@ -74,7 +75,7 @@ add_movelist(MoveNr, Temp, MoveList) ->    %% ordset due to the ordsets:union in hipe_coalescing_regalloc:combine().    hipe_vectors:set(MoveList, Temp, ordsets:add_element(MoveNr, AssocMoves)). --spec get_moves(#ig_moves{}) -> {hipe_vector(), non_neg_integer(), tuple()}. +-spec get_moves(#ig_moves{}) -> {movelist(), non_neg_integer(), tuple()}.  
get_moves(IG_moves) -> % -> {MoveList, NrMoves, MoveInsns}    {IG_moves#ig_moves.movelist, diff --git a/lib/hipe/regalloc/hipe_ls_regalloc.erl b/lib/hipe/regalloc/hipe_ls_regalloc.erl index d24b803524..0db18f5c62 100644 --- a/lib/hipe/regalloc/hipe_ls_regalloc.erl +++ b/lib/hipe/regalloc/hipe_ls_regalloc.erl @@ -56,7 +56,7 @@  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  -module(hipe_ls_regalloc). --export([regalloc/7]). +-export([regalloc/9]).  %%-define(DEBUG,1).  -define(HIPE_INSTRUMENT_COMPILER, true). @@ -95,11 +95,10 @@  %%   </ol>  %% @end  %%-  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  - -regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, +	 TargetMod, TargetContext) -> +  Target = {TargetMod, TargetContext},    ?debug_msg("LinearScan: ~w\n", [erlang:statistics(runtime)]), -  %%     Step 1: Calculate liveness (Call external implementation.) -  Liveness = liveness(CFG, Target), -  ?debug_msg("liveness (done)~w\n", [erlang:statistics(runtime)]),    USIntervals = calculate_intervals(CFG, Liveness,  				    Entrypoints, Options, Target),    ?debug_msg("intervals (done) ~w\n", [erlang:statistics(runtime)]), @@ -108,10 +107,10 @@ regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) ->    %% ?debug_msg("Intervals ~w\n", [Intervals]),    ?debug_msg("No intervals: ~w\n",[length(Intervals)]),    ?debug_msg("count intervals (done) ~w\n", [erlang:statistics(runtime)]), -  Allocation = allocate(Intervals, PhysRegs, SpillIndex, DontSpill, Target), +  {Coloring, NewSpillIndex} +    = allocate(Intervals, PhysRegs, SpillIndex, DontSpill, Target),    ?debug_msg("allocation (done) ~w\n", [erlang:statistics(runtime)]), -  Allocation. - +  {Coloring, NewSpillIndex}.  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  %%                                                                    %% @@ -125,32 +124,33 @@ regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) ->  %%  Liveness: A map of live-in and live-out sets for each Basic-Block.  %%  Entrypoints: A set of BB names that have external entrypoints.  %% -calculate_intervals(CFG,Liveness,_Entrypoints, Options, Target) -> +calculate_intervals(CFG,Liveness,_Entrypoints, Options, +		    Target={TgtMod,TgtCtx}) ->    %% Add start point for the argument registers.    Args = arg_vars(CFG, Target),    Interval =  -    add_def_point(Args, 0, empty_interval(Target:number_of_temporaries(CFG))), +    add_def_point(Args, 0, empty_interval(number_of_temporaries(CFG, Target))),    %% Interval = add_livepoint(Args, 0, empty_interval()),    Worklist =      case proplists:get_value(ls_order, Options) of        reversepostorder -> -	Target:reverse_postorder(CFG); +	TgtMod:reverse_postorder(CFG, TgtCtx);        breadth -> -	Target:breadthorder(CFG); +	TgtMod:breadthorder(CFG, TgtCtx);        postorder -> -	Target:postorder(CFG); +	TgtMod:postorder(CFG, TgtCtx);        inorder -> -	Target:inorder(CFG); +	TgtMod:inorder(CFG, TgtCtx);        reverse_inorder -> -	Target:reverse_inorder(CFG); +	TgtMod:reverse_inorder(CFG, TgtCtx);        preorder -> -	Target:preorder(CFG); +	TgtMod:preorder(CFG, TgtCtx);        prediction -> -	Target:predictionorder(CFG); +	TgtMod:predictionorder(CFG, TgtCtx);        random -> -	Target:labels(CFG); +	TgtMod:labels(CFG, TgtCtx);        _ -> -	Target:reverse_postorder(CFG) +	TgtMod:reverse_postorder(CFG, TgtCtx)      end,    %% ?inc_counter(bbs_counter, length(Worklist)),    %% ?debug_msg("No BBs ~w\n",[length(Worklist)]), @@ -290,7 +290,7 @@ allocate([RegInt|RIS], Free, Active, Alloc, SpillIndex, DontSpill, Target) ->  			   alloc(OtherTemp,NewPhys,NewAlloc),  			   SpillIndex, DontSpill, Target);  		false -> -		  
NewSpillIndex = Target:new_spill_index(SpillIndex), +		  NewSpillIndex = new_spill_index(SpillIndex, Target),  		  {NewAlloc2, NewActive4} =   		    spill(OtherTemp, OtherEnd, OtherStart, NewActive3,   			  NewAlloc, SpillIndex, DontSpill, Target), @@ -306,7 +306,7 @@ allocate([RegInt|RIS], Free, Active, Alloc, SpillIndex, DontSpill, Target) ->        case NewFree of   	[] ->   	  %% No physical registers available, we have to spill. -	  NewSpillIndex = Target:new_spill_index(SpillIndex), +	  NewSpillIndex = new_spill_index(SpillIndex, Target),  	  {NewAlloc, NewActive2} =   	    spill(Temp, endpoint(RegInt), startpoint(RegInt),  		  Active, Alloc, SpillIndex, DontSpill, Target), @@ -752,38 +752,41 @@ create_freeregs([]) ->  %%   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -liveness(CFG, Target) -> -  Target:analyze(CFG). +bb(CFG, L, {TgtMod, TgtCtx}) -> +  TgtMod:bb(CFG,L,TgtCtx). + +livein(Liveness,L, Target={TgtMod,TgtCtx}) -> +  regnames(TgtMod:livein(Liveness,L,TgtCtx), Target). -bb(CFG, L, Target) -> -  Target:bb(CFG,L). +liveout(Liveness,L, Target={TgtMod,TgtCtx}) -> +  regnames(TgtMod:liveout(Liveness,L,TgtCtx), Target). -livein(Liveness,L, Target) -> -  regnames(Target:livein(Liveness,L), Target). +uses(I, Target={TgtMod,TgtCtx}) -> +  regnames(TgtMod:uses(I,TgtCtx), Target). -liveout(Liveness,L, Target) -> -  regnames(Target:liveout(Liveness,L), Target). +defines(I, Target={TgtMod,TgtCtx}) -> +  regnames(TgtMod:defines(I,TgtCtx), Target). -uses(I, Target) -> -  regnames(Target:uses(I), Target). +is_precoloured(R, {TgtMod,TgtCtx}) -> +  TgtMod:is_precoloured(R,TgtCtx). -defines(I, Target) -> -  regnames(Target:defines(I), Target). +is_global(R, {TgtMod,TgtCtx}) -> +  TgtMod:is_global(R,TgtCtx). -is_precoloured(R, Target) -> -  Target:is_precoloured(R). +new_spill_index(SpillIndex, {TgtMod,TgtCtx}) -> +  TgtMod:new_spill_index(SpillIndex, TgtCtx). -is_global(R, Target) -> -  Target:is_global(R). 
+number_of_temporaries(CFG, {TgtMod,TgtCtx}) -> +  TgtMod:number_of_temporaries(CFG, TgtCtx). -physical_name(R, Target) -> -  Target:physical_name(R). +physical_name(R, {TgtMod,TgtCtx}) -> +  TgtMod:physical_name(R,TgtCtx). -regnames(Regs, Target) -> -  [Target:reg_nr(X) || X <- Regs]. +regnames(Regs, {TgtMod,TgtCtx}) -> +  [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. -arg_vars(CFG, Target) -> -  Target:args(CFG). +arg_vars(CFG, {TgtMod,TgtCtx}) -> +  TgtMod:args(CFG,TgtCtx). -is_arg(Reg, Target) -> -  Target:is_arg(Reg). +is_arg(Reg, {TgtMod,TgtCtx}) -> +  TgtMod:is_arg(Reg,TgtCtx). diff --git a/lib/hipe/regalloc/hipe_optimistic_regalloc.erl b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl index 2ed9ec3b45..031c799a2c 100644 --- a/lib/hipe/regalloc/hipe_optimistic_regalloc.erl +++ b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl @@ -29,7 +29,7 @@  %%-----------------------------------------------------------------------  -module(hipe_optimistic_regalloc). --export([regalloc/5]). +-export([regalloc/7]).  -ifndef(DEBUG).  %%-define(DEBUG,true). @@ -74,20 +74,22 @@  %%   SpillLimit  -- Temporaris with numbers higher than this have  %%                  infinit spill cost.   %%                  Consider changing this to a set. -%%   Target      -- The module containing the target-specific functions. +%%   TgtMod      -- The module containing the target-specific functions. +%%   TgtCtx      -- Context data for TgtMod  %%  %% Returns:  %%   Coloring    -- A coloring for specified CFG -%%   SpillIndex0 -- A new spill index +%%   SpillIndex2 -- A new spill index  %%-----------------------------------------------------------------------  -ifdef(COMPARE_ITERATED_OPTIMISTIC). 
-regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> -  ?debug_msg("optimistic ~w\n",[Target]), +regalloc(CFG, Liveness, SpillIndex, SpillLimit, TgtMod, TgtCtx, _Options) -> +  Target = {TgtMod, TgtCtx}, +  ?debug_msg("optimistic ~w\n",[TgtMod]),    ?debug_msg("CFG: ~p\n",[CFG]),    %% Build interference graph    ?debug_msg("Build IG\n",[]), -  IG_O = hipe_ig:build(CFG, Target), -  IG = hipe_ig:build(CFG, Target), +  IG_O = hipe_ig:build(CFG, Liveness, TgtMod, TgtCtx), +  IG = hipe_ig:build(CFG, Liveness, TgtMod, TgtCtx),    ?debug_msg("adjlist: ~p\n",[hipe_ig:adj_list(IG)]),    ?debug_msg("IG:\n",[]),    ?print_adjacent(IG), @@ -98,9 +100,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->    SavedAdjList = hipe_ig:adj_list(IG),    ?debug_msg("Init\n",[]), -  No_temporaries = Target:number_of_temporaries(CFG), +  No_temporaries = number_of_temporaries(CFG, Target),    ?debug_msg("Coalescing RA: num_temps = ~p~n", [No_temporaries]), -  Allocatable = Target:allocatable(), +  Allocatable = allocatable(Target),    K = length(Allocatable),    All_colors = colset_from_list(Allocatable),    ?debug_msg("K: ~w~nAll_colors: ~p\n",[K, All_colors]),  @@ -113,11 +115,13 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->    ?mov_print_memberships(Move_sets),    ?debug_msg("Build Worklist\n",[]), -  Worklists_O = hipe_reg_worklists:new(IG_O, Target, CFG, Move_sets_O, K, No_temporaries), +  Worklists_O = hipe_reg_worklists:new(IG_O, TgtMod, TgtCtx, CFG, Move_sets_O, +				       K, No_temporaries),    ?debug_msg("Worklists:\n ~p\n", [Worklists_O]),    ?reg_print_memberships(Worklists_O), -  Worklists = hipe_reg_worklists:new(IG, Target, CFG, K, No_temporaries), +  Worklists = hipe_reg_worklists:new(IG, TgtMod, TgtCtx, CFG, K, +				     No_temporaries),    ?debug_msg("New Worklists:\n ~p\n", [Worklists]),    ?reg_print_memberships(Worklists), @@ -175,10 +179,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->    ?debug_msg("Init node 
sets\n",[]),    Node_sets = hipe_node_sets:new(), -  %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,Target:non_alloc(CFG)]), +  %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,non_alloc(CFG,Target)]),    ?debug_msg("Default coloring\n",[]),    {Color0,Node_sets1} =  -    defaultColoring(Target:all_precoloured(), +    defaultColoring(all_precoloured(Target),  		    initColor(No_temporaries), Node_sets, Target),    ?debug_msg("Color0\n",[]),    ?print_colors(No_temporaries, Color0), @@ -199,9 +203,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->    ?debug_msg("Nodes:~w\nNodes2:~w\nNo_temporaries:~w\n",[Node_sets,Node_sets2,No_temporaries]),    ?debug_msg("Build mapping _N ~w\n",[Node_sets2]), -  Coloring = build_namelist(Node_sets2,SpillIndex,Alias2,Color1), +  {Coloring,SpillIndex2} = +    build_namelist(Node_sets2,SpillIndex,Alias2,Color1),    ?debug_msg("Coloring ~p\n",[Coloring]), -  SortedColoring = { sort_stack(element(1, Coloring)), element(2, Coloring)}, +  SortedColoring = {sort_stack(Coloring), SpillIndex2},    ?debug_msg("SortedColoring ~p\n",[SortedColoring]),    %%Coloring.    ?debug_msg("----------------------Assign colors _O\n",[]), @@ -217,14 +222,15 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->    SortedColoring_O = {sort_stack(element(1, Coloring_O)), element(2, Coloring_O)},    ?debug_msg("SortedColoring_O ~p\n",[SortedColoring_O]),    sanity_compare(SortedColoring_O, SortedColoring), -  Coloring. +  {Coloring,SpillIndex2}.  -else. 
-regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> -  ?debug_msg("optimistic ~w\n",[Target]), +regalloc(CFG, Liveness, SpillIndex, SpillLimit, TgtMod, TgtCtx, _Options) -> +  Target = {TgtMod, TgtCtx}, +  ?debug_msg("optimistic ~w\n",[TgtMod]),    ?debug_msg("CFG: ~p\n",[CFG]),    %% Build interference graph    ?debug_msg("Build IG\n",[]), -  IG = hipe_ig:build(CFG, Target), +  IG = hipe_ig:build(CFG, Liveness, TgtMod, TgtCtx),    ?debug_msg("adjlist: ~p\n",[hipe_ig:adj_list(IG)]),    ?debug_msg("IG:\n",[]),    ?print_adjacent(IG), @@ -235,9 +241,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->    SavedAdjList = hipe_ig:adj_list(IG),    ?debug_msg("Init\n",[]), -  No_temporaries = Target:number_of_temporaries(CFG), +  No_temporaries = number_of_temporaries(CFG, Target),    ?debug_msg("Coalescing RA: num_temps = ~p~n", [No_temporaries]), -  Allocatable = Target:allocatable(), +  Allocatable = allocatable(Target),    K = length(Allocatable),    All_colors = colset_from_list(Allocatable),    ?debug_msg("K: ~w~nAll_colors: ~p\n",[K, All_colors]),  @@ -250,7 +256,8 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->    ?debug_msg("Build Worklist\n",[]), -  Worklists = hipe_reg_worklists:new(IG, Target, CFG, K, No_temporaries), +  Worklists = hipe_reg_worklists:new(IG, TgtMod, TgtCtx, CFG, K, +				     No_temporaries),    ?debug_msg("New Worklists:\n ~p\n", [Worklists]),    ?reg_print_memberships(Worklists), @@ -292,10 +299,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->    ?debug_msg("Init node sets\n",[]),    Node_sets = hipe_node_sets:new(), -  %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,Target:non_alloc(CFG)]), +  %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,non_alloc(CFG,Target)]),    ?debug_msg("Default coloring\n",[]),    {Color0,Node_sets1} =  -    defaultColoring(Target:all_precoloured(), +    defaultColoring(all_precoloured(Target),  		    initColor(No_temporaries), Node_sets, Target),    
?debug_msg("Color0\n",[]),    ?print_colors(No_temporaries, Color0), @@ -316,9 +323,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) ->    ?debug_msg("Nodes:~w\nNodes2:~w\nNo_temporaries:~w\n",[Node_sets,Node_sets2,No_temporaries]),    ?debug_msg("Build mapping _N ~w\n",[Node_sets2]), -  Coloring = build_namelist(Node_sets2,SpillIndex,Alias2,Color1), +  {Coloring, SpillIndex2} = build_namelist(Node_sets2,SpillIndex,Alias2,Color1),    ?debug_msg("Coloring ~p\n",[Coloring]), -  Coloring. +  {Coloring,SpillIndex2}.  -endif.  %%---------------------------------------------------------------------- @@ -834,7 +841,8 @@ sort_stack_split(Pivot, [H|T], Smaller, Bigger) ->  %%                       been coalesced, this mapping shows the alias for that   %%                       node.  %%   AllColors       --  This is an ordset containing all the available colors -%%   Target          --  The module containing the target-specific functions. +%%   Target          --  The module containing the target-specific functions, +%%                       along with its context data.  %%  %% Returns:  %%   Color          --  A mapping from nodes to their respective color. @@ -874,7 +882,7 @@ assignColors(Worklists, Stack, NodeSets, Color, No_Temporaries,  	false -> % Color case  	  Col = colset_smallest(OkColors),  	  NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), -	  Color1 = setColor(Node, Target:physical_name(Col), Color), +	  Color1 = setColor(Node, physical_name(Col,Target), Color),  	  ?debug_msg("Color case. Assigning color ~p to node.~n", [Col]),  	  assignColors(Worklists, Stack1, NodeSets1, Color1, No_Temporaries, SavedAdjList, SavedSpillCosts, IG, Alias, AllColors, Target)        end @@ -902,7 +910,8 @@ assignColors(Worklists, Stack, NodeSets, Color, No_Temporaries,  %%   Alias           --  This is a mapping from nodes to nodes. If a node has   %%                       been coalesced, this mapping shows the alias for that   %%                       node. 
-%%   Target          --  The module containing the target-specific functions. +%%   Target          --  The module containing the target-specific functions, +%%                       along with its context data.  %%  %% Returns:  %%   Alias           --  The restored aliases after the uncoalescing. @@ -1006,7 +1015,7 @@ colorSplit([], _Col, NodeSets, Color, _Target) ->  colorSplit([Node|Nodes], Col, NodeSets, Color, Target) ->    ?debug_msg("  Coloring node ~p with color ~p.~n", [Node, Col]),    NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), -  Color1 = setColor(Node, Target:physical_name(Col), Color), +  Color1 = setColor(Node, physical_name(Col,Target), Color),    colorSplit(Nodes, Col, NodeSets1, Color1, Target).  %% Place non-colorable nodes in a split at the bottom of the SelectStack. @@ -1035,7 +1044,8 @@ enqueueSplit([Node|Nodes], IG, Stack) ->  %%                      node.  %%   AllColors      --  This is an ordset containing all the available colors  %% -%%   Target         --  The module containing the target-specific functions. +%%   Target         --  The module containing the target-specific functions, +%%                      along with its context data.  %%  %% Returns:  %%   Color          --  A mapping from nodes to their respective color. @@ -1065,7 +1075,7 @@ assignColors_O(Stack,NodeSets,Color,Alias,AllColors,Target) ->  	false -> % Colour case  	  Col = colset_smallest(OkColors),  	  NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), -	  Color1 = setColor(Node, Target:physical_name(Col), Color), +	  Color1 = setColor(Node, physical_name(Col,Target), Color),  	  assignColors_O(Stack1, NodeSets1, Color1, Alias, AllColors, Target)        end    end. 
@@ -1079,7 +1089,8 @@ assignColors_O(Stack,NodeSets,Color,Alias,AllColors,Target) ->  %%   Regs           -- The list of registers to be default colored  %%   Color          -- The color mapping that shall be changed  %%   NodeSets       -- The node sets that shall be updated -%%   Target         -- The module containing the target-specific functions. +%%   Target         -- The module containing the target-specific functions, +%%                     along with its context data.  %%  %% Returns:  %%   NewColor       -- The updated color mapping @@ -1089,7 +1100,7 @@ assignColors_O(Stack,NodeSets,Color,Alias,AllColors,Target) ->  defaultColoring([], Color, NodeSets, _Target) ->    {Color,NodeSets};  defaultColoring([Reg|Regs], Color, NodeSets, Target) -> -  Color1 = setColor(Reg,Target:physical_name(Reg), Color), +  Color1 = setColor(Reg,physical_name(Reg,Target), Color),    NodeSets1 = hipe_node_sets:add_colored(Reg, NodeSets),    defaultColoring(Regs, Color1, NodeSets1, Target). @@ -1283,7 +1294,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) ->        ?debug_msg("Testing nodes ~p and ~p for coalescing~n",[Dest,Source]),        Alias_src = getAlias(Source, Alias),        Alias_dst = getAlias(Dest, Alias), -      {U,V} = case Target:is_precoloured(Alias_dst) of +      {U,V} = case is_precoloured(Alias_dst, Target) of  		true -> {Alias_dst, Alias_src};  		false -> {Alias_src, Alias_dst}  	      end, @@ -1293,13 +1304,13 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) ->  	  %% drop coalesced move Move  	  {Moves0, IG, Alias, Worklists};  	_ -> -	  case (Target:is_precoloured(V) orelse +	  case (is_precoloured(V, Target) orelse  		hipe_ig:nodes_are_adjacent(U, V, IG)) of   	    true ->  	      %% drop constrained move Move  	      {Moves0, IG, Alias, Worklists};  	    false -> -	      case (case Target:is_precoloured(U) of +	      case (case is_precoloured(U, Target) of  		      true ->  			AdjV = hipe_ig:node_adj_list(V, IG),  			all_adjacent_ok(AdjV, U, 
Worklists, IG, K, Target); @@ -1350,7 +1361,7 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) ->        ?debug_msg("Testing nodes ~p and ~p for coalescing~n",[Dest,Source]),        Alias_src = getAlias(Source, Alias),        Alias_dst = getAlias(Dest, Alias), -      {U,V} = case Target:is_precoloured(Alias_dst) of +      {U,V} = case is_precoloured(Alias_dst, Target) of  		true -> {Alias_dst, Alias_src};  		false -> {Alias_src, Alias_dst}  	      end, @@ -1361,7 +1372,7 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) ->  	  Worklists1 = add_worklist(Worklists, U, K, Moves1, IG, Target),  	  {Moves1, IG, Worklists1, Alias};  	 _ -> -	  case (Target:is_precoloured(V) orelse +	  case (is_precoloured(V, Target) orelse  		hipe_ig:nodes_are_adjacent(U, V, IG)) of   	    true ->  	      Moves1 = Moves0, % drop constrained move Move @@ -1369,7 +1380,7 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) ->  	      Worklists2 = add_worklist(Worklists1, V, K, Moves1, IG, Target),  	      {Moves1, IG, Worklists2, Alias};  	    false -> -	      case (case Target:is_precoloured(U) of +	      case (case is_precoloured(U, Target) of  		      true ->  			AdjV = hipe_ig:node_adj_list(V, IG),  			all_adjacent_ok(AdjV, U, Worklists, IG, K, Target); @@ -1405,7 +1416,8 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) ->  %%   K             -- Number of registers  %%   Moves         -- Current move information  %%   IG            -- Interference graph -%%   Target        -- The containing the target-specific functions +%%   Target        -- The containing the target-specific functions, along with +%%                    its context data.  %%     %% Returns:  %%   Worklists (updated) @@ -1413,7 +1425,7 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) ->  -ifdef(COMPARE_ITERATED_OPTIMISTIC).  
add_worklist(Worklists, U, K, Moves, IG, Target) -> -  case (not(Target:is_precoloured(U)) +  case (not(is_precoloured(U, Target))  	andalso not(hipe_moves:move_related(U, Moves))  	andalso (hipe_ig:is_trivially_colourable(U, K, IG))) of      true -> @@ -1524,12 +1536,12 @@ combine(U, V, IG, Alias, Worklists, K, Target) ->  combine_edges([], _U, IG, _Worklists, _K, _Target) ->    IG; -combine_edges([T|Ts], U, IG, Worklists, K, Target) -> +combine_edges([T|Ts], U, IG, Worklists, K, Target={TgtMod,TgtCtx}) ->    case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of      true -> combine_edges(Ts, U, IG, Worklists, K, Target);      _ -> -      IG1 = hipe_ig:add_edge(T, U, IG, Target), -      IG2 = case Target:is_precoloured(T) of +      IG1 = hipe_ig:add_edge(T, U, IG, TgtMod, TgtCtx), +      IG2 = case is_precoloured(T, Target) of  	      true -> IG1;  	      false -> hipe_ig:dec_node_degree(T, IG1)  	    end, @@ -1559,7 +1571,7 @@ combine_edges([T|Ts], U, IG, Worklists, K, Target) ->  -ifdef(COMPARE_ITERATED_OPTIMISTIC).  combine_edges_O([], _U, IG, Worklists, Moves, _K, _Target) ->    {IG, Worklists, Moves}; -combine_edges_O([T|Ts], U, IG, Worklists, Moves, K, Target) -> +combine_edges_O([T|Ts], U, IG, Worklists, Moves, K, Target={TgtMod,TgtCtx}) ->    case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of      true -> combine_edges_O(Ts, U, IG, Worklists, Moves, K, Target);      _ -> @@ -1576,7 +1588,7 @@ combine_edges_O([T|Ts], U, IG, Worklists, Moves, K, Target) ->        %% worklist, and that's where decrement_degree() expects to find it.        %% This issue is not covered in the published algorithm.        
OldDegree = hipe_ig:get_node_degree(T, IG), -      IG1 = hipe_ig:add_edge(T, U, IG, Target), +      IG1 = hipe_ig:add_edge(T, U, IG, TgtMod, TgtCtx),        NewDegree = hipe_ig:get_node_degree(T, IG1),        Worklists0 =  	if NewDegree =:= K, OldDegree =:= K-1 -> @@ -1609,7 +1621,8 @@ combine_edges_O([T|Ts], U, IG, Worklists, Moves, K, Target) ->  %%   Alias          -- The Alias vector before undoing  %%   SavedAdj       -- Saved adjacency list  %%   IG             -- Interference graph -%%   Target         -- The module containing the target-specific functions. +%%   Target         -- The module containing the target-specific functions, +%%                     along with its context data.  %%     %% Returns:  %%   list of primitive nodes, that is all nodes that were previously @@ -1676,7 +1689,8 @@ findPrimitiveNodes(Node, N, Alias, PrimitiveNodes) ->  %%   N             -- Node that should be uncoalesced  %%   SavedAdj      -- Saved adjacency list  %%   IG            -- Interference graph -%%   Target        -- The module containing the target-specific functions. +%%   Target        -- The module containing the target-specific functions, along +%%                    with its context data.  %%     %% Returns:  %%   updated Interferece graph @@ -1702,16 +1716,16 @@ fixAdj(N, SavedAdj, IG, Target) ->  removeAdj([], _N, _IG, _Target) ->    true; -removeAdj([V| New], N, IG, Target) -> -  hipe_ig:remove_edge(V, N, IG, Target), +removeAdj([V| New], N, IG, Target={TgtMod,TgtCtx}) -> +  hipe_ig:remove_edge(V, N, IG, TgtMod, TgtCtx),    removeAdj(New, N, IG, Target).  
%%restoreAdj([], _N, IG, _Alias, _Target) ->  %%  %%?debug_msg("adj_lists__after_restore_o ~n~p~n", [hipe_ig:adj_list(IG)]),  %%  IG; -%%restoreAdj([V| AdjToN], N, IG, Alias, Target) -> +%%restoreAdj([V| AdjToN], N, IG, Alias, Target={TgtMod,TgtCtx}) ->  %%  AliasToV = getAlias(V, Alias), -%%  IG1 = hipe_ig:add_edge(N, AliasToV, IG, Target), +%%  IG1 = hipe_ig:add_edge(N, AliasToV, IG, TgtMod, TgtCtx),  %%  restoreAdj(AdjToN, N, IG1, Alias, Target).  %% XXX This is probably a clumsy way of doing it @@ -1744,7 +1758,8 @@ findNew([A| Adj], Saved, New) ->  %%   R             -- Other node to test  %%   IG            -- Interference graph  %%   K             -- Number of registers -%%   Target        -- The module containing the target-specific functions +%%   Target        -- The module containing the target-specific functions, along +%%                    with its context data.  %%     %% Returns:  %%   true iff coalescing is OK @@ -1752,7 +1767,7 @@ findNew([A| Adj], Saved, New) ->  ok(T, R, IG, K, Target) ->    ((hipe_ig:is_trivially_colourable(T, K, IG)) -   orelse Target:is_precoloured(T) +   orelse is_precoloured(T, Target)     orelse hipe_ig:nodes_are_adjacent(T, R, IG)).  %%---------------------------------------------------------------------- @@ -1765,7 +1780,8 @@ ok(T, R, IG, K, Target) ->  %%   U             -- Node to test for coalescing  %%   IG            -- Interference graph  %%   K             -- Number of registers -%%   Target        -- The module containing the target-specific functions +%%   Target        -- The module containing the target-specific functions, along +%%                    with its context data.  %%     %% Returns:  %%   true iff coalescing is OK for all nodes in the list @@ -2042,3 +2058,24 @@ freezeEm3(_U,V,_M,K,WorkLists,Moves,IG,_Alias) ->        {WorkLists,Moves1}    end.  -endif. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to external functions. 
+%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +all_precoloured({TgtMod,TgtCtx}) -> +  TgtMod:all_precoloured(TgtCtx). + +allocatable({TgtMod,TgtCtx}) -> +  TgtMod:allocatable(TgtCtx). + +is_precoloured(R, {TgtMod,TgtCtx}) -> +  TgtMod:is_precoloured(R,TgtCtx). + +number_of_temporaries(CFG, {TgtMod,TgtCtx}) -> +  TgtMod:number_of_temporaries(CFG, TgtCtx). + +physical_name(R, {TgtMod,TgtCtx}) -> +  TgtMod:physical_name(R,TgtCtx). diff --git a/lib/hipe/regalloc/hipe_ppc_specific.erl b/lib/hipe/regalloc/hipe_ppc_specific.erl index c49b1e510f..ed7a26de8c 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific.erl @@ -22,114 +22,123 @@  -module(hipe_ppc_specific).  %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 -	 ,analyze/1 -	 ,labels/1 -	 ,all_precoloured/0 -	 ,bb/2 -	 ,liveout/2 -	 ,reg_nr/1 -	 ,def_use/1 -	 ,is_move/1 -	 ,is_precoloured/1 -	 ,var_range/1 -	 ,allocatable/0 -	 ,non_alloc/1 -	 ,physical_name/1 -	 ,reverse_postorder/1 -	 ,livein/2 -	 ,uses/1 -	 ,defines/1 +-export([number_of_temporaries/2 +	 ,analyze/2 +	 ,labels/2 +	 ,all_precoloured/1 +	 ,bb/3 +	 ,liveout/3 +	 ,reg_nr/2 +	 ,def_use/2 +	 ,is_move/2 +	 ,is_precoloured/2 +	 ,var_range/2 +	 ,allocatable/1 +	 ,non_alloc/2 +	 ,physical_name/2 +	 ,reverse_postorder/2 +	 ,livein/3 +	 ,uses/2 +	 ,defines/2 +	 ,defines_all_alloc/2  	]).  %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]).  %% for hipe_ls_regalloc: --export([args/1, is_arg/1, is_global/1, new_spill_index/1]). --export([breadthorder/1, postorder/1]). +-export([args/2, is_arg/2, is_global/2, new_spill_index/2]). +-export([breadthorder/2, postorder/2]).  %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, -	 check_and_rewrite/2]). +-export([check_and_rewrite/3]). -defun_to_cfg(Defun) -> -  hipe_ppc_cfg:init(Defun). 
+%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, +	 update_reg_nr/3, +	 update_bb/4, +	 subst_temps/3]). -check_and_rewrite(Defun, Coloring) -> -  hipe_ppc_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring, _) -> +  hipe_ppc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) ->    hipe_ppc_cfg:reverse_postorder(CFG). -non_alloc(CFG) -> -  non_alloc(hipe_ppc_registers:nr_args(), hipe_ppc_cfg:params(CFG)). +non_alloc(CFG, no_context) -> +  non_alloc_1(hipe_ppc_registers:nr_args(), hipe_ppc_cfg:params(CFG)).  %% same as hipe_ppc_frame:fix_formals/2 -non_alloc(0, Rest) -> Rest; -non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); -non_alloc(_, []) -> []. +non_alloc_1(0, Rest) -> Rest; +non_alloc_1(N, [_|Rest]) -> non_alloc_1(N-1, Rest); +non_alloc_1(_, []) -> [].  %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) ->    hipe_ppc_liveness_gpr:analyse(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) ->    [X || X <- hipe_ppc_liveness_gpr:livein(Liveness,L),  	hipe_ppc:temp_is_allocatable(X)]. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) ->    [X || X <- hipe_ppc_liveness_gpr:liveout(BB_in_out_liveness,Label),  	hipe_ppc:temp_is_allocatable(X)].  %% Registers stuff -allocatable() -> +allocatable(no_context) ->    hipe_ppc_registers:allocatable_gpr(). -all_precoloured() -> +all_precoloured(no_context) ->    hipe_ppc_registers:all_precoloured(). -is_precoloured(Reg) -> +is_precoloured(Reg, _) ->    hipe_ppc_registers:is_precoloured_gpr(Reg). -is_fixed(R) -> +is_fixed(R, _) ->    hipe_ppc_registers:is_fixed(R). -physical_name(Reg) -> +physical_name(Reg, _) ->    Reg.  %% CFG stuff -labels(CFG) -> +labels(CFG, _) ->    hipe_ppc_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) ->    hipe_gensym:var_range(ppc). 
-number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) ->    Highest_temporary = hipe_gensym:get_var(ppc),    %% Since we can have temps from 0 to Max adjust by +1.    Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) ->    hipe_ppc_cfg:bb(CFG,L). +update_bb(CFG,L,BB,_) -> +  hipe_ppc_cfg:bb_add(CFG,L,BB). +  %% PowerPC stuff -def_use(Instruction) -> -  {defines(Instruction), uses(Instruction)}. +def_use(Instruction, Ctx) -> +  {defines(Instruction, Ctx), uses(Instruction, Ctx)}. -uses(I) -> +uses(I, _) ->    [X || X <- hipe_ppc_defuse:insn_use_gpr(I),  	hipe_ppc:temp_is_allocatable(X)]. -defines(I) -> +defines(I, _) ->    [X || X <- hipe_ppc_defuse:insn_def_gpr(I),  	hipe_ppc:temp_is_allocatable(X)]. -is_move(Instruction) -> +defines_all_alloc(I, _) -> +  hipe_ppc_defuse:insn_defs_all_gpr(I). + +is_move(Instruction, _) ->    case hipe_ppc:is_pseudo_move(Instruction) of      true ->        Dst = hipe_ppc:pseudo_move_dst(Instruction), @@ -142,28 +151,45 @@ is_move(Instruction) ->      false -> false    end. -reg_nr(Reg) -> +reg_nr(Reg, _) ->    hipe_ppc:temp_reg(Reg). +new_reg_nr(_) -> +  hipe_gensym:get_next_var(ppc). + +update_reg_nr(Nr, Temp, _) -> +  hipe_ppc:mk_temp(Nr, hipe_ppc:temp_type(Temp)). + +subst_temps(SubstFun, Instr, _) -> +  hipe_ppc_subst:insn_temps( +    fun(Op) -> +	case hipe_ppc:temp_is_allocatable(Op) +	  andalso hipe_ppc:temp_type(Op) =/= 'double' +	of +	  true -> SubstFun(Op); +	  false -> Op +	end +    end, Instr). +  %%% Linear Scan stuff -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) ->    SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) ->    hipe_ppc_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) ->    hipe_ppc_cfg:postorder(CFG). -is_global(R) -> +is_global(R, _) ->    R =:= hipe_ppc_registers:temp1() orelse    R =:= hipe_ppc_registers:temp2() orelse    R =:= hipe_ppc_registers:temp3() orelse    hipe_ppc_registers:is_fixed(R). 
-is_arg(R) -> +is_arg(R, _) ->    hipe_ppc_registers:is_arg(R). -args(CFG) -> +args(CFG, _) ->    hipe_ppc_registers:args(hipe_ppc_cfg:arity(CFG)). diff --git a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl index 454aa4c686..6daa624720 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl @@ -22,126 +22,152 @@  -module(hipe_ppc_specific_fp).  %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 -	 ,analyze/1 -	 ,labels/1 -	 ,all_precoloured/0 -	 ,bb/2 -	 ,liveout/2 -	 ,reg_nr/1 -	 ,def_use/1 -	 ,is_move/1 -	 ,is_precoloured/1 -	 ,var_range/1 -	 ,allocatable/0 -	 ,non_alloc/1 -	 ,physical_name/1 -	 ,reverse_postorder/1 -	 ,livein/2 -	 ,uses/1 -	 ,defines/1 +-export([number_of_temporaries/2 +	 ,analyze/2 +	 ,labels/2 +	 ,all_precoloured/1 +	 ,bb/3 +	 ,liveout/3 +	 ,reg_nr/2 +	 ,def_use/2 +	 ,is_move/2 +	 ,is_precoloured/2 +	 ,var_range/2 +	 ,allocatable/1 +	 ,non_alloc/2 +	 ,physical_name/2 +	 ,reverse_postorder/2 +	 ,livein/3 +	 ,uses/2 +	 ,defines/2 +	 ,defines_all_alloc/2  	]).  %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]).  %% for hipe_ls_regalloc: -%%-export([args/1, is_arg/1, is_global, new_spill_index/1]). -%%-export([breadthorder/1, postorder/1]). +%%-export([args/2, is_arg/2, is_global, new_spill_index/2]). +%%-export([breadthorder/2, postorder/2]).  %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, -	 check_and_rewrite/2]). +-export([check_and_rewrite/3]). -defun_to_cfg(Defun) -> -  hipe_ppc_cfg:init(Defun). +%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, +	 update_reg_nr/3, +	 update_bb/4, +	 subst_temps/3]). -check_and_rewrite(Defun, Coloring) -> -  hipe_ppc_ra_postconditions_fp:check_and_rewrite(Defun, Coloring). +check_and_rewrite(CFG, Coloring, _) -> +  hipe_ppc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring). 
-reverse_postorder(CFG) -> +reverse_postorder(CFG, _) ->    hipe_ppc_cfg:reverse_postorder(CFG). -non_alloc(_CFG) -> +non_alloc(_CFG, _) ->    [].  %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) ->    hipe_ppc_liveness_fpr:analyse(CFG). -livein(Liveness, L) -> +livein(Liveness, L, _) ->    hipe_ppc_liveness_fpr:livein(Liveness, L). -liveout(BB_in_out_liveness, Label) -> +liveout(BB_in_out_liveness, Label, _) ->    hipe_ppc_liveness_fpr:liveout(BB_in_out_liveness, Label).  %% Registers stuff -allocatable() -> +allocatable(no_context) ->    hipe_ppc_registers:allocatable_fpr(). -all_precoloured() -> -  allocatable(). +all_precoloured(Ctx) -> +  allocatable(Ctx). -is_precoloured(Reg) -> +is_precoloured(Reg, _) ->    hipe_ppc_registers:is_precoloured_fpr(Reg). -is_fixed(_Reg) -> +is_fixed(_Reg, _) ->    false. -physical_name(Reg) -> +physical_name(Reg, _) ->    Reg.  %% CFG stuff -labels(CFG) -> +labels(CFG, _) ->    hipe_ppc_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) ->    hipe_gensym:var_range(ppc). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) ->    Highest_temporary = hipe_gensym:get_var(ppc),    %% Since we can have temps from 0 to Max adjust by +1.    Highest_temporary + 1. -bb(CFG, L) -> +bb(CFG, L, _) ->    hipe_ppc_cfg:bb(CFG, L). +update_bb(CFG,L,BB,_) -> +  hipe_ppc_cfg:bb_add(CFG,L,BB). +  %% PowerPC stuff -def_use(I) -> -  {defines(I), uses(I)}. +def_use(I, Ctx) -> +  {defines(I, Ctx), uses(I, Ctx)}. -uses(I) -> +uses(I, _) ->    hipe_ppc_defuse:insn_use_fpr(I). -defines(I) -> +defines(I, _) ->    hipe_ppc_defuse:insn_def_fpr(I). -is_move(I) -> +defines_all_alloc(I, _) -> +  hipe_ppc_defuse:insn_defs_all_fpr(I). + +is_move(I, _) ->    hipe_ppc:is_pseudo_fmove(I). -reg_nr(Reg) -> +reg_nr(Reg, _) ->    hipe_ppc:temp_reg(Reg). +new_reg_nr(_) -> +  hipe_gensym:get_next_var(ppc). + +update_reg_nr(Nr, _Temp, _) -> +  hipe_ppc:mk_temp(Nr, 'double'). 
+ +subst_temps(SubstFun, Instr, _) -> +  hipe_ppc_subst:insn_temps( +    fun(Op) -> +	case hipe_ppc:temp_is_allocatable(Op) +	  andalso hipe_ppc:temp_type(Op) =:= 'double' +	of +	  true -> SubstFun(Op); +	  false -> Op +	end +    end, Instr). +  -ifdef(notdef). -new_spill_index(SpillIndex) -> +new_spill_index(SpillIndex, _) ->    SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) ->    hipe_ppc_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) ->    hipe_ppc_cfg:postorder(CFG). -is_global(_R) -> +is_global(_R, _) ->    false. -is_arg(_R) -> +is_arg(_R, _) ->    false. -args(_CFG) -> +args(_CFG, _) ->    [].  -endif. diff --git a/lib/hipe/regalloc/hipe_reg_worklists.erl b/lib/hipe/regalloc/hipe_reg_worklists.erl index 88585f9f38..00679cf19c 100644 --- a/lib/hipe/regalloc/hipe_reg_worklists.erl +++ b/lib/hipe/regalloc/hipe_reg_worklists.erl @@ -30,8 +30,8 @@  -module(hipe_reg_worklists).  -author(['Andreas Wallin',  'Thorild Selén']). --export([new/5,			% only used by optimistic allocator -         new/6, +-export([new/6,			% only used by optimistic allocator +	 new/7,  	 simplify/1,  	 spill/1,  	 freeze/1, @@ -90,29 +90,32 @@  %%  %%%---------------------------------------------------------------------- -new(IG, Target, CFG, K, No_temporaries) -> % only used by optimistic allocator +%% only used by optimistic allocator +new(IG, TargetMod, TargetCtx, CFG, K, No_temporaries) ->    CoalescedTo = hipe_bifs:array(No_temporaries, 'none'), -  init(initial(Target, CFG), K, IG, empty(No_temporaries, CoalescedTo)). +  init(initial(TargetMod, TargetCtx, CFG), K, IG, +       empty(No_temporaries, CoalescedTo)). -new(IG, Target, CFG, Move_sets, K, No_temporaries) -> -  init(initial(Target, CFG), K, IG, Move_sets, empty(No_temporaries, [])). +new(IG, TargetMod, TargetCtx, CFG, Move_sets, K, No_temporaries) -> +  init(initial(TargetMod, TargetCtx, CFG), K, IG, Move_sets, +       empty(No_temporaries, [])). 
-initial(Target, CFG) -> -  {Min_temporary, Max_temporary} = Target:var_range(CFG), -  NonAlloc = Target:non_alloc(CFG), -  non_precoloured(Target, Min_temporary, Max_temporary, []) -    -- [Target:reg_nr(X) || X <- NonAlloc]. +initial(TargetMod, TargetCtx, CFG) -> +  {Min_temporary, Max_temporary} = TargetMod:var_range(CFG, TargetCtx), +  NonAlloc = TargetMod:non_alloc(CFG, TargetCtx), +  non_precoloured(TargetMod, TargetCtx, Min_temporary, Max_temporary, []) +    -- [TargetMod:reg_nr(X, TargetCtx) || X <- NonAlloc]. -non_precoloured(Target, Current, Max_temporary, Initial) -> +non_precoloured(TargetMod, TargetCtx, Current, Max_temporary, Initial) ->    if Current > Max_temporary ->        Initial;       true ->        NewInitial = -	case Target:is_precoloured(Current) of +	case TargetMod:is_precoloured(Current, TargetCtx) of  	  true -> Initial;  	  false -> [Current|Initial]  	end, -      non_precoloured(Target, Current+1, Max_temporary, NewInitial) +      non_precoloured(TargetMod, TargetCtx, Current+1, Max_temporary, NewInitial)    end.  %% construct an empty initialized worklists data structure diff --git a/lib/hipe/regalloc/hipe_regalloc_loop.erl b/lib/hipe/regalloc/hipe_regalloc_loop.erl index d29615a3a0..3777f90534 100644 --- a/lib/hipe/regalloc/hipe_regalloc_loop.erl +++ b/lib/hipe/regalloc/hipe_regalloc_loop.erl @@ -21,38 +21,44 @@  %%% Common wrapper for graph_coloring and coalescing regallocs.  -module(hipe_regalloc_loop). --export([ra/5, ra_fp/4]). +-export([ra/7, ra_fp/6]).  %%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation.  -include("../main/hipe.hrl"). -ra(Defun, SpillIndex, Options, RegAllocMod, TargetMod) -> -  {NewDefun, Coloring, _NewSpillIndex} = -    ra_common(Defun, SpillIndex, Options, RegAllocMod, TargetMod), -  {NewDefun, Coloring}. 
+ra(CFG, Liveness0, SpillIndex, Options, RegAllocMod, TargetMod, TargetCtx) -> +  {NewCFG, Liveness, Coloring, _NewSpillIndex} = +    ra_common(CFG, Liveness0, SpillIndex, Options, RegAllocMod, TargetMod, +	      TargetCtx), +  {NewCFG, Liveness, Coloring}. -ra_fp(Defun, Options, RegAllocMod, TargetMod) -> -  ra_common(Defun, 0, Options, RegAllocMod, TargetMod). +ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod, TargetCtx) -> +  ra_common(CFG, Liveness, 0, Options, RegAllocMod, TargetMod, TargetCtx). -ra_common(Defun, SpillIndex, Options, RegAllocMod, TargetMod) -> +ra_common(CFG0, Liveness0, SpillIndex, Options, RegAllocMod, TargetMod, +	  TargetCtx) ->    ?inc_counter(ra_calls_counter, 1), -  CFG = TargetMod:defun_to_cfg(Defun), -  SpillLimit = TargetMod:number_of_temporaries(CFG), -  alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod). +  SpillLimit0 = TargetMod:number_of_temporaries(CFG0, TargetCtx), +  {Coloring, _, CFG, Liveness} = +    call_allocator_initial(CFG0, Liveness0, SpillLimit0, SpillIndex, Options, +			   RegAllocMod, TargetMod, TargetCtx), +  %% The first iteration, the hipe_regalloc_prepass may create new temps, these +  %% should not end up above SpillLimit. +  SpillLimit = TargetMod:number_of_temporaries(CFG, TargetCtx), +  alloc(Coloring, CFG, Liveness, SpillLimit, SpillIndex, Options, +	RegAllocMod, TargetMod, TargetCtx). -alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) -> +alloc(Coloring, CFG0, Liveness, SpillLimit, SpillIndex, Options, +      RegAllocMod, TargetMod, TargetCtx) ->    ?inc_counter(ra_iteration_counter, 1), -  CFG = TargetMod:defun_to_cfg(Defun), -  {Coloring, _NewSpillIndex} = -    RegAllocMod:regalloc(CFG, SpillIndex, SpillLimit, TargetMod, Options), -  {NewDefun, DidSpill} = TargetMod:check_and_rewrite(Defun, Coloring), +  {CFG, DidSpill} = TargetMod:check_and_rewrite(CFG0, Coloring, TargetCtx),    case DidSpill of      false -> %% No new temps, we are done.        
?add_spills(Options, _NewSpillIndex), -      TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod), -      {TempMap2, NewSpillIndex2} =  -	hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options,  -				 TargetMod, TempMap), +      TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod, TargetCtx), +      {TempMap2, NewSpillIndex2} = +	hipe_spillmin:stackalloc(CFG0, Liveness, [], SpillIndex, Options, +				 TargetMod, TargetCtx, TempMap),        Coloring2 =   	hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2),        %% case proplists:get_bool(verbose_spills, Options) of @@ -61,9 +67,38 @@ alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) ->        %%   false ->        %%     ok        %% end, -      {NewDefun, Coloring2, NewSpillIndex2}; +      {CFG, Liveness, Coloring2, NewSpillIndex2};      _ ->        %% Since SpillLimit is used as a low-water-mark        %% the list of temps not to spill is uninteresting. -      alloc(NewDefun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) +      {NewColoring, _NewSpillIndex} = +	call_allocator(CFG, Liveness, SpillLimit, SpillIndex, Options, +		       RegAllocMod, TargetMod, TargetCtx), +      alloc(NewColoring, CFG, Liveness, SpillLimit, SpillIndex, Options, +	    RegAllocMod, TargetMod, TargetCtx) +  end. + +call_allocator_initial(CFG, Liveness, SpillLimit, SpillIndex, Options, +		       RegAllocMod, TargetMod, TargetCtx) -> +  case proplists:get_bool(ra_prespill, Options) of +    true -> +      hipe_regalloc_prepass:regalloc_initial( +	RegAllocMod, CFG, Liveness, SpillIndex, SpillLimit, TargetMod, +	TargetCtx, Options); +    false -> +      {C, SI} = RegAllocMod:regalloc(CFG, Liveness, SpillIndex, SpillLimit, +				     TargetMod, TargetCtx, Options), +      {C, SI, CFG, Liveness} +  end. 
+ +call_allocator(CFG, Liveness, SpillLimit, SpillIndex, Options, RegAllocMod, +	       TargetMod, TargetCtx) -> +  case proplists:get_bool(ra_prespill, Options) of +    true -> +      hipe_regalloc_prepass:regalloc( +	RegAllocMod, CFG, Liveness, SpillIndex, SpillLimit, TargetMod, +	TargetCtx, Options); +    false -> +      RegAllocMod:regalloc(CFG, Liveness, SpillIndex, SpillLimit, TargetMod, +			   TargetCtx, Options)    end. diff --git a/lib/hipe/regalloc/hipe_regalloc_prepass.erl b/lib/hipe/regalloc/hipe_regalloc_prepass.erl new file mode 100644 index 0000000000..2f1597ffd1 --- /dev/null +++ b/lib/hipe/regalloc/hipe_regalloc_prepass.erl @@ -0,0 +1,1006 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2016. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%%     http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%%	       PREPASS FOR ITERATED REGISTER ALLOCATORS +%% +%% Implements a trivial partial but optimal fast register allocator to be used +%% as the first pass of the register allocation loop. +%% +%% The idea is to drastically reduce the number of temporaries, so as to speed +%% up the real register allocators. +%% +%%  * Spills trivially unallocatable temps +%%    This relies on the fact that calls intentionally clobber all registers. 
+%%    Since this is the case, any temp that is alive over a call can't possibly +%%    be allocated to anything but a spill slot. +%% +%%  * Partitions the program at points where no pseudos that were not spilled are +%%    live, and then do register allocation on these partitions independently. +%%    These program points are commonly, but not exclusively, the call +%%    instructions. +%% +%% TODO +%%  * This module seems very successful at finding every single spill; register +%%    allocation performance should be improved if we short-circuit the first +%%    hipe_regalloc_loop iteration, skipping directly to rewrite without ever +%%    calling RegAllocMod. +-module(hipe_regalloc_prepass). +-export([regalloc/8, regalloc_initial/8]). + +-ifndef(DEBUG). +-compile(inline). +-endif. + +%%-define(DO_ASSERT, 1). +-include("../main/hipe.hrl"). + +%%% TUNABLES + +%% Partitions with fewer than ?TUNE_TOO_FEW_BBS basic block halves are merged +%% together before register allocation. +-define(TUNE_TOO_FEW_BBS, 256). + +%% Ignore the ra_partitioned option (and do whole function RA instead) when +%% there are fewer than ?TUNE_MIN_SPLIT_BBS basic blocks. +-define(TUNE_MIN_SPLIT_BBS, 384). + +%% We present a "pseudo-target" to the register allocator we wrap. +-export([analyze/2, +	 all_precoloured/1, +	 allocatable/1, +	 args/2, +	 bb/3, +	 def_use/2, +	 defines/2, +	 is_fixed/2,	% used by hipe_graph_coloring_regalloc +	 is_global/2, +	 is_move/2, +	 is_precoloured/2, +	 labels/2, +	 livein/3, +	 liveout/3, +	 non_alloc/2, +	 number_of_temporaries/2, +	 physical_name/2, +	 postorder/2, +	 reg_nr/2, +	 uses/2, +	 var_range/2, +	 reverse_postorder/2]). + +-record(prepass_ctx, +	{target_mod :: module() +	,target_ctx :: target_context() +	,sub        :: sub_map() % Translates temp numbers found in CFG and understood by +				 % Target to temp numbers passed to RegAllocMod. 
+	,inv        :: inv_map() % Translates temp numbers passed to RegAllocMod +				 % to temp numbers found in CFG and understood by +				 % Target +	,max_phys   :: temp()    % Exclusive upper bound on physical registers +	}). + +-record(cfg, +	{cfg        :: target_cfg() +	,bbs        :: transformed_bbs() +	,max_reg    :: temp()    % Exclusive upper bound on temp numbers +	,rpostorder :: undefined % Only precomputed with partitioned cfg +		     | [label()] +	}). + +-type bb()      :: hipe_bb:bb(). % containing instr() +-type liveset() :: ordsets:ordset(temp()). +-record(transformed_bb, +	{bb      :: bb() +	,livein  :: liveset() +	,liveout :: liveset() +	}). +-type transformed_bb() :: #transformed_bb{}. +-type transformed_bbs() :: #{label() => transformed_bb()}. + +-record(instr, +	{defuse    :: {[temp()], [temp()]} +	,is_move   :: boolean() +	}). +-type instr() :: #instr{}. + +-type target_cfg() :: any(). +-type target_instr() :: any(). +-type target_temp() :: any(). +-type target_reg() :: non_neg_integer(). +-type target_liveness() :: any(). +-type target_liveset() :: ordsets:ordset(target_reg()). +-type target_context() :: any(). +-type spillno() :: non_neg_integer(). +-type temp() :: non_neg_integer(). +-type label() :: non_neg_integer(). + +-spec regalloc(module(), target_cfg(), target_liveness(), spillno(), spillno(), +	       module(), target_context(), proplists:proplist()) +	      -> {hipe_map(), spillno()}. +regalloc(RegAllocMod, CFG, Liveness, SpillIndex0, SpillLimit, TargetMod, +	 TargetCtx, Options) -> +  {Coloring, SpillIndex, same} = +    regalloc_1(RegAllocMod, CFG, SpillIndex0, SpillLimit, TargetMod, +	       TargetCtx, Options, Liveness), +  {Coloring, SpillIndex}. + +%% regalloc_initial/8 is allowed to introduce new temporaries, unlike +%% regalloc/8. +%% In order for regalloc/8 to never introduce temporaries, regalloc/8 must never +%% choose to do split allocation unless regalloc_initial/8 does. 
This is the +%% reason that the splitting heuristic is solely based on the number of basic +%% blocks, which does not change during the register allocation loop. +-spec regalloc_initial(module(), target_cfg(), target_liveness(), spillno(), +		       spillno(), module(), target_context(), +		       proplists:proplist()) +		      -> {hipe_map(), spillno(), target_cfg(), +			  target_liveness()}. +regalloc_initial(RegAllocMod, CFG0, Liveness0, SpillIndex0, SpillLimit, +		 TargetMod, TargetCtx, Options) -> +  {Coloring, SpillIndex, NewCFG} = +    regalloc_1(RegAllocMod, CFG0, SpillIndex0, SpillLimit, TargetMod, TargetCtx, +	       Options, Liveness0), +  {CFG, Liveness} = +    case NewCFG of +      same -> {CFG0, Liveness0}; +      {rewritten, CFG1} -> {CFG1, TargetMod:analyze(CFG1, TargetCtx)} +    end, +  {Coloring, SpillIndex, CFG, Liveness}. + +regalloc_1(RegAllocMod, CFG0, SpillIndex0, SpillLimit, TargetMod, TargetCtx, +	   Options, Liveness) -> +  {ScanBBs, Seen, SpillMap, SpillIndex1} = +    scan_cfg(CFG0, Liveness, SpillIndex0, TargetMod, TargetCtx), + +  {PartColoring, SpillIndex, NewCFG} = +    case proplists:get_bool(ra_partitioned, Options) +      andalso length(TargetMod:labels(CFG0, TargetCtx)) > ?TUNE_MIN_SPLIT_BBS +    of +      true -> +	regalloc_partitioned(SpillMap, SpillIndex1, SpillLimit, ScanBBs, +			     CFG0, TargetMod, TargetCtx, RegAllocMod, Options); +      _ -> +	regalloc_whole(Seen, SpillMap, SpillIndex1, SpillLimit, ScanBBs, +		       CFG0, TargetMod, TargetCtx, RegAllocMod, Options) +    end, + +  SpillColors = [{T, {spill, S}} || {T, S} <- maps:to_list(SpillMap)], +  Coloring = SpillColors ++ PartColoring, + +  ?ASSERT(begin +	    AllPrecoloured = TargetMod:all_precoloured(TargetCtx), +	    MaxPhys = lists:max(AllPrecoloured) + 1, +	    Unused = unused(live_pseudos(Seen, SpillMap, MaxPhys), +			    SpillMap, CFG0, TargetMod, TargetCtx), +	    unused_unused(Unused, CFG0, TargetMod, TargetCtx) +	  end), +  ?ASSERT(begin +	    CFG = +	      
case NewCFG of +		same -> CFG0; +		{rewritten, CFG1} -> CFG1 +	      end, +	    check_coloring(Coloring, CFG, TargetMod, TargetCtx) +	  end), % Sanity-check +  ?ASSERT(just_as_good_as(RegAllocMod, CFG, Liveness, SpillIndex0, SpillLimit, +			  TargetMod, TargetCtx, Options, SpillMap, Coloring, +			  Unused)), +  {Coloring, SpillIndex, NewCFG}. + +regalloc_whole(Seen, SpillMap, SpillIndex0, SpillLimit, ScanBBs, +	       CFG, TargetMod, TargetCtx, RegAllocMod, Options) -> +  AllPrecoloured = TargetMod:all_precoloured(TargetCtx), +  MaxPhys = lists:max(AllPrecoloured) + 1, +  LivePseudos = live_pseudos(Seen, SpillMap, MaxPhys), +  {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit} = +    number_and_map(AllPrecoloured, LivePseudos, SpillLimit), +  BBs = transform_whole_cfg(ScanBBs, SubMap), +  SubMod = #cfg{cfg=CFG, bbs=BBs, max_reg=MaxR}, +  SubContext = #prepass_ctx{target_mod=TargetMod, target_ctx=TargetCtx, +			    max_phys=MaxPhys, inv=InvMap, sub=SubMap}, +  {SubColoring, SpillIndex} = +    RegAllocMod:regalloc(SubMod, SubMod, SpillIndex0, SubSpillLimit, ?MODULE, +			 SubContext, Options), +  ?ASSERT(check_coloring(SubColoring, SubMod, ?MODULE, SubContext)), +  {translate_coloring(SubColoring, InvMap), SpillIndex, same}. 
+ +regalloc_partitioned(SpillMap, SpillIndex0, SpillLimit, ScanBBs, +		     CFG, TargetMod, TargetCtx, RegAllocMod, Options) -> +  AllPrecoloured = TargetMod:all_precoloured(TargetCtx), +  MaxPhys = lists:max(AllPrecoloured) + 1, + +  DSets0 = initial_dsets(CFG, TargetMod, TargetCtx), +  PartBBList = part_cfg(ScanBBs, SpillMap, MaxPhys), +  DSets1 = join_whole_blocks(PartBBList, DSets0), +  {PartBBsRLList, DSets2} = merge_small_parts(DSets1), +  {PartBBs, DSets3} = merge_pointless_splits(PartBBList, ScanBBs, DSets2), +  SeenMap = collect_seenmap(PartBBsRLList, PartBBs), +  {RPostMap, _DSets4} = part_order(TargetMod:reverse_postorder(CFG, TargetCtx), +				   DSets3), + +  {Allocations, SpillIndex} = +    lists:mapfoldl( +      fun({Root, Elems}, SpillIndex1) -> +	  #{Root := Seen} = SeenMap, +	  #{Root := RPost} = RPostMap, +	  LivePseudos = live_pseudos(Seen, SpillMap, MaxPhys), +	  {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit} = +	    number_and_map(AllPrecoloured, LivePseudos, SpillLimit), +	  BBs = transform_cfg(Elems, PartBBs, SubMap), +	  SubMod = #cfg{cfg=CFG, bbs=BBs, max_reg=MaxR, rpostorder=RPost}, +	  SubContext = #prepass_ctx{target_mod=TargetMod, target_ctx=TargetCtx, +				    max_phys=MaxPhys, inv=InvMap, sub=SubMap}, +	  {SubColoring, SpillIndex2} = +	    RegAllocMod:regalloc(SubMod, SubMod, SpillIndex1, SubSpillLimit, +				 ?MODULE, SubContext, Options), +	  ?ASSERT(check_coloring(SubColoring, SubMod, ?MODULE, SubContext)), +	  {{translate_coloring(SubColoring, InvMap), Elems}, SpillIndex2} +      end, SpillIndex0, PartBBsRLList), +  {Coloring, NewCFG} = +    combine_allocations(Allocations, MaxPhys, PartBBs, TargetMod, TargetCtx, +			CFG), +  {Coloring, SpillIndex, NewCFG}. + +-spec number_and_map([target_reg()], target_liveset(), target_reg()) +		    -> {sub_map(), inv_map(), temp(), temp(), temp()}. 
+number_and_map(Phys, Pseud, SpillLimit) -> +  MaxPhys = lists:max(Phys) + 1, +  ?ASSERT(Pseud =:= [] orelse lists:min(Pseud) >= MaxPhys), +  NrPseuds = length(Pseud), +  MaxR = MaxPhys+NrPseuds, +  PseudNrs = lists:zip(Pseud, lists:seq(MaxPhys, MaxR-1)), +  MapList = lists:zip(Phys, Phys) % Physicals are identity-mapped +    ++ PseudNrs, +  ?ASSERT(MapList =:= lists:ukeysort(1, MapList)), +  SubMap = {s,maps:from_list(MapList)}, +  InvMap = {i,maps:from_list([{Fake, Real} || {Real, Fake} <- MapList])}, +  SubSpillLimit = translate_spill_limit(MapList, SpillLimit), +  {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit}. + +-spec translate_spill_limit([{target_reg(), temp()}], target_reg()) -> temp(). +translate_spill_limit([{Real,Fake}], SpillLimit) when Real < SpillLimit -> +  Fake + 1; +translate_spill_limit([{Real,_}|Ps], SpillLimit) when Real < SpillLimit -> +  translate_spill_limit(Ps, SpillLimit); +translate_spill_limit([{Real,Fake}|_], SpillLimit) when Real >= SpillLimit -> +  Fake. + +-spec live_pseudos(seen(), spill_map(), target_reg()) -> target_liveset(). +live_pseudos(Seen, SpillMap, MaxPhys) -> +  %% When SpillMap is much larger than Seen (which is typical in the partitioned +  %% case), it is much more efficient doing it like this than making an ordset +  %% of the spills and subtracting. +  ordsets:from_list( +    lists:filter(fun(R) -> R >= MaxPhys andalso not maps:is_key(R, SpillMap) +		 end, maps:keys(Seen))). + +-spec translate_coloring(hipe_map(), inv_map()) -> hipe_map(). +translate_coloring(SubColoring, InvMap) -> +  lists:map(fun({T, P}) -> {imap_get(T, InvMap), P} end, SubColoring). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% First pass +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Spill trivially unallocatable temps, create internal target-independent +%% program representation, and collect a set of all used temps. 
+-record(spill_state, +	{map :: spill_map() +	,ix  :: spillno() +	}). +-type spill_state() :: #spill_state{}. +-type spill_map()   :: #{target_reg() => spillno()}. + +-spec scan_cfg(target_cfg(), target_liveness(), spillno(), module(), +	       target_context()) +	      -> {scan_bbs() +		 ,seen() +		 ,spill_map() +		 ,spillno() +		 }. +scan_cfg(CFG, Liveness, SpillIndex0, TgtMod, TgtCtx) -> +  State0 = #spill_state{map=#{}, ix=SpillIndex0}, +  {BBs, Seen, #spill_state{map=Spill, ix=SpillIndex}} = +    scan_bbs(TgtMod:labels(CFG,TgtCtx), CFG, Liveness, #{}, State0, #{}, TgtMod, +	     TgtCtx), +  {BBs, Seen, Spill, SpillIndex}. + +-type seen() :: #{target_reg() => []}. % set +-type scan_bb() :: {[instr()], target_liveset(), target_liveset()}. +-type scan_bbs() :: #{label() => scan_bb()}. + +-spec scan_bbs([label()], target_cfg(), target_liveness(), seen(), +	       spill_state(), scan_bbs(), module(), target_context()) +	      -> {scan_bbs(), seen(), spill_state()}. +scan_bbs([], _CFG, _Liveness, Seen, State, BBs, _TgtMod, _TgtCtx) -> +  {BBs, Seen, State}; +scan_bbs([L|Ls], CFG, Liveness, Seen0, State0, BBs, TgtMod, TgtCtx) -> +  Liveout = t_liveout(Liveness, L, TgtMod, TgtCtx), +  {Code, Livein, Seen, State} = +    scan_bb(lists:reverse(hipe_bb:code(TgtMod:bb(CFG, L, TgtCtx))), Liveout, +	    Seen0, State0, [], TgtMod, TgtCtx), +  BB = {Code, Livein, Liveout}, +  scan_bbs(Ls, CFG, Liveness, Seen, State, BBs#{L=>BB}, TgtMod, TgtCtx). + +-spec scan_bb([target_instr()], target_liveset(), seen(), spill_state(), +	      [instr()], module(), target_context()) +	     -> {[instr()] +		,target_liveset() +		,seen() +		,spill_state() +		}. 
+scan_bb([], Live, Seen, State, IAcc, _TgtMod, _TgtCtx) -> +  {IAcc, Live, Seen, State}; +scan_bb([I|Is], Live0, Seen0, State0, IAcc0, TgtMod, TgtCtx) -> +  {TDef, TUse} = TgtMod:def_use(I,TgtCtx), +  ?ASSERT(TDef =:= TgtMod:defines(I,TgtCtx)), +  ?ASSERT(TUse =:= TgtMod:uses(I,TgtCtx)), +  Def = ordsets:from_list(reg_names(TDef, TgtMod, TgtCtx)), +  Use = ordsets:from_list(reg_names(TUse, TgtMod, TgtCtx)), +  Live = ordsets:union(Use, ToSpill = ordsets:subtract(Live0, Def)), +  Seen = add_seen(Def, add_seen(Use, Seen0)), +  NewI = #instr{defuse={Def, Use}, is_move=TgtMod:is_move(I,TgtCtx)}, +  IAcc = [NewI|IAcc0], +  State = +    case TgtMod:defines_all_alloc(I,TgtCtx) of +      false -> State0; +      true -> spill_all(ToSpill, TgtMod, TgtCtx, State0) +    end, +  %% We can drop "no-ops" here; where (if anywhere) is it worth it? +  scan_bb(Is, Live, Seen, State, IAcc, TgtMod, TgtCtx). + +-spec t_liveout(target_liveness(), label(), module(), target_context()) -> +		   target_liveset(). +t_liveout(Liveness, L, TgtMod, TgtCtx) -> +  %% FIXME: unnecessary sort; liveout is sorted, reg_names(...) should be sorted +  %% or consist of a few sorted subsequences (per type) +  ordsets:from_list(reg_names(TgtMod:liveout(Liveness, L, TgtCtx), TgtMod, +			      TgtCtx)). + +-spec reg_names([target_temp()], module(), target_context()) -> [target_reg()]. +reg_names(Regs, TgtMod, TgtCtx) -> +  [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. + +-spec add_seen([target_reg()], seen()) -> seen(). +add_seen([], Seen) -> Seen; +add_seen([R|Rs], Seen) -> add_seen(Rs, Seen#{R=>[]}). + +-spec spill_all([target_reg()], module(), target_context(), spill_state()) -> +		   spill_state(). 
+spill_all([], _TgtMod, _TgtCtx, State) -> State; +spill_all([R|Rs], TgtMod, TgtCtx, State=#spill_state{map=Map, ix=Ix}) -> +  case TgtMod:is_precoloured(R,TgtCtx) or maps:is_key(R, Map) of +    true -> spill_all(Rs, TgtMod, TgtCtx, State); +    false -> spill_all(Rs, TgtMod, TgtCtx, +		       State#spill_state{map=Map#{R=>Ix}, ix=Ix+1}) +  end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Second pass (without split) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Rewrite CFG to the new temp names. +-spec transform_whole_cfg(scan_bbs(), sub_map()) -> transformed_bbs(). +transform_whole_cfg(BBs0, SubMap) -> +  maps:map(fun(_, BB) -> transform_whole_bb(BB, SubMap) end, BBs0). + +-spec transform_whole_bb(scan_bb(), sub_map()) -> transformed_bb(). +transform_whole_bb({Code, Livein, Liveout}, SubMap) -> +  #transformed_bb{ +     bb=hipe_bb:mk_bb([I#instr{defuse={smap_get_all_partial(Def, SubMap), +				       smap_get_all_partial(Use, SubMap)}} +		       || I = #instr{defuse={Def,Use}} <- Code]) +     %% Assume mapping preserves monotonicity +    ,livein=smap_get_all_partial(Livein, SubMap) +    ,liveout=smap_get_all_partial(Liveout, SubMap) +    }. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Second pass (with split) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Discover program partitioning +%% Regretfully, this needs to be a separate pass, as having the global live set +%% is crucial to get a useful partitioning. + +%% Single-block parts are merged if there are multiple in a single block, as it +%% is judged to not be beneficial to make them too small. + +-type part_bb_part() :: {[instr()], target_liveset(), target_liveset()}. +-type part_bb()  :: {single, part_bb_part()} +		  | {split, part_bb_part(), part_bb_part()}. +-type part_bb_list() :: [{label(), part_bb()}]. +-type part_bbs() :: #{label() => part_bb()}. 
+-type part_bb_sofar() :: single +		       | {split, [instr()], target_liveset()}. % , target_liveset() + +-spec part_cfg(scan_bbs(), spill_map(), target_reg()) -> part_bb_list(). +part_cfg(ScanBBs, SpillMap, MaxPhys) -> +  Liveset = mk_part_liveset(SpillMap, MaxPhys), +  lists:map(fun(BB) -> part_bb(BB, Liveset) end, maps:to_list(ScanBBs)). + +-spec part_bb({label(), scan_bb()}, part_liveset()) -> {label(), part_bb()}. +part_bb({L, BB0={Code0, Livein, Liveout}}, Liveset) -> +  {Sofar, NewCode} = part_bb_1(lists:reverse(Code0), Liveset, Liveout, []), +  BB = case Sofar of +	 single -> +	   ?ASSERT(Code0 =:= NewCode), +	   {single, BB0}; +	 {split, ExitCode, ExitLivein = EntryLiveout} -> +	   {split, {NewCode, Livein, EntryLiveout}, +	    {ExitCode, ExitLivein, Liveout}} +       end, +  {L, BB}. + +-spec part_bb_1([instr()], part_liveset(), target_liveset(), [instr()]) +	     -> {part_bb_sofar(), [instr()]}. +part_bb_1([], _Liveset, _Livein, IAcc) -> {single, IAcc}; +part_bb_1([I=#instr{defuse={Def,Use}}|Is], Liveset, Live0, IAcc0) -> +  Live = ordsets:union(Use, ordsets:subtract(Live0, Def)), +  IAcc = [I|IAcc0], +  case part_none_live(Live, Liveset) of +    false -> part_bb_1(Is, Liveset, Live, IAcc); +    %% One split point will suffice +    true -> {{split, IAcc, Live}, lists:reverse(Is)} +  end. + +-spec part_none_live(target_liveset(), part_liveset()) -> boolean(). +part_none_live(Live, Liveset) -> +  not lists:any(fun(R) -> part_liveset_is_live(R, Liveset) end, Live). + +-type part_liveset() :: {spill_map(), target_reg()}. + +-spec mk_part_liveset(spill_map(), target_reg()) -> part_liveset(). +mk_part_liveset(SpillMap, MaxPhys) -> {SpillMap, MaxPhys}. + +-spec part_liveset_is_live(target_reg(), part_liveset()) -> boolean(). +part_liveset_is_live(R, {SpillMap, MaxPhys}) when is_integer(R) -> +  R >= MaxPhys andalso not maps:is_key(R, SpillMap). + +%% @doc Merges split blocks where entry and exit belong to the same DSet. 
+%% Does not change DSets +-spec merge_pointless_splits(part_bb_list(), scan_bbs(), bb_dsets()) +			   -> {part_bbs(), bb_dsets()}. +merge_pointless_splits(PartBBList0, ScanBBs, DSets0) -> +  {PartBBList, DSets} = +    merge_pointless_splits_1(PartBBList0, ScanBBs, DSets0, []), +  {maps:from_list(PartBBList), DSets}. + +-spec merge_pointless_splits_1( +	part_bb_list(), scan_bbs(), bb_dsets(), part_bb_list()) +			      -> {part_bb_list(), bb_dsets()}. +merge_pointless_splits_1([], _ScanBBs, DSets, Acc) -> {Acc, DSets}; +merge_pointless_splits_1([P={_,{single,_}}|Ps], ScanBBs, DSets, Acc) -> +  merge_pointless_splits_1(Ps, ScanBBs, DSets, [P|Acc]); +merge_pointless_splits_1([P0={L,{split,_,_}}|Ps], ScanBBs, DSets0, Acc) -> +  {EntryRoot, DSets1} = dsets_find({entry,L}, DSets0), +  {ExitRoot,  DSets}  = dsets_find({exit,L},  DSets1), +  case EntryRoot =:= ExitRoot of +    false -> merge_pointless_splits_1(Ps, ScanBBs, DSets, [P0|Acc]); +    true -> +      %% Reuse the code list from ScanBBs rather than concatenating the split +      %% parts +      #{L := BB} = ScanBBs, +      ?ASSERT(begin +		{L,{split,{_EntryCode,_,_},{_ExitCode,_,_}}}=P0, % [_| +		{_Code,_,_}=BB, +		_Code =:= (_EntryCode ++ _ExitCode) +	      end), +      merge_pointless_splits_1(Ps, ScanBBs, DSets, [{L,{single, BB}}|Acc]) +  end. + +-spec merge_small_parts(bb_dsets()) -> {bb_dsets_rllist(), bb_dsets()}. +merge_small_parts(DSets0) -> +  {RLList, DSets1} = dsets_to_rllist(DSets0), +  RLLList = [{R, length(Elems), Elems} || {R, Elems} <- RLList], +  merge_small_parts_1(RLLList, DSets1, []). + +-spec merge_small_parts_1( +	[{bb_dset_key(), non_neg_integer(), [bb_dset_key()]}], +	bb_dsets(), bb_dsets_rllist() +       ) -> {bb_dsets_rllist(), bb_dsets()}. 
+merge_small_parts_1([], DSets, Acc) -> {Acc, DSets}; +merge_small_parts_1([{R, _, Es}], DSets, Acc) -> {[{R, Es}|Acc], DSets}; +merge_small_parts_1([{R, L, Es}|Ps], DSets, Acc) when L >= ?TUNE_TOO_FEW_BBS -> +  merge_small_parts_1(Ps, DSets, [{R,Es}|Acc]); +merge_small_parts_1([Fst,{R, L, Es}|Ps], DSets, Acc) +  when L >= ?TUNE_TOO_FEW_BBS -> +  merge_small_parts_1([Fst|Ps], DSets, [{R,Es}|Acc]); +merge_small_parts_1([{R1,L1,Es1},{R2,L2,Es2}|Ps], DSets0, Acc) -> +  ?ASSERT(L1 < ?TUNE_TOO_FEW_BBS andalso L2 < ?TUNE_TOO_FEW_BBS), +  DSets1 = dsets_union(R1, R2, DSets0), +  {R, DSets} = dsets_find(R1, DSets1), +  merge_small_parts_1([{R,L2+L1,Es2++Es1}|Ps], DSets, Acc). + +%% @doc Partition an ordering over BBs into subsequences for the dsets that +%% contain them. +%% Does not change dsets. +-spec part_order([label()], bb_dsets()) +		-> {#{bb_dset_key() => [label()]}, bb_dsets()}. +part_order(Lbs, DSets) -> part_order(Lbs, DSets, #{}). + +part_order([], DSets, Acc) -> {Acc, DSets}; +part_order([L|Ls], DSets0, Acc0) -> +  {EntryRoot, DSets1} = dsets_find({entry,L}, DSets0), +  {ExitRoot,  DSets2} = dsets_find({exit,L},  DSets1), +  Acc1 = map_append(EntryRoot, L, Acc0), +  %% Only include the label once if both entry and exit is in same partition +  Acc2 = case EntryRoot =:= ExitRoot of +	   true -> Acc1; +	   false -> map_append(ExitRoot, L, Acc1) +	 end, +  part_order(Ls, DSets2, Acc2). + +map_append(Key, Elem, Map) -> +  case Map of +    #{Key := List} -> Map#{Key := [Elem|List]}; +    #{} -> Map#{Key => [Elem]} +  end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Interference graph partitioning +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% We partition the program + +%% The algorithm considers two kinds of components; those that are local to a +%% basic block, and those that are not. 
The key is that any basic block belongs +%% to at most two non-local components; one from the beginning to the first +%% split point, and one from the end to the last split point. + +-type bb_dset_key() :: {entry | exit, label()}. +-type bb_dsets() :: dsets(bb_dset_key()). +-type bb_dsets_rllist() :: [{bb_dset_key(), [bb_dset_key()]}]. + +-spec initial_dsets(target_cfg(), module(), target_context()) -> bb_dsets(). +initial_dsets(CFG, TgtMod, TgtCtx) -> +  Labels = TgtMod:labels(CFG, TgtCtx), +  DSets0 = dsets_new(lists:append([[{entry,L},{exit,L}] || L <- Labels])), +  Edges = lists:append([[{L, S} || S <- hipe_gen_cfg:succ(CFG, L)] +			|| L <- Labels]), +  lists:foldl(fun({X, Y}, DS) -> dsets_union({exit,X}, {entry,Y}, DS) end, +	      DSets0, Edges). + +-spec join_whole_blocks(part_bb_list(), bb_dsets()) -> bb_dsets(). +join_whole_blocks(PartBBList, DSets0) -> +  lists:foldl(fun({L, {single, _}}, DS) -> dsets_union({entry,L}, {exit,L}, DS); +		 ({_, {split, _, _}}, DS) -> DS +	      end, DSets0, PartBBList). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% The disjoint set forests data structure, for elements of arbitrary types. +%% Note that the find operation mutates the set. +%% +%% We could do this more efficiently if we restricted the elements to integers, +%% and used the (mutable) hipe arrays. For arbitrary terms ETS could be used, +%% for a persistent interface (which isn't that nice when even accessors return +%% modified copies), the array module could be used. +-type dsets(X) :: #{X => {node, X} | {root, non_neg_integer()}}. + +-spec dsets_new([E]) -> dsets(E). +dsets_new(Elems) -> maps:from_list([{E,{root,0}} || E <- Elems]). + +-spec dsets_find(E, dsets(E)) -> {E, dsets(E)}. +dsets_find(E, DS0) -> +  case DS0 of +    #{E := {root,_}} -> {E, DS0}; +    #{E := {node,N}} -> +      case dsets_find(N, DS0) of +	{N, _}=T -> T; +	{R, DS1} -> {R, DS1#{E := {node,R}}} +      end +   ;_ -> error(badarg, [E, DS0]) +  end. 
+ +-spec dsets_union(E, E, dsets(E)) -> dsets(E). +dsets_union(X, Y, DS0) -> +  {XRoot, DS1} = dsets_find(X, DS0), +  case dsets_find(Y, DS1) of +    {XRoot, DS2} -> DS2; +    {YRoot, DS2} -> +      #{XRoot := {root,XRR}, YRoot := {root,YRR}} = DS2, +      if XRR < YRR -> DS2#{XRoot := {node,YRoot}}; +	 XRR > YRR -> DS2#{YRoot := {node,XRoot}}; +	 true -> DS2#{YRoot := {node,XRoot}, XRoot := {root,XRR+1}} +      end +  end. + +-spec dsets_to_rllist(dsets(E)) -> {[{Root::E, Elems::[E]}], dsets(E)}. +dsets_to_rllist(DS0) -> +  {Lists, DS} = dsets_to_rllist(maps:keys(DS0), #{}, DS0), +  {maps:to_list(Lists), DS}. + +dsets_to_rllist([], Acc, DS) -> {Acc, DS}; +dsets_to_rllist([E|Es], Acc, DS0) -> +  {ERoot, DS} = dsets_find(E, DS0), +  dsets_to_rllist(Es, map_append(ERoot, E, Acc), DS). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Third pass +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Collect all referenced temps in each partition. + +%% Note: The temps could be collected during the partition pass for each +%% half-bb, and then combined here. Would that be beneficial? + +collect_seenmap(PartBBsRLList, PartBBs) -> +  collect_seenmap(PartBBsRLList, #{}, PartBBs). + +collect_seenmap([], Acc, _PartBBs) -> Acc; +collect_seenmap([{R,Elems}|Ps], Acc, PartBBs) -> +  Seen = collect_seen_part(Elems, #{}, PartBBs), +  collect_seenmap(Ps, Acc#{R => Seen}, PartBBs). + +collect_seen_part([], Acc, _PartBBs) -> Acc; +collect_seen_part([{Half,L}|Es], Acc0, PartBBs) -> +  BB = maps:get(L, PartBBs), +  Code = case {Half, BB} of +	   {entry, {single, {C,_,_}}} -> C; +	   {entry, {split, {C,_,_}, _}} -> C; +	   {exit,  {split, _, {C,_,_}}} -> C; +	   {exit,  {single, _}} -> [] % Ignore; was collected by its entry half +	 end, +  Acc = collect_seen_code(Code, Acc0), +  collect_seen_part(Es, Acc, PartBBs). 
+ +collect_seen_code([], Acc) -> Acc; +collect_seen_code([#instr{defuse={Def,Use}}|Is], Acc) -> +  collect_seen_code(Is, add_seen(Def, add_seen(Use, Acc))). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fourth pass +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Rewrite CFG to the new temp names. +-spec transform_cfg([bb_dset_key()], part_bbs(), sub_map()) -> transformed_bbs(). + +transform_cfg(Elems, PartBBs, SubMap) -> +  transform_cfg(Elems, PartBBs, SubMap, #{}). + +transform_cfg([], _PartBBs, _SubMap, Acc) -> Acc; +transform_cfg([{Half,L}|Es], PartBBs, SubMap, Acc0) -> +  #{L := PBB} = PartBBs, +  Acc = case {Half, PBB} of +	  {entry, {single,BB}}  -> Acc0#{L=>transform_bb(BB, SubMap)}; +	  {entry, {split,BB,_}} -> Acc0#{L=>transform_bb(BB, SubMap)}; +	  {exit,  {split,_,BB}} -> Acc0#{L=>transform_bb(BB, SubMap)}; +	  {exit,  {single, _}}  -> Acc0 % Was included by the entry half +	end, +  transform_cfg(Es, PartBBs, SubMap, Acc). + +-spec transform_bb(part_bb_part(), sub_map()) -> transformed_bb(). +transform_bb(BB, SubMap) -> +  %% For now, part_bb_part() and split_bb() share representation +  transform_whole_bb(BB, SubMap). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fifth pass +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Combine colorings and substitute temps in actual cfg if there were +%% collisions. + +%% A temp can sometimes appear in more than one partition. For example, defining +%% an unused value. If these are found by combine_allocations, we have to +%% rename this temp in one of the partitions on the real cfg. +%% +%% We optimistically assume that there will be no such collisions, and when +%% there are, we fix them up as they're found. + +-spec combine_allocations([{hipe_map(), [bb_dset_key()]}], target_reg(), +			  part_bbs(), module(), target_context(), target_cfg()) +			 -> {hipe_map(), same | {rewritten, target_cfg()}}. 
+combine_allocations([{A,_}|As], MaxPhys, PartBBs, TgtMod, TgtCtx, CFG) -> +  {Phys, Pseuds} = lists:partition(fun({R,_}) -> R < MaxPhys end, A), +  {Seen, _, []} = partition_by_seen(Pseuds, #{}, [], []), +  combine_allocations(As, MaxPhys, PartBBs, TgtMod, TgtCtx, Phys, Seen, Pseuds, +		      {same, CFG}). + +-spec combine_allocations([{hipe_map(), [bb_dset_key()]}], target_reg(), +			  part_bbs(), module(), target_context(), hipe_map(), +			  seen(), hipe_map(), {same|rewritten, target_cfg()}) +			 -> {hipe_map(), same | {rewritten, target_cfg()}}. +combine_allocations([], _MaxPhys, _PartBBs, _TgtMod, _TgtCtx, Phys, _Seen, +		    Pseuds, CFGT) -> +  {Phys ++ Pseuds, case CFGT of +		     {same, _} -> same; +		     {rewritten, _} -> CFGT +		   end}; +combine_allocations([{A,PartElems}|As], MaxPhys, PartBBs, TgtMod, TgtCtx, Phys, +		    Seen0, Acc, CFGT={_,CFG0}) -> +  {Phys, Pseuds0} = lists:partition(fun({R,_}) -> R < MaxPhys end, A), +  {Seen, Pseuds, Collisions} = partition_by_seen(Pseuds0, Seen0, [], []), +  case Collisions of +    [] -> combine_allocations(As, MaxPhys, PartBBs, TgtMod, TgtCtx, Phys, Seen, +			      Pseuds++Acc, CFGT); +    _ -> +      %% There were collisions; rename all the temp numbers in Collisions +      {CFG, Renamed} = rename(Collisions, PartElems, PartBBs, TgtMod, TgtCtx, +			      CFG0), +      combine_allocations(As, MaxPhys, PartBBs, TgtMod, TgtCtx, Phys, Seen, +			  Pseuds++Renamed++Acc, {rewritten,CFG}) +  end. + +%% @doc Partitions a coloring on whether the registers are in the Seen set, +%% adding any new registers to the set. +-spec partition_by_seen(hipe_map(), seen(), hipe_map(), hipe_map()) +		       -> {seen(), hipe_map(), hipe_map()}. +partition_by_seen([], Seen, Acc, Collisions) -> {Seen, Acc, Collisions}; +partition_by_seen([C={R,_}|Cs], Seen, Acc, Colls) -> +  case Seen of +    #{R := _} -> partition_by_seen(Cs, Seen, Acc, [C|Colls]); +    #{}       -> partition_by_seen(Cs, Seen#{R => []}, [C|Acc], Colls) +  end. 
+ +-spec rename(hipe_map(), [bb_dset_key()], part_bbs(), module(), +	     target_context(), target_cfg()) +	    -> {target_cfg(), hipe_map()}. +rename(CollisionList, PartElems, PartBBs, TgtMod, TgtCtx, CFG0) -> +  {Map, Renamed} = new_names(CollisionList, TgtMod, TgtCtx, #{}, []), +  Fun = fun(I) -> +	    TgtMod:subst_temps( +	      fun(Temp) -> +		  N = TgtMod:reg_nr(Temp, TgtCtx), +		  case Map of +		    #{N := Subst} -> TgtMod:update_reg_nr(Subst, Temp, TgtCtx); +		    #{} -> Temp +		  end +	      end, I, TgtCtx) +	end, +  {rename_1(PartElems, PartBBs, TgtMod, TgtCtx, Fun, CFG0), Renamed}. + +-type rename_map() :: #{target_reg() => target_reg()}. +-type rename_fun() :: fun((target_instr()) -> target_instr()). + +-spec new_names(hipe_map(), module(), target_context(), rename_map(), +		hipe_map()) +	       -> {rename_map(), hipe_map()}. +new_names([], _TgtMod, _TgtCtx, Map, Renamed) -> {Map, Renamed}; +new_names([{R,C}|As], TgtMod, TgtCtx, Map, Renamed) -> +  Subst = TgtMod:new_reg_nr(TgtCtx), +  new_names(As, TgtMod, TgtCtx, Map#{R => Subst}, [{Subst, C} | Renamed]). + +%% @doc Maps over all instructions in a partition on the original CFG. +-spec rename_1([bb_dset_key()], part_bbs(), module(), target_context(), +	       rename_fun(), target_cfg()) -> target_cfg(). +rename_1([], _PartBBs, _TgtMod, _TgtCtx, _Fun, CFG) -> CFG; +rename_1([{Half,L}|Es], PartBBs, TgtMod, TgtCtx, Fun, CFG0) -> +  Code0 = hipe_bb:code(BB = TgtMod:bb(CFG0, L, TgtCtx)), +  Code = case {Half, maps:get(L, PartBBs)} of +	  {entry, {single,_}} -> lists:map(Fun, Code0); +	  {entry, {split,PBBP,_}} -> +	     map_start(Fun, part_bb_part_len(PBBP), Code0); +	  {exit, {split,_,PBBP}} -> +	     map_end(Fun, part_bb_part_len(PBBP), Code0); +	  {exit, {single, _}} -> Code0 +	end, +  CFG = TgtMod:update_bb(CFG0, L, hipe_bb:code_update(BB, Code), TgtCtx), +  rename_1(Es, PartBBs, TgtMod, TgtCtx, Fun, CFG). + +-spec part_bb_part_len(part_bb_part()) -> non_neg_integer(). 
part_bb_part_len({Instrs, _Livein, _Liveout}) -> length(Instrs).

%% @doc Applies Fun to the first N elements of a list and leaves the
%% remaining elements as they are.
-spec map_start(fun((X) -> Y), non_neg_integer(), [X]) -> [X|Y].
map_start(_Fun, 0, Rest) -> Rest;
map_start(Fun, N, [Head|Tail]) ->
  Mapped = Fun(Head),
  [Mapped | map_start(Fun, N-1, Tail)].

%% @doc Applies Fun to the last N elements of a list and leaves the
%% preceding elements as they are.
-spec map_end(fun((X) -> Y), non_neg_integer(), [X]) -> [X|Y].
map_end(Fun, N, List) ->
  map_end(Fun, N, length(List), List).

%% The third argument is the length of the list that is still to be
%% processed; mapping starts once exactly N elements remain.
map_end(Fun, N, N, Tail) -> lists:map(Fun, Tail);
map_end(Fun, N, Len, [Head|Tail]) when Len > N ->
  [Head | map_end(Fun, N, Len-1, Tail)].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Temp map ADT
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-type sub_map() :: {s,#{target_reg() => temp()}}.
-type inv_map() :: {i,#{temp() => target_reg()}}.

%% Looks up a register number in a substitution map; the key must be present.
-spec smap_get(target_reg(), sub_map()) -> temp().
smap_get(Reg, {s,Subst}) when is_integer(Reg) -> maps:get(Reg, Subst).

%% Looks up a temp in an inverse map; the key must be present.
-spec imap_get(temp(), inv_map()) -> target_reg().
imap_get(Temp, {i,Inverse}) when is_integer(Temp) -> maps:get(Temp, Inverse).

%% Translates a list of register numbers through a substitution map,
%% silently dropping the ones that have no entry in the map.
-spec smap_get_all_partial([target_reg()], sub_map()) -> [temp()].
smap_get_all_partial([], _) -> [];
smap_get_all_partial(Regs=[_|_], {s,Map}) ->
  Lookup = fun(T) when is_integer(T) ->
               case Map of
                 #{T := R} -> {true, R};
                 #{} -> false
               end
           end,
  lists:filtermap(Lookup, Regs).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Validation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-ifdef(DO_ASSERT).
%%%%%%%%%%%%%%%%%%%%
%% Check that the coloring is correct (if the IG is correct):
%%

%% Define these as 'ok' or 'report(X,Y)' depending on how much output you want.
-define(report0(X,Y), ?IF_DEBUG_LEVEL(0,?msg(X, Y),ok)).
-define(report(X,Y),  ?IF_DEBUG_LEVEL(1,?msg(X, Y),ok)).
-define(report2(X,Y), ?IF_DEBUG_LEVEL(2,?msg(X, Y),ok)).
-define(report3(X,Y), ?IF_DEBUG_LEVEL(3,?msg(X, Y),ok)).
%% Builds the interference graph of CFG and checks that no node shares its
%% color with one of its neighbours. Returns true when the coloring is valid.
check_coloring(Coloring, CFG, TgtMod, TgtCtx) ->
  ?report0("checking coloring ~p~n",[Coloring]),
  Liveness = TgtMod:analyze(CFG,TgtCtx),
  IG = hipe_ig:build(CFG, Liveness, TgtMod, TgtCtx),
  AdjList = hipe_vectors:list(hipe_ig:adj_list(IG)),
  Cols = init_coloring(Coloring, TgtMod, TgtCtx),
  check_cols(AdjList, Cols).

%% Converts the coloring list into the lookup structure used by the checks.
init_coloring(Coloring, TgtMod, TgtCtx) ->
  hipe_temp_map:cols2tuple(Coloring, TgtMod, TgtCtx).

%% Color assigned to X, or `uncolored' when the temp map does not know it.
check_color_of(X, Cols) ->
  case hipe_temp_map:find(X, Cols) of
    Found when Found =/= unknown -> Found;
    unknown -> uncolored
  end.

%% Walks the adjacency list. A conflict is reported with ?msg and makes the
%% overall result false, but the remaining nodes are still checked.
check_cols([], _Cols) ->
  ?report("coloring valid~n",[]),
  true;
check_cols([{X,Neighbours}|Xs], Cols) ->
  NeighbourCols = [{N, check_color_of(N, Cols)} || N <- Neighbours],
  C = check_color_of(X, Cols),
  case valid_coloring(X, C, NeighbourCols) of
    yes ->
      check_cols(Xs, Cols);
    {no,Invalids} ->
      ?msg("node ~p has same color (~p) as ~p~n", [X,C,Invalids]),
      _ = check_cols(Xs, Cols),
      false
  end.

%% `yes' when no neighbour has color C, otherwise `{no, Neighbours}' listing
%% the neighbours that clash, in the order they appear in the input.
valid_coloring(_X, C, NeighbourCols) ->
  case [Y || {Y, YC} <- NeighbourCols, YC =:= C] of
    [] -> yes;
    Clashes -> {no, Clashes}
  end.

%% Checks that every temp listed in Unused has degree 0 in the interference
%% graph; stops at, and reports, the first one that does not.
unused_unused(Unused, CFG, TgtMod, TgtCtx) ->
  IG = hipe_ig:build(CFG, TgtMod:analyze(CFG,TgtCtx), TgtMod, TgtCtx),
  HasNoEdges =
    fun(R) ->
        case hipe_ig:get_node_degree(R, IG) of
          0 -> true;
          Deg ->
            ?msg("Temp ~w is in unused but has degree ~w~n",
                 [R, Deg]),
            false
        end
    end,
  lists:all(HasNoEdges, Unused).
%%%%%%%%%%%%%%%%%%%%
%% Check that no register allocation opportunities were missed due to ?MODULE
%%
%% Runs RegAllocMod directly on the given CFG and compares the outcome with
%% Coloring, ignoring the temps listed in Unused. The comparison succeeds when
%% the spills recorded in SpillMap are sane and every temp spilled in Coloring
%% is spilled by the reference allocation as well. Otherwise the differences
%% are written to standard_error, and the result is true only if the spills
%% are sane and Coloring has at least as many temps in registers.
just_as_good_as(RegAllocMod, CFG, Liveness, SpillIndex0, SpillLimit, TgtMod,
                TgtCtx, Options, SpillMap, Coloring, Unused) ->
  {CheckColoring, _} =
    RegAllocMod:regalloc(CFG, Liveness, SpillIndex0, SpillLimit, TgtMod, TgtCtx,
                         Options),
  %% Reduce a coloring to sorted {Reg, Kind} pairs of the temps in use.
  Kinds = fun(Cols) ->
              lists:sort([{R,Kind} || {R,{Kind,_}} <- Cols,
                                      not ordsets:is_element(R, Unused)])
          end,
  Now = Kinds(Coloring),
  Check = Kinds(CheckColoring),
  CheckMap = maps:from_list(Check),
  SaneSpills = all_spills_sane_1(CheckColoring, SpillMap),
  SpilledInBoth = fun({R, spill}) -> maps:get(R, CheckMap) =:= spill;
                     ({_, reg}) -> true
                  end,
  case SaneSpills andalso lists:all(SpilledInBoth, Now) of
    true ->
      true;
    false ->
      {NowRegs, _} = count(Now),
      {CheckRegs, _} = count(Check),
      %% Picks the MFA out of the CFG by position rather than by field name.
      {M,F,A} = element(2, element(3, CFG)),
      io:fwrite(standard_error, "Colorings differ (~w, ~w)!~n"
                "MFA: ~w:~w/~w~n"
                "Unused: ~w~n"
                "Now:~w~nCorrect:~w~n",
                [TgtMod, RegAllocMod,
                 M,F,A,
                 Unused,
                 Now -- Check, Check -- Now]),
      SaneSpills andalso NowRegs >= CheckRegs
  end.

%% Number of `reg' and of `spill' entries in a list of {Reg, Kind} pairs;
%% entries of any other shape are not counted.
count(Kinds) ->
  Tally = fun({_, reg},   {Regs, Spills}) -> {Regs + 1, Spills};
             ({_, spill}, {Regs, Spills}) -> {Regs, Spills + 1};
             (_,          Counts)         -> Counts
          end,
  lists:foldl(Tally, {0, 0}, Kinds).

%% All temps in the target's variable range that are neither in LivePseudos,
%% nor precoloured, nor keys of SpillMap.
unused(LivePseudos, SpillMap, CFG, TgtMod, TgtCtx) ->
  {TMin, TMax} = TgtMod:var_range(CFG,TgtCtx),
  SpillOSet = ordsets:from_list(maps:keys(SpillMap)),
  PhysOSet = ordsets:from_list(TgtMod:all_precoloured(TgtCtx)),
  Used = ordsets:union([LivePseudos, PhysOSet, SpillOSet]),
  ordsets:subtract(lists:seq(TMin, TMax), Used).

%% Check that no temp that we wrote off was actually allocatable.
%% Walks the reference coloring: a temp that got a register must not be a key
%% of SpillMap, and a spilled temp is removed from SpillMap. The check
%% succeeds as soon as SpillMap is empty and fails if the coloring runs out
%% while keys remain.
all_spills_sane_1(_, Empty) when map_size(Empty) =:= 0 -> true;
all_spills_sane_1([], _Nonempty) -> false;
all_spills_sane_1([{T, {reg, _}}|Cs], SpillMap) ->
  not maps:is_key(T, SpillMap) andalso all_spills_sane_1(Cs, SpillMap);
all_spills_sane_1([{T, {spill, _}}|Cs], SpillMap) ->
  all_spills_sane_1(Cs, maps:remove(T, SpillMap)).

-endif. % DO_ASSERT

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Pseudo-target interface
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% The transformed CFG carries its own liveness information (see livein/3 and
%% liveout/3), so it doubles as the liveness structure.
analyze(Cfg, _ModRec) -> Cfg.

%% Returns the basic block stored under label Ix; raises badarg when the
%% label is not present.
bb(Cfg=#cfg{bbs=BBs}, Ix, _ModRec) ->
  case BBs of
    #{Ix := #transformed_bb{bb=BB}} -> BB;
    _ -> error(badarg, [Cfg, Ix])
  end.

%% Argument registers of the real target, translated through the
%% substitution map. The target's args/2 yields a list of registers while
%% smap_get/2 accepts a single integer only, so every element is translated
%% on its own (passing the whole list would raise function_clause).
args(Arity, #prepass_ctx{target_mod=TgtMod, target_ctx=TgtCtx, sub=SubM}) ->
  [smap_get(Reg, SubM) || Reg <- TgtMod:args(Arity,TgtCtx)].

labels(#cfg{bbs=BBs}, _ModRec) -> maps:keys(BBs).

%% Live-in / live-out sets are read straight from the transformed block; an
%% unknown label raises badmatch.
livein(#cfg{bbs=BBs}, Lb, _SubMod) ->
  #{Lb := #transformed_bb{livein=Livein}} = BBs,
  Livein.
liveout(#cfg{bbs=BBs}, Lb, _SubMod) ->
  #{Lb := #transformed_bb{liveout=Liveout}} = BBs,
  Liveout.

%% def_use/2 returns the {Defines, Uses} pair cached in the instruction;
%% uses/2 and defines/2 project the respective element.
uses(I, MR) -> element(2, def_use(I, MR)).
defines(I, MR) -> element(1, def_use(I, MR)).
def_use(#instr{defuse=DefUse}, _ModRec) -> DefUse.
is_move(#instr{is_move=IM}, _ModRec) -> IM.

%% Reg is translated back through the inverse map before the real target is
%% asked.
is_fixed(Reg, #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx,inv=InvM}) ->
  TgtMod:is_fixed(imap_get(Reg, InvM),TgtCtx). % XXX: Is this hot?

%% Only defined for Reg < MaxPhys; any other register number raises
%% function_clause.
is_global(Reg, #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx,
                            max_phys=MaxPhys}) when Reg < MaxPhys ->
  TgtMod:is_global(Reg,TgtCtx). % assume id-map
is_precoloured(Reg, #prepass_ctx{max_phys=MaxPhys}) -> Reg < MaxPhys.
reg_nr(Reg, _ModRec) -> Reg. % After mapping (naturally)

%% Non-allocatable temps of the real target, translated through the
%% substitution map; temps without an entry in the map are dropped.
non_alloc(#cfg{cfg=CFG}, #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx,
                                      sub=SubM}) ->
  smap_get_all_partial(reg_names(TgtMod:non_alloc(CFG,TgtCtx), TgtMod, TgtCtx),
                       SubM).
number_of_temporaries(#cfg{max_reg=MaxR}, _ModRec) -> MaxR.
+allocatable(#prepass_ctx{target_mod=TgtMod, target_ctx=TgtCtx}) -> +  TgtMod:allocatable(TgtCtx). % assume id-map +physical_name(Reg, _ModRec) -> Reg. +all_precoloured(#prepass_ctx{target_mod=TgtMod, target_ctx=TgtCtx}) -> +  TgtMod:all_precoloured(TgtCtx). % dito +var_range(#cfg{cfg=_CFG, max_reg=MaxReg}, +	  #prepass_ctx{target_mod=_TgtMod, target_ctx=_TgtCtx}) -> +  ?ASSERT(begin {TgtMin, _} = _TgtMod:var_range(_CFG,_TgtCtx), +		TgtMin =:= 0 +	  end), +  {0, MaxReg-1}. + +postorder(#cfg{cfg=CFG,rpostorder=undefined}, +	  #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx}) -> +  TgtMod:postorder(CFG,TgtCtx); +postorder(#cfg{rpostorder=Labels}, _ModRec) when is_list(Labels) -> +  lists:reverse(Labels). + +reverse_postorder(#cfg{cfg=CFG,rpostorder=undefined}, +		  #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx}) -> +  TgtMod:reverse_postorder(CFG,TgtCtx); +reverse_postorder(#cfg{rpostorder=Labels}, _ModRec) when is_list(Labels) -> +  Labels. diff --git a/lib/hipe/regalloc/hipe_sparc_specific.erl b/lib/hipe/regalloc/hipe_sparc_specific.erl index 8d34604f84..4c575c1c83 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific.erl @@ -22,114 +22,123 @@  -module(hipe_sparc_specific).  %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 -	 ,analyze/1 -	 ,labels/1 -	 ,all_precoloured/0 -	 ,bb/2 -	 ,liveout/2 -	 ,reg_nr/1 -	 ,def_use/1 -	 ,is_move/1 -	 ,is_precoloured/1 -	 ,var_range/1 -	 ,allocatable/0 -	 ,non_alloc/1 -	 ,physical_name/1 -	 ,reverse_postorder/1 -	 ,livein/2 -	 ,uses/1 -	 ,defines/1 +-export([number_of_temporaries/2 +	 ,analyze/2 +	 ,labels/2 +	 ,all_precoloured/1 +	 ,bb/3 +	 ,liveout/3 +	 ,reg_nr/2 +	 ,def_use/2 +	 ,is_move/2 +	 ,is_precoloured/2 +	 ,var_range/2 +	 ,allocatable/1 +	 ,non_alloc/2 +	 ,physical_name/2 +	 ,reverse_postorder/2 +	 ,livein/3 +	 ,uses/2 +	 ,defines/2 +	 ,defines_all_alloc/2  	]).  %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]).  
%% for hipe_ls_regalloc: --export([args/1, is_arg/1, is_global/1, new_spill_index/1]). --export([breadthorder/1, postorder/1]). +-export([args/2, is_arg/2, is_global/2, new_spill_index/2]). +-export([breadthorder/2, postorder/2]).  %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, -	 check_and_rewrite/2]). +-export([check_and_rewrite/3]). -defun_to_cfg(Defun) -> -  hipe_sparc_cfg:init(Defun). +%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, +	 update_reg_nr/3, +	 update_bb/4, +	 subst_temps/3]). -check_and_rewrite(Defun, Coloring) -> -  hipe_sparc_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring, no_context) -> +  hipe_sparc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) ->    hipe_sparc_cfg:reverse_postorder(CFG). -non_alloc(CFG) -> -  non_alloc(hipe_sparc_registers:nr_args(), hipe_sparc_cfg:params(CFG)). +non_alloc(CFG, no_context) -> +  non_alloc_1(hipe_sparc_registers:nr_args(), hipe_sparc_cfg:params(CFG)).  %% same as hipe_sparc_frame:fix_formals/2 -non_alloc(0, Rest) -> Rest; -non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); -non_alloc(_, []) -> []. +non_alloc_1(0, Rest) -> Rest; +non_alloc_1(N, [_|Rest]) -> non_alloc_1(N-1, Rest); +non_alloc_1(_, []) -> [].  %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) ->    hipe_sparc_liveness_gpr:analyse(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) ->    [X || X <- hipe_sparc_liveness_gpr:livein(Liveness,L),  	hipe_sparc:temp_is_allocatable(X)]. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) ->    [X || X <- hipe_sparc_liveness_gpr:liveout(BB_in_out_liveness,Label),  	hipe_sparc:temp_is_allocatable(X)].  %% Registers stuff -allocatable() -> +allocatable(no_context) ->    hipe_sparc_registers:allocatable_gpr(). -all_precoloured() -> +all_precoloured(no_context) ->    hipe_sparc_registers:all_precoloured(). 
-is_precoloured(Reg) -> +is_precoloured(Reg, _) ->    hipe_sparc_registers:is_precoloured_gpr(Reg). -is_fixed(R) -> +is_fixed(R, _) ->    hipe_sparc_registers:is_fixed(R). -physical_name(Reg) -> +physical_name(Reg, _) ->    Reg.  %% CFG stuff -labels(CFG) -> +labels(CFG, _) ->    hipe_sparc_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) ->    hipe_gensym:var_range(sparc). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) ->    Highest_temporary = hipe_gensym:get_var(sparc),    %% Since we can have temps from 0 to Max adjust by +1.    Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) ->    hipe_sparc_cfg:bb(CFG,L). +update_bb(CFG,L,BB,_) -> +  hipe_sparc_cfg:bb_add(CFG,L,BB). +  %% SPARC stuff -def_use(Instruction) -> -  {defines(Instruction), uses(Instruction)}. +def_use(Instruction, Ctx) -> +  {defines(Instruction, Ctx), uses(Instruction, Ctx)}. -uses(I) -> +uses(I, _) ->    [X || X <- hipe_sparc_defuse:insn_use_gpr(I),  	hipe_sparc:temp_is_allocatable(X)]. -defines(I) -> +defines(I, _) ->    [X || X <- hipe_sparc_defuse:insn_def_gpr(I),  	hipe_sparc:temp_is_allocatable(X)]. -is_move(Instruction) -> +defines_all_alloc(I, _) -> +  hipe_sparc_defuse:insn_defs_all_gpr(I). + +is_move(Instruction, _) ->    case hipe_sparc:is_pseudo_move(Instruction) of      true ->        Dst = hipe_sparc:pseudo_move_dst(Instruction), @@ -142,28 +151,45 @@ is_move(Instruction) ->      false -> false    end. -reg_nr(Reg) -> +reg_nr(Reg, _) ->    hipe_sparc:temp_reg(Reg). +new_reg_nr(_) -> +  hipe_gensym:get_next_var(sparc). + +update_reg_nr(Nr, Temp, _) -> +  hipe_sparc:mk_temp(Nr, hipe_sparc:temp_type(Temp)). + +subst_temps(SubstFun, Instr, _) -> +  hipe_sparc_subst:insn_temps( +    fun(Op) -> +	case hipe_sparc:temp_is_allocatable(Op) +	  andalso hipe_sparc:temp_type(Op) =/= 'double' +	of +	  true -> SubstFun(Op); +	  false -> Op +	end +    end, Instr). 
+  %%% Linear Scan stuff -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) ->    SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) ->    hipe_sparc_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) ->    hipe_sparc_cfg:postorder(CFG). -is_global(R) -> +is_global(R, _) ->    R =:= hipe_sparc_registers:temp1() orelse    R =:= hipe_sparc_registers:temp2() orelse    R =:= hipe_sparc_registers:temp3() orelse    hipe_sparc_registers:is_fixed(R). -is_arg(R) -> +is_arg(R, _) ->    hipe_sparc_registers:is_arg(R). -args(CFG) -> +args(CFG, _) ->    hipe_sparc_registers:args(hipe_sparc_cfg:arity(CFG)). diff --git a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl index 2edd3cb47e..0334142b95 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl @@ -22,126 +22,152 @@  -module(hipe_sparc_specific_fp).  %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 -	 ,analyze/1 -	 ,labels/1 -	 ,all_precoloured/0 -	 ,bb/2 -	 ,liveout/2 -	 ,reg_nr/1 -	 ,def_use/1 -	 ,is_move/1 -	 ,is_precoloured/1 -	 ,var_range/1 -	 ,allocatable/0 -	 ,non_alloc/1 -	 ,physical_name/1 -	 ,reverse_postorder/1 -	 ,livein/2 -	 ,uses/1 -	 ,defines/1 +-export([number_of_temporaries/2 +	 ,analyze/2 +	 ,labels/2 +	 ,all_precoloured/1 +	 ,bb/3 +	 ,liveout/3 +	 ,reg_nr/2 +	 ,def_use/2 +	 ,is_move/2 +	 ,is_precoloured/2 +	 ,var_range/2 +	 ,allocatable/1 +	 ,non_alloc/2 +	 ,physical_name/2 +	 ,reverse_postorder/2 +	 ,livein/3 +	 ,uses/2 +	 ,defines/2 +	 ,defines_all_alloc/2  	]).  %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]).  %% for hipe_ls_regalloc: -%%-export([args/1, is_arg/1, is_global, new_spill_index/1]). -%%-export([breadthorder/1, postorder/1]). +%%-export([args/2, is_arg/2, is_global, new_spill_index/2]). +%%-export([breadthorder/2, postorder/2]).  
%% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, -	 check_and_rewrite/2]). +-export([check_and_rewrite/3]). -defun_to_cfg(Defun) -> -  hipe_sparc_cfg:init(Defun). +%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, +	 update_reg_nr/3, +	 update_bb/4, +	 subst_temps/3]). -check_and_rewrite(Defun, Coloring) -> -  hipe_sparc_ra_postconditions_fp:check_and_rewrite(Defun, Coloring). +check_and_rewrite(CFG, Coloring, no_context) -> +  hipe_sparc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) ->    hipe_sparc_cfg:reverse_postorder(CFG). -non_alloc(_CFG) -> +non_alloc(_CFG, _) ->    [].  %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) ->    hipe_sparc_liveness_fpr:analyse(CFG). -livein(Liveness, L) -> +livein(Liveness, L, _) ->    hipe_sparc_liveness_fpr:livein(Liveness, L). -liveout(BB_in_out_liveness, Label) -> +liveout(BB_in_out_liveness, Label, _) ->    hipe_sparc_liveness_fpr:liveout(BB_in_out_liveness, Label).  %% Registers stuff -allocatable() -> +allocatable(no_context) ->    hipe_sparc_registers:allocatable_fpr(). -all_precoloured() -> -  allocatable(). +all_precoloured(Ctx) -> +  allocatable(Ctx). -is_precoloured(Reg) -> +is_precoloured(Reg, _) ->    hipe_sparc_registers:is_precoloured_fpr(Reg). -is_fixed(_Reg) -> +is_fixed(_Reg, _) ->    false. -physical_name(Reg) -> +physical_name(Reg, _) ->    Reg.  %% CFG stuff -labels(CFG) -> +labels(CFG, _) ->    hipe_sparc_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) ->    hipe_gensym:var_range(sparc). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) ->    Highest_temporary = hipe_gensym:get_var(sparc),    %% Since we can have temps from 0 to Max adjust by +1.    Highest_temporary + 1. -bb(CFG, L) -> +bb(CFG, L, _) ->    hipe_sparc_cfg:bb(CFG, L). +update_bb(CFG,L,BB,_) -> +  hipe_sparc_cfg:bb_add(CFG,L,BB). +  %% SPARC stuff -def_use(I) -> -  {defines(I), uses(I)}. 
+def_use(I, Ctx) -> +  {defines(I,Ctx), uses(I,Ctx)}. -uses(I) -> +uses(I, _) ->    hipe_sparc_defuse:insn_use_fpr(I). -defines(I) -> +defines(I, _) ->    hipe_sparc_defuse:insn_def_fpr(I). -is_move(I) -> +defines_all_alloc(I, _) -> +  hipe_sparc_defuse:insn_defs_all_fpr(I). + +is_move(I, _) ->    hipe_sparc:is_pseudo_fmove(I). -reg_nr(Reg) -> +reg_nr(Reg, _) ->    hipe_sparc:temp_reg(Reg). +new_reg_nr(_) -> +  hipe_gensym:get_next_var(sparc). + +update_reg_nr(Nr, _Temp, _) -> +  hipe_sparc:mk_temp(Nr, 'double'). + +subst_temps(SubstFun, Instr, _) -> +  hipe_sparc_subst:insn_temps( +    fun(Op) -> +	case hipe_sparc:temp_is_allocatable(Op) +	  andalso hipe_sparc:temp_type(Op) =:= 'double' +	of +	  true -> SubstFun(Op); +	  false -> Op +	end +    end, Instr). +  -ifdef(notdef). -new_spill_index(SpillIndex)-> +new_spill_index(SpillIndex, _)->    SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) ->    hipe_sparc_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) ->    hipe_sparc_cfg:postorder(CFG). -is_global(_R) -> +is_global(_R, _) ->    false. -is_arg(_R) -> +is_arg(_R, _) ->    false. -args(_CFG) -> +args(_CFG, _) ->    [].  -endif. diff --git a/lib/hipe/regalloc/hipe_temp_map.erl b/lib/hipe/regalloc/hipe_temp_map.erl index 4085a0e1a7..b683d08054 100644 --- a/lib/hipe/regalloc/hipe_temp_map.erl +++ b/lib/hipe/regalloc/hipe_temp_map.erl @@ -33,10 +33,12 @@  -module(hipe_temp_map). --export([cols2tuple/2, is_spilled/2, to_substlist/1]). +-export([cols2tuple/3, find/2, is_spilled/2, to_substlist/1]).  -include("../main/hipe.hrl"). +-type target_context() :: any(). +  %%----------------------------------------------------------------------------  %% Convert a list of [{R0, C1}, {R1, C2}, ...] to a temp_map  %% (Currently implemented as a tuple) tuple {C1, C2, ...}. @@ -47,34 +49,32 @@  %% element 1  %%---------------------------------------------------------------------------- --spec cols2tuple(hipe_map(), atom()) -> hipe_temp_map(). 
+-spec cols2tuple(hipe_map(), module(), target_context()) -> hipe_temp_map(). -cols2tuple(Map, Target) -> -  ?ASSERT(check_list(Map)), -  SortedMap = lists:keysort(1, Map),  -  cols2tuple(0, SortedMap, [], Target).  +cols2tuple(Map, TgtMod, TgtCtx) -> +  SortedMap = lists:keysort(1, Map), +  cols2tuple(0, SortedMap, [], TgtMod, TgtCtx). -%% sorted_cols2tuple(Map, Target) -> -%%   ?ASSERT(check_list(Map)), +%% sorted_cols2tuple(Map, TgtMod, TgtCtx) ->  %%   ?ASSERT(Map =:= lists:keysort(1, Map)), -%%   cols2tuple(0, Map, [], Target).  +%%   cols2tuple(0, Map, [], TgtMod, TgtCtx).   %% Build a dense mapping  -cols2tuple(_, [], Vs, _) -> +cols2tuple(_, [], Vs, _, _) ->    %% Done reverse the list and convert to tuple.    list_to_tuple(lists:reverse(Vs)); -cols2tuple(N, [{R, C}|Ms], Vs, Target) when N =:= R -> +cols2tuple(N, [{R, C}|Ms], Vs, TgtMod, TgtCtx) when N =:= R ->    %% N makes sure the mapping is dense. N is he next key. -  cols2tuple(N+1, Ms, [C|Vs], Target); -cols2tuple(N, SourceMapping, Vs, Target) -> +  cols2tuple(N+1, Ms, [C|Vs], TgtMod, TgtCtx); +cols2tuple(N, SourceMapping=[{R,_}|_], Vs, TgtMod, TgtCtx) when N < R ->    %% The source was sparse, make up some placeholders...    Val = 	       -    case Target:is_precoloured(N) of +    case TgtMod:is_precoloured(N, TgtCtx) of        %% If it is precoloured, we know what to map it to.        true -> {reg, N};        false -> unknown      end, -  cols2tuple(N+1, SourceMapping, [Val|Vs], Target). +  cols2tuple(N+1, SourceMapping, [Val|Vs], TgtMod, TgtCtx).  %%  %% True if temp Temp is spilled. @@ -82,7 +82,7 @@ cols2tuple(N, SourceMapping, Vs, Target) ->  -spec is_spilled(non_neg_integer(), hipe_temp_map()) -> boolean().  is_spilled(Temp, Map) -> -  case element(Temp+1, Map) of +  case find(Temp, Map) of      {reg, _R} -> false;      {fp_reg, _R}-> false;      {spill, _N} -> true; @@ -106,9 +106,10 @@ is_spilled(Temp, Map) ->  %%     {spill, _N} -> false;  %%     unknown -> false  %%   end. 
-%%  -%% %% Returns the inf temp Temp is mapped to. -%% find(Temp, Map) -> element(Temp+1, Map). + +%% Returns the inf temp Temp is mapped to. +find(Temp, Map) when Temp < tuple_size(Map) -> element(Temp+1, Map); +find(_,    Map) when is_tuple(Map) -> unknown. % consistency with cols2tuple/3  %% diff --git a/lib/hipe/regalloc/hipe_x86_specific.erl b/lib/hipe/regalloc/hipe_x86_specific.erl index 4edf8674b7..67c45cdca5 100644 --- a/lib/hipe/regalloc/hipe_x86_specific.erl +++ b/lib/hipe/regalloc/hipe_x86_specific.erl @@ -25,100 +25,105 @@  -define(HIPE_X86_REGISTERS, hipe_amd64_registers).  -define(HIPE_X86_LIVENESS, hipe_amd64_liveness).  -define(HIPE_X86_DEFUSE, hipe_amd64_defuse). +-define(HIPE_X86_SUBST, hipe_amd64_subst).  -else.  -define(HIPE_X86_SPECIFIC, hipe_x86_specific).  -define(HIPE_X86_RA_POSTCONDITIONS, hipe_x86_ra_postconditions).  -define(HIPE_X86_REGISTERS, hipe_x86_registers).  -define(HIPE_X86_LIVENESS, hipe_x86_liveness).  -define(HIPE_X86_DEFUSE, hipe_x86_defuse). +-define(HIPE_X86_SUBST, hipe_x86_subst).  -endif.  -module(?HIPE_X86_SPECIFIC). --export([number_of_temporaries/1]). +-export([number_of_temporaries/2]).  %% The following exports are used as M:F(...) calls from other modules;  %% e.g. hipe_x86_ra_ls. --export([analyze/1, -	 bb/2, -	 args/1, -	 labels/1, -	 livein/2, -	 liveout/2, -	 uses/1, -	 defines/1, -	 def_use/1, -	 is_arg/1,	% used by hipe_ls_regalloc -	 is_move/1, -	 is_fixed/1,	% used by hipe_graph_coloring_regalloc -	 is_global/1, -	 is_precoloured/1, -	 reg_nr/1, -	 non_alloc/1, -	 allocatable/0, -	 physical_name/1, -	 all_precoloured/0, -	 new_spill_index/1,	% used by hipe_ls_regalloc -	 var_range/1, -	 breadthorder/1, -	 postorder/1, -	 reverse_postorder/1]). 
+-export([analyze/2, +	 bb/3, +	 args/2, +	 labels/2, +	 livein/3, +	 liveout/3, +	 uses/2, +	 defines/2, +	 defines_all_alloc/2, +	 def_use/2, +	 is_arg/2,	% used by hipe_ls_regalloc +	 is_move/2, +	 is_fixed/2,	% used by hipe_graph_coloring_regalloc +	 is_global/2, +	 is_precoloured/2, +	 reg_nr/2, +	 non_alloc/2, +	 allocatable/1, +	 physical_name/2, +	 all_precoloured/1, +	 new_spill_index/2,	% used by hipe_ls_regalloc +	 var_range/2, +	 breadthorder/2, +	 postorder/2, +	 reverse_postorder/2]).  %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, -	 check_and_rewrite/2]). +-export([check_and_rewrite/3]). -defun_to_cfg(Defun) -> -  hipe_x86_cfg:init(Defun). +%% callbacks for hipe_regalloc_prepass +-export([new_reg_nr/1, +	 update_reg_nr/3, +	 update_bb/4, +	 subst_temps/3]). -check_and_rewrite(Defun, Coloring) -> -  ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring, _) -> +  ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(CFG, Coloring, 'normal'). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) ->    hipe_x86_cfg:reverse_postorder(CFG). -breadthorder(CFG) -> +breadthorder(CFG, _) ->    hipe_x86_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) ->    hipe_x86_cfg:postorder(CFG).  %% Globally defined registers for linear scan -is_global(R) -> +is_global(R, _) ->    ?HIPE_X86_REGISTERS:temp1() =:= R orelse    ?HIPE_X86_REGISTERS:temp0() =:= R orelse    ?HIPE_X86_REGISTERS:is_fixed(R). -is_fixed(R) -> +is_fixed(R, _) ->    ?HIPE_X86_REGISTERS:is_fixed(R). -is_arg(R) -> +is_arg(R, _) ->    ?HIPE_X86_REGISTERS:is_arg(R). -args(CFG) -> +args(CFG, _) ->    ?HIPE_X86_REGISTERS:args(hipe_x86_cfg:arity(CFG)). -non_alloc(CFG) -> -  non_alloc(?HIPE_X86_REGISTERS:nr_args(), hipe_x86_cfg:params(CFG)). +non_alloc(CFG, _) -> +  non_alloc_1(?HIPE_X86_REGISTERS:nr_args(), hipe_x86_cfg:params(CFG)).  
%% same as hipe_x86_frame:fix_formals/2 -non_alloc(0, Rest) -> Rest; -non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); -non_alloc(_, []) -> []. +non_alloc_1(0, Rest) -> Rest; +non_alloc_1(N, [_|Rest]) -> non_alloc_1(N-1, Rest); +non_alloc_1(_, []) -> [].  %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) ->    ?HIPE_X86_LIVENESS:analyze(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) ->    [X || X <- ?HIPE_X86_LIVENESS:livein(Liveness,L),  	hipe_x86:temp_is_allocatable(X),  	hipe_x86:temp_reg(X) =/= ?HIPE_X86_REGISTERS:fcalls(),  	hipe_x86:temp_reg(X) =/= ?HIPE_X86_REGISTERS:heap_limit(),  	hipe_x86:temp_type(X) =/= 'double']. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) ->    [X || X <- ?HIPE_X86_LIVENESS:liveout(BB_in_out_liveness,Label),  	hipe_x86:temp_is_allocatable(X),  	hipe_x86:temp_reg(X) =/= ?HIPE_X86_REGISTERS:fcalls(), @@ -127,37 +132,40 @@ liveout(BB_in_out_liveness,Label) ->  %% Registers stuff -allocatable() -> +allocatable(_) ->    ?HIPE_X86_REGISTERS:allocatable(). -all_precoloured() -> +all_precoloured(_) ->    ?HIPE_X86_REGISTERS:all_precoloured(). -is_precoloured(Reg) -> +is_precoloured(Reg,_) ->    ?HIPE_X86_REGISTERS:is_precoloured(Reg). -physical_name(Reg) -> +physical_name(Reg,_) ->    Reg.  %% CFG stuff -labels(CFG) -> +labels(CFG,_) ->    hipe_x86_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG,_) ->    hipe_gensym:var_range(x86). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG,_) ->    Highest_temporary = hipe_gensym:get_var(x86),    %% Since we can have temps from 0 to Max adjust by +1.    Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) ->    hipe_x86_cfg:bb(CFG,L). +update_bb(CFG,L,BB,_) -> +  hipe_x86_cfg:bb_add(CFG,L,BB). 
+  %% X86 stuff -def_use(Instruction) -> +def_use(Instruction,_) ->    {[X || X <- ?HIPE_X86_DEFUSE:insn_def(Instruction),  	 hipe_x86:temp_is_allocatable(X),  	 hipe_x86:temp_type(X) =/= 'double'], @@ -166,17 +174,19 @@ def_use(Instruction) ->  	 hipe_x86:temp_type(X) =/= 'double']    }. -uses(I) -> +uses(I,_) ->    [X || X <- ?HIPE_X86_DEFUSE:insn_use(I),  	hipe_x86:temp_is_allocatable(X),  	hipe_x86:temp_type(X) =/= 'double']. -defines(I) -> +defines(I,_) ->    [X || X <- ?HIPE_X86_DEFUSE:insn_def(I),  	hipe_x86:temp_is_allocatable(X),  	hipe_x86:temp_type(X) =/= 'double']. -is_move(Instruction) -> +defines_all_alloc(I,_) -> ?HIPE_X86_DEFUSE:insn_defs_all(I). + +is_move(Instruction,_) ->    case hipe_x86:is_move(Instruction) of      true ->        Src = hipe_x86:move_src(Instruction), @@ -197,8 +207,25 @@ is_move(Instruction) ->      false -> false    end. -reg_nr(Reg) -> +reg_nr(Reg,_) ->    hipe_x86:temp_reg(Reg). -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +new_reg_nr(_) -> +  hipe_gensym:get_next_var(x86). + +update_reg_nr(Nr, Temp, _) -> +  hipe_x86:mk_temp(Nr, hipe_x86:temp_type(Temp)). + +subst_temps(SubstFun, Instr, _) -> +  ?HIPE_X86_SUBST:insn_temps( +    fun(Op) -> +	case hipe_x86:temp_is_allocatable(Op) +	  andalso hipe_x86:temp_type(Op) =/= 'double' +	of +	  true -> SubstFun(Op); +	  false -> Op +	end +    end, Instr). + +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) ->    SpillIndex+1. diff --git a/lib/hipe/regalloc/hipe_x86_specific_x87.erl b/lib/hipe/regalloc/hipe_x86_specific_x87.erl index ece07cb2f9..85923f8f44 100644 --- a/lib/hipe/regalloc/hipe_x86_specific_x87.erl +++ b/lib/hipe/regalloc/hipe_x86_specific_x87.erl @@ -32,110 +32,118 @@  -endif.  -module(?HIPE_X86_SPECIFIC_X87). 
--export([allocatable/0, -	 is_precoloured/1, -	 %% var_range/1, -	 %% def_use/1, -	 %% is_fixed/1, -	 is_arg/1, -	 %% non_alloc/1, -	 new_spill_index/1, -	 number_of_temporaries/1 +-export([allocatable/2, +	 is_precoloured/2, +	 %% var_range/2, +	 %% def_use/2, +	 %% is_fixed/2, +	 is_arg/2, +	 %% non_alloc/2, +	 new_spill_index/2, +	 number_of_temporaries/2  	]).  %% The following exports are used as M:F(...) calls from other modules;  %% e.g. hipe_x86_ra_ls. --export([analyze/1, -	 bb/2, -	 args/1, -	 labels/1, -	 livein/2, -	 liveout/2, -	 uses/1, -	 defines/1, -	 is_global/1, -	 reg_nr/1, -	 physical_name/1, -	 breadthorder/1, -	 postorder/1, - 	 reverse_postorder/1]). - -breadthorder(CFG) -> +-export([analyze/2, +	 bb/3, +	 args/2, +	 labels/2, +	 livein/3, +	 liveout/3, +	 uses/2, +	 defines/2, +	 defines_all_alloc/2, +	 is_global/2, +	 reg_nr/2, +	 physical_name/2, +	 breadthorder/2, +	 postorder/2, +	 reverse_postorder/2]). + +%% callbacks for hipe_x86_ra_ls +-export([check_and_rewrite/4]). + +%% Rewrite happens in hipe_x86_ra_finalise:finalise/4 +check_and_rewrite(CFG, _Coloring, 'linearscan', _) -> +  {CFG, false}. + +breadthorder(CFG, _) ->    hipe_x86_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) ->    hipe_x86_cfg:postorder(CFG). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) ->    hipe_x86_cfg:reverse_postorder(CFG). -is_global(_) -> +is_global(_, _) ->    false.  -ifdef(notdef). -is_fixed(_) -> +is_fixed(_, _) ->    false.  -endif. -is_arg(_) -> +is_arg(_, _) ->    false. -args(_) -> +args(_, _) ->    [].  -ifdef(notdef). -non_alloc(_) -> +non_alloc(_, _) ->    [].  -endif.  %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) ->    ?HIPE_X86_LIVENESS:analyze(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) ->    [X || X <- ?HIPE_X86_LIVENESS:livein(Liveness,L),   	     hipe_x86:temp_is_allocatable(X),   	     hipe_x86:temp_type(X) =:= 'double']. 
-liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) ->    [X || X <- ?HIPE_X86_LIVENESS:liveout(BB_in_out_liveness,Label),  	     hipe_x86:temp_is_allocatable(X),  	     hipe_x86:temp_type(X) =:= 'double'].  %% Registers stuff -allocatable() -> +allocatable('linearscan', _) ->    ?HIPE_X86_REGISTERS:allocatable_x87(). -is_precoloured(Reg) -> +is_precoloured(Reg, _) ->    ?HIPE_X86_REGISTERS:is_precoloured_x87(Reg). -physical_name(Reg) -> +physical_name(Reg, _) ->    Reg.  %% CFG stuff -labels(CFG) -> +labels(CFG, _) ->    hipe_x86_cfg:labels(CFG).  -ifdef(notdef). -var_range(_CFG) -> +var_range(_CFG, _) ->    {Min,Max} = hipe_gensym:var_range(x86),    %% io:format("Var_range: ~w\n",[{Min,Max}]),    {Min,Max}.  -endif. -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) ->    Highest_temporary = hipe_gensym:get_var(x86),    %% Since we can have temps from 0 to Max adjust by +1.    Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) ->    hipe_x86_cfg:bb(CFG,L).  %% X86 stuff  -ifdef(notdef). -def_use(Instruction) -> +def_use(Instruction, _) ->    {[X || X <- ?HIPE_X86_DEFUSE:insn_def(Instruction),  	      hipe_x86:temp_is_allocatable(X),  	      temp_is_double(X)], @@ -145,21 +153,23 @@ def_use(Instruction) ->    }.  -endif. -uses(I) -> +uses(I, _) ->    [X || X <- ?HIPE_X86_DEFUSE:insn_use(I),   	     hipe_x86:temp_is_allocatable(X),   	     temp_is_double(X)]. -defines(I) -> +defines(I, _) ->    [X || X <- ?HIPE_X86_DEFUSE:insn_def(I),   	     hipe_x86:temp_is_allocatable(X),   	     temp_is_double(X)]. +defines_all_alloc(I, _) -> hipe_amd64_defuse:insn_defs_all(I). +  temp_is_double(Temp) ->    hipe_x86:temp_type(Temp) =:= 'double'. -reg_nr(Reg) -> +reg_nr(Reg, _) ->    hipe_x86:temp_reg(Reg). -new_spill_index(SpillIndex) -> +new_spill_index(SpillIndex, _) ->    SpillIndex+1. 
diff --git a/lib/hipe/rtl/hipe_rtl_binary.erl b/lib/hipe/rtl/hipe_rtl_binary.erl index fb9c0c196d..ad23df80d2 100644 --- a/lib/hipe/rtl/hipe_rtl_binary.erl +++ b/lib/hipe/rtl/hipe_rtl_binary.erl @@ -106,10 +106,20 @@ create_lbls(0) ->  %%------------------------------------------------------------------------------  get_word_integer(Var, Register, SystemLimitLblName, FalseLblName) -> -  [EndLbl] = create_lbls(1), -  EndName = hipe_rtl:label_name(EndLbl), -  get_word_integer(Var, Register,SystemLimitLblName,  FalseLblName, EndName, EndName, -		   [EndLbl]). +  case hipe_rtl:is_imm(Var) of +    true -> +      TaggedVal = hipe_rtl:imm_value(Var), +      true = hipe_tagscheme:is_fixnum(TaggedVal), +      Val = hipe_tagscheme:fixnum_val(TaggedVal), +      if Val < 0 -> [hipe_rtl:mk_goto(FalseLblName)]; +	true -> [hipe_rtl:mk_move(Register, hipe_rtl:mk_imm(Val))] +      end; +    false -> +      [EndLbl] = create_lbls(1), +      EndName = hipe_rtl:label_name(EndLbl), +      get_word_integer(Var, Register,SystemLimitLblName,  FalseLblName, +		       EndName, EndName, [EndLbl]) +  end.  get_word_integer(Var, Register, SystemLimitLblName, FalseLblName, TrueLblName,  		 BigLblName, Tail) -> diff --git a/lib/hipe/rtl/hipe_rtl_primops.erl b/lib/hipe/rtl/hipe_rtl_primops.erl index 062fab842f..835f489ec0 100644 --- a/lib/hipe/rtl/hipe_rtl_primops.erl +++ b/lib/hipe/rtl/hipe_rtl_primops.erl @@ -845,7 +845,7 @@ gen_free_vars([], _, _, _, AccCode) -> AccCode.  
%% call_fun (also handles enter_fun when Continuation = [])  gen_call_fun(Dst, ArgsAndFun, Continuation, Fail) ->   -  NAddressReg = hipe_rtl:mk_new_reg(), +  NCNAddressReg = hipe_rtl:mk_new_reg(),    ArityReg = hipe_rtl:mk_new_reg_gcsafe(),    [Fun|RevArgs] = lists:reverse(ArgsAndFun), @@ -856,7 +856,7 @@ gen_call_fun(Dst, ArgsAndFun, Continuation, Fail) ->    BadFunLabName = hipe_rtl:label_name(NonClosureLabel),    BadFunCode =      [NonClosureLabel, -     hipe_rtl:mk_call([NAddressReg], +     hipe_rtl:mk_call([NCNAddressReg],  		      'nonclosure_address',  		      [Fun, hipe_rtl:mk_imm(length(Args))],  		      hipe_rtl:label_name(CallNonClosureLabel), @@ -865,25 +865,26 @@ gen_call_fun(Dst, ArgsAndFun, Continuation, Fail) ->       CallNonClosureLabel,       case Continuation of         [] -> -	 hipe_rtl:mk_enter(NAddressReg, Args, not_remote); +	 hipe_rtl:mk_enter(NCNAddressReg, Args, not_remote);         _ -> -	 hipe_rtl:mk_call(Dst, NAddressReg, Args, +	 hipe_rtl:mk_call(Dst, NCNAddressReg, Args,  			  Continuation, Fail, not_remote)       end],    {BadArityLabName, BadArityCode} = gen_fail_code(Fail, {badarity, Fun}), -  CheckGetCode =  -    hipe_tagscheme:if_fun_get_arity_and_address(ArityReg, NAddressReg, +  CNAddressReg = hipe_rtl:mk_new_reg(), +  CheckGetCode = +    hipe_tagscheme:if_fun_get_arity_and_address(ArityReg, CNAddressReg,  						Fun, BadFunLabName,  						0.9),    CheckArityCode = check_arity(ArityReg, length(RevArgs), BadArityLabName),    CallCode =      case Continuation of        [] -> %% This is a tailcall -	[hipe_rtl:mk_enter(NAddressReg, ArgsAndFun, not_remote)]; +	[hipe_rtl:mk_enter(CNAddressReg, ArgsAndFun, not_remote)];        _ -> %% Ordinary call -	[hipe_rtl:mk_call(Dst, NAddressReg, ArgsAndFun, +	[hipe_rtl:mk_call(Dst, CNAddressReg, ArgsAndFun,  			  Continuation, Fail, not_remote)]      end,    [CheckGetCode, CheckArityCode, CallCode, BadFunCode, BadArityCode]. 
diff --git a/lib/hipe/sparc/Makefile b/lib/hipe/sparc/Makefile index 0e36a43d8e..ac1230df7c 100644 --- a/lib/hipe/sparc/Makefile +++ b/lib/hipe/sparc/Makefile @@ -63,7 +63,8 @@ MODULES=hipe_rtl_to_sparc \  	hipe_sparc_ra_naive \  	hipe_sparc_ra_postconditions \  	hipe_sparc_ra_postconditions_fp \ -	hipe_sparc_registers +	hipe_sparc_registers \ +	hipe_sparc_subst  HRL_FILES=hipe_sparc.hrl  ERL_FILES=$(MODULES:%=%.erl) diff --git a/lib/hipe/sparc/hipe_rtl_to_sparc.erl b/lib/hipe/sparc/hipe_rtl_to_sparc.erl index f9c043eafe..e170fec3d6 100644 --- a/lib/hipe/sparc/hipe_rtl_to_sparc.erl +++ b/lib/hipe/sparc/hipe_rtl_to_sparc.erl @@ -625,7 +625,7 @@ conv_return(I, Map, Data) ->    {I2, Map0, Data}.  conv_store(I, Map, Data) -> -  {Base1, Map0} = conv_dst(hipe_rtl:store_base(I), Map), % no immediates allowed +  {Base1, Map0} = conv_src(hipe_rtl:store_base(I), Map),    {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0),    {Base2, Map2} = conv_src(hipe_rtl:store_offset(I), Map1),    StOp = conv_stop(hipe_rtl:store_size(I)), @@ -649,13 +649,27 @@ mk_store(StOp, Src, Base1, Base2) ->    end.  mk_store2(StOp, Src, Base1, Base2) -> -  case hipe_sparc:is_temp(Base2) of +  case hipe_sparc:is_temp(Base1) of      true -> -      mk_store_rr(StOp, Src, Base1, Base2); +      case hipe_sparc:is_temp(Base2) of +	true -> +	  mk_store_rr(StOp, Src, Base1, Base2); +	_ -> +	  mk_store_ri(StOp, Src, Base1, Base2) +      end;      _ -> -      mk_store_ri(StOp, Src, Base1, Base2) +      case hipe_sparc:is_temp(Base2) of +	true -> +	  mk_store_ri(StOp, Src, Base2, Base1); +	_ -> +	  mk_store_ii(StOp, Src, Base1, Base2) +      end    end. +mk_store_ii(StOp, Src, Base, Disp) -> +  Tmp = new_untagged_temp(), +  mk_set(Base, Tmp, mk_store_ri(StOp, Src, Tmp, Disp)). +  mk_store_ri(StOp, Src, Base, Disp) ->    hipe_sparc:mk_store(StOp, Src, Base, Disp, 'new', []). 
diff --git a/lib/hipe/sparc/hipe_sparc_cfg.erl b/lib/hipe/sparc/hipe_sparc_cfg.erl index 0b2c77f27b..957c8a0d24 100644 --- a/lib/hipe/sparc/hipe_sparc_cfg.erl +++ b/lib/hipe/sparc/hipe_sparc_cfg.erl @@ -24,6 +24,7 @@  -export([init/1,           labels/1, start_label/1,           succ/2, +         map_bbs/2, fold_bbs/3,           bb/2, bb_add/3]).  -export([postorder/1, reverse_postorder/1]).  -export([linearise/1]). diff --git a/lib/hipe/sparc/hipe_sparc_defuse.erl b/lib/hipe/sparc/hipe_sparc_defuse.erl index 4f66299f1d..4b5a19a19d 100644 --- a/lib/hipe/sparc/hipe_sparc_defuse.erl +++ b/lib/hipe/sparc/hipe_sparc_defuse.erl @@ -23,6 +23,7 @@  -export([insn_def_all/1, insn_use_all/1]).  -export([insn_def_gpr/1, insn_use_gpr/1]).  -export([insn_def_fpr/1, insn_use_fpr/1]). +-export([insn_defs_all_gpr/1, insn_defs_all_fpr/1]).  -include("hipe_sparc.hrl").  %%% @@ -51,6 +52,12 @@ insn_def_gpr(I) ->      _ -> []    end. +insn_defs_all_gpr(I) -> +  case I of +    #pseudo_call{} -> true; +    _ -> false +  end. +  call_clobbered_gpr() ->    [hipe_sparc:mk_temp(R, T)     || {R,T} <- hipe_sparc_registers:call_clobbered() ++ all_fp_pseudos()]. @@ -115,6 +122,12 @@ insn_def_fpr(I) ->      _ -> []    end. +insn_defs_all_fpr(I) -> +  case I of +    #pseudo_call{} -> true; +    _ -> false +  end. +  call_clobbered_fpr() ->    [hipe_sparc:mk_temp(R, 'double') || R <- hipe_sparc_registers:allocatable_fpr()]. diff --git a/lib/hipe/sparc/hipe_sparc_frame.erl b/lib/hipe/sparc/hipe_sparc_frame.erl index a42c1983f4..37f29e660a 100644 --- a/lib/hipe/sparc/hipe_sparc_frame.erl +++ b/lib/hipe/sparc/hipe_sparc_frame.erl @@ -25,16 +25,14 @@  -include("hipe_sparc.hrl").  -include("../rtl/hipe_literals.hrl"). 
-frame(Defun) -> -  Formals = fix_formals(hipe_sparc:defun_formals(Defun)), -  Temps0 = all_temps(hipe_sparc:defun_code(Defun), Formals), -  MinFrame = defun_minframe(Defun), +frame(CFG) -> +  Formals = fix_formals(hipe_sparc_cfg:params(CFG)), +  Temps0 = all_temps(CFG, Formals), +  MinFrame = defun_minframe(CFG),    Temps = ensure_minframe(MinFrame, Temps0), -  ClobbersRA = clobbers_ra(hipe_sparc:defun_code(Defun)), -  CFG0 = hipe_sparc_cfg:init(Defun), -  Liveness = hipe_sparc_liveness_all:analyse(CFG0), -  CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersRA), -  hipe_sparc_cfg:linearise(CFG1). +  ClobbersRA = clobbers_ra(CFG), +  Liveness = hipe_sparc_liveness_all:analyse(CFG), +  do_body(CFG, Liveness, Formals, Temps, ClobbersRA).  fix_formals(Formals) ->    fix_formals(hipe_sparc_registers:nr_args(), Formals). @@ -550,29 +548,41 @@ temp_is_pseudo(Temp) ->  %%% Detect if a Defun's body clobbers RA.  %%% -clobbers_ra(Insns) -> -  case Insns of -    [#pseudo_call{}|_] -> true; -    %% moves to RA cannot occur yet -    [_|Rest] -> clobbers_ra(Rest); -    [] -> false +clobbers_ra(CFG) -> +  any_insn(fun(#pseudo_call{}) -> true; +	      (_) -> false +	   end, CFG). + +any_insn(Pred, CFG) -> +  %% Abuse fold to do an efficient "any"-operation using nonlocal control flow +  FoundSatisfying = make_ref(), +  try fold_insns(fun (I, _) -> +		     case Pred(I) of +		       true -> throw(FoundSatisfying); +		       false -> false +		     end +		 end, false, CFG) +  of _ -> false +  catch FoundSatisfying -> true    end.  %%%  %%% Build the set of all temps used in a Defun's body.  %%% -all_temps(Code, Formals) -> -  S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> +  S0 = fold_insns(fun find_temps/2, tset_empty(), CFG),    S1 = tset_del_list(S0, Formals),    tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). 
-find_temps([I|Insns], S0) -> +find_temps(I, S0) ->    S1 = tset_add_list(S0, hipe_sparc_defuse:insn_def_all(I)), -  S2 = tset_add_list(S1, hipe_sparc_defuse:insn_use_all(I)), -  find_temps(Insns, S2); -find_temps([], S) -> -  S. +  tset_add_list(S1, hipe_sparc_defuse:insn_use_all(I)). + +fold_insns(Fun, InitAcc, CFG) -> +  hipe_sparc_cfg:fold_bbs( +    fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, +    InitAcc, CFG).  tset_empty() ->    gb_sets:new(). @@ -601,16 +611,11 @@ tset_to_list(S) ->  %%% in the middle of a tailcall.  %%% -defun_minframe(Defun) -> -  MaxTailArity = body_mta(hipe_sparc:defun_code(Defun), 0), -  MyArity = length(fix_formals(hipe_sparc:defun_formals(Defun))), +defun_minframe(CFG) -> +  MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), +  MyArity = length(fix_formals(hipe_sparc_cfg:params(CFG))),    erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> -  body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> -  MTA. -  insn_mta(I, MTA) ->    case I of      #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/sparc/hipe_sparc_main.erl b/lib/hipe/sparc/hipe_sparc_main.erl index c16751c7bd..8e9c560bb2 100644 --- a/lib/hipe/sparc/hipe_sparc_main.erl +++ b/lib/hipe/sparc/hipe_sparc_main.erl @@ -24,12 +24,14 @@  rtl_to_sparc(MFA, RTL, Options) ->    Defun1 = hipe_rtl_to_sparc:translate(RTL), +  CFG1 = hipe_sparc_cfg:init(Defun1),    %% io:format("~w: after translate\n", [?MODULE]),    %% hipe_sparc_pp:pp(Defun1), -  Defun2 = hipe_sparc_ra:ra(Defun1, Options), +  CFG2 = hipe_sparc_ra:ra(CFG1, Options),    %% io:format("~w: after regalloc\n", [?MODULE]), -  %% hipe_sparc_pp:pp(Defun2), -  Defun3 = hipe_sparc_frame:frame(Defun2), +  %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG2)), +  CFG3 = hipe_sparc_frame:frame(CFG2), +  Defun3 = hipe_sparc_cfg:linearise(CFG3),    %% io:format("~w: after frame\n", [?MODULE]),    %% hipe_sparc_pp:pp(Defun3),    Defun4 = hipe_sparc_finalise:finalise(Defun3), diff --git 
a/lib/hipe/sparc/hipe_sparc_ra.erl b/lib/hipe/sparc/hipe_sparc_ra.erl index afea8c9b4c..c4b909528d 100644 --- a/lib/hipe/sparc/hipe_sparc_ra.erl +++ b/lib/hipe/sparc/hipe_sparc_ra.erl @@ -22,36 +22,39 @@  -module(hipe_sparc_ra).  -export([ra/2]). -ra(Defun0, Options) -> -  %% hipe_sparc_pp:pp(Defun0), -  {Defun1, Coloring_fp, SpillIndex} +ra(CFG0, Options) -> +  %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG0)), +  {CFG1, _FPLiveness1, Coloring_fp, SpillIndex}      = case proplists:get_bool(inline_fp, Options) of  	true -> -	  hipe_regalloc_loop:ra_fp(Defun0, Options, +	  FPLiveness0 = hipe_sparc_specific_fp:analyze(CFG0, no_context), +	  hipe_regalloc_loop:ra_fp(CFG0, FPLiveness0, Options,  				   hipe_coalescing_regalloc, -				   hipe_sparc_specific_fp); +				   hipe_sparc_specific_fp, no_context);  	false -> -	  {Defun0,[],0} +	  {CFG0,undefined,[],0}        end, -  %% hipe_sparc_pp:pp(Defun1), -  {Defun2, Coloring} +  %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG1)), +  GPLiveness1 = hipe_sparc_specific:analyze(CFG1, no_context), +  {CFG2, _GPLiveness2, Coloring}      = case proplists:get_value(regalloc, Options, coalescing) of  	coalescing -> -	  ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); +	  ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_coalescing_regalloc);  	optimistic -> -	  ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); +	  ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_optimistic_regalloc);  	graph_color -> -	  ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); +	  ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_graph_coloring_regalloc);  	linear_scan -> -	  hipe_sparc_ra_ls:ra(Defun1, SpillIndex, Options); +	  hipe_sparc_ra_ls:ra(CFG1, GPLiveness1, SpillIndex, Options);  	naive -> -	  hipe_sparc_ra_naive:ra(Defun1, Coloring_fp, Options); +	  hipe_sparc_ra_naive:ra(CFG1, GPLiveness1, Coloring_fp, Options);          _ ->  	  exit({unknown_regalloc_compiler_option,  		proplists:get_value(regalloc,Options)})   
     end, -  %% hipe_sparc_pp:pp(Defun2), -  hipe_sparc_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). +  %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG2)), +  hipe_sparc_ra_finalise:finalise(CFG2, Coloring, Coloring_fp). -ra(Defun, SpillIndex, Options, RegAllocMod) -> -  hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_sparc_specific). +ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> +  hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, +			hipe_sparc_specific, no_context). diff --git a/lib/hipe/sparc/hipe_sparc_ra_finalise.erl b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl index dc1e69c101..5d6056071c 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_finalise.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl @@ -23,13 +23,14 @@  -export([finalise/3]).  -include("hipe_sparc.hrl"). -finalise(Defun, TempMap, FPMap0) -> -  Code = hipe_sparc:defun_code(Defun), -  {_, SpillLimit} = hipe_sparc:defun_var_range(Defun), +finalise(CFG, TempMap, FPMap0) -> +  {_, SpillLimit} = hipe_gensym:var_range(sparc),    Map = mk_ra_map(TempMap, SpillLimit),    FPMap1 = mk_ra_map_fp(FPMap0, SpillLimit), -  NewCode = ra_code(Code, Map, FPMap1, []), -  Defun#defun{code=NewCode}. +  hipe_sparc_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FPMap1) end, CFG). + +ra_bb(BB, Map, FpMap) -> +  hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap, [])).  ra_code([I|Insns], Map, FPMap, Accum) ->    ra_code(Insns, Map, FPMap, [ra_insn(I, Map, FPMap) | Accum]); diff --git a/lib/hipe/sparc/hipe_sparc_ra_ls.erl b/lib/hipe/sparc/hipe_sparc_ra_ls.erl index 19e7c92d2f..7019937737 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_ls.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_ls.erl @@ -21,37 +21,35 @@  %% Linear Scan register allocator for SPARC  -module(hipe_sparc_ra_ls). --export([ra/3]). +-export([ra/4]). 
-ra(Defun, SpillIndex, Options) -> -  NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), -  CFG = hipe_sparc_cfg:init(NewDefun), -  SpillLimit = hipe_sparc_specific:number_of_temporaries(CFG), -  alloc(NewDefun, SpillIndex, SpillLimit, Options). +ra(CFG, Liveness, SpillIndex, Options) -> +  SpillLimit = hipe_sparc_specific:number_of_temporaries(CFG, no_context), +  alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). -alloc(Defun, SpillIndex, SpillLimit, Options) -> -  CFG = hipe_sparc_cfg:init(Defun), +alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) ->    {Coloring, _NewSpillIndex} =      regalloc( -      CFG, +      CFG, Liveness,        hipe_sparc_registers:allocatable_gpr()--        [hipe_sparc_registers:temp3(),         hipe_sparc_registers:temp2(),         hipe_sparc_registers:temp1()],        [hipe_sparc_cfg:start_label(CFG)],        SpillIndex, SpillLimit, Options, -      hipe_sparc_specific), -  {NewDefun, _DidSpill} = +      hipe_sparc_specific, no_context), +  {NewCFG, _DidSpill} =      hipe_sparc_ra_postconditions:check_and_rewrite( -      Defun, Coloring, 'linearscan'), -  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific), +      CFG, Coloring, 'linearscan'), +  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific, no_context),    {TempMap2,_NewSpillIndex2} = -    hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, -			     hipe_sparc_specific, TempMap), +    hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, +			     hipe_sparc_specific, no_context, TempMap),    Coloring2 =      hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), -  {NewDefun, Coloring2}. +  {NewCFG, Liveness, Coloring2}. -regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> -  hipe_ls_regalloc:regalloc( -    CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target). 
+regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, +	 TgtMod, TgtCtx) -> +  hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, +			    DontSpill, Options, TgtMod, TgtCtx). diff --git a/lib/hipe/sparc/hipe_sparc_ra_naive.erl b/lib/hipe/sparc/hipe_sparc_ra_naive.erl index b6c33dec6c..745e44f2f9 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_naive.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_naive.erl @@ -20,11 +20,11 @@  %%  -module(hipe_sparc_ra_naive). --export([ra/3]). +-export([ra/4]).  -include("hipe_sparc.hrl"). -ra(Defun, _Coloring_fp, _Options) ->	% -> {Defun, Coloring} -  {NewDefun,_DidSpill} = -    hipe_sparc_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), -  {NewDefun, []}. +ra(CFG, Liveness, _Coloring_fp, _Options) ->	% -> {CFG, Liveness, Coloring} +  {NewCFG,_DidSpill} = +    hipe_sparc_ra_postconditions:check_and_rewrite2(CFG, [], 'naive'), +  {NewCFG, Liveness, []}. diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl index ab31b3c8d9..e8e231e35c 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl @@ -25,17 +25,13 @@  -include("hipe_sparc.hrl"). -check_and_rewrite(Defun, Coloring, Allocator) -> -  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific), -  check_and_rewrite2(Defun, TempMap, Allocator). +check_and_rewrite(CFG, Coloring, Allocator) -> +  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific, no_context), +  check_and_rewrite2(CFG, TempMap, Allocator). -check_and_rewrite2(Defun, TempMap, Allocator) -> +check_and_rewrite2(CFG, TempMap, Allocator) ->    Strategy = strategy(Allocator), -  #defun{code=Code0} = Defun, -  {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), -  VarRange = {0, hipe_gensym:get_var(sparc)}, -  {Defun#defun{code=Code1, var_range=VarRange}, -   DidSpill}. 
+  do_bbs(hipe_sparc_cfg:labels(CFG), TempMap, Strategy, CFG, false).  strategy(Allocator) ->    case Allocator of @@ -44,6 +40,13 @@ strategy(Allocator) ->      'naive' -> 'fixed'    end. +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> +  Code0 = hipe_bb:code(BB = hipe_sparc_cfg:bb(CFG0, Lbl)), +  {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), +  CFG = hipe_sparc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), +  do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). +  do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) ->    {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy),    do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl index d893ac26e9..544b8b05a8 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl @@ -25,13 +25,17 @@  -include("hipe_sparc.hrl"). -check_and_rewrite(Defun, Coloring) -> -  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific_fp), -  #defun{code=Code0} = Defun, -  {Code1,DidSpill} = do_insns(Code0, TempMap, [], false), -  VarRange = {0, hipe_gensym:get_var(sparc)}, -  {Defun#defun{code=Code1, var_range=VarRange}, -   DidSpill}. +check_and_rewrite(CFG, Coloring) -> +  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific_fp, +				     no_context), +  do_bbs(hipe_sparc_cfg:labels(CFG), TempMap, CFG, false). + +do_bbs([], _TempMap, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, CFG0, DidSpill0) -> +  Code0 = hipe_bb:code(BB = hipe_sparc_cfg:bb(CFG0, Lbl)), +  {Code, DidSpill} = do_insns(Code0, TempMap, [], DidSpill0), +  CFG = hipe_sparc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), +  do_bbs(Lbls, TempMap, CFG, DidSpill).  
do_insns([I|Insns], TempMap, Accum, DidSpill0) ->    {NewIs, DidSpill1} = do_insn(I, TempMap), diff --git a/lib/hipe/sparc/hipe_sparc_registers.erl b/lib/hipe/sparc/hipe_sparc_registers.erl index 6681a10070..20138836dd 100644 --- a/lib/hipe/sparc/hipe_sparc_registers.erl +++ b/lib/hipe/sparc/hipe_sparc_registers.erl @@ -249,6 +249,8 @@ is_arg(R) ->      _ -> false    end. +%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is +%% hard-coded in hipe_sparc_defuse:insn_defs_all_gpr/1  call_clobbered() ->		% does the RA strip the type or not?    [%% ?G0 is the non-allocatable constant zero     {?G1,tagged},{?G1,untagged}, diff --git a/lib/hipe/sparc/hipe_sparc_subst.erl b/lib/hipe/sparc/hipe_sparc_subst.erl new file mode 100644 index 0000000000..e5cd244985 --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_subst.erl @@ -0,0 +1,85 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%%  +%% Copyright Ericsson AB 2016. All Rights Reserved. +%%  +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%%     http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%  +%% %CopyrightEnd% +%% + +-module(hipe_sparc_subst). +-export([insn_temps/2]). +-include("hipe_sparc.hrl"). + +%% These should be moved to hipe_sparc and exported +-type temp()    :: #sparc_temp{}. +-type src2()    :: temp() | #sparc_simm13{}. +-type src2b()   :: src2() | #sparc_uimm5{}. +-type funv()    :: #sparc_mfa{} | #sparc_prim{} | temp(). +-type arg()     :: temp() | integer(). +-type insn()    :: tuple(). 
% for now + +-type subst_fun() :: fun((temp()) -> temp()). + +%% @doc Maps over the temporaries in an instruction +-spec insn_temps(subst_fun(), insn()) -> insn(). +insn_temps(T, I) -> +  S2 = fun(O) -> src2_temps(T, O) end, +  S2B = fun(O) -> src2b_temps(T, O) end, +  Arg = fun(O) -> arg_temps(T, O) end, +  case I of +      #alu{src1=L,src2=R,dst=D} -> I#alu{src1=T(L),src2=S2B(R),dst=T(D)}; +      #bp{} -> I; +      #comment{} -> I; +      #jmp{src1=L,src2=R} -> I#jmp{src1=T(L),src2=S2(R)}; +      #label{} -> I; +      #pseudo_bp{} -> I; +      #pseudo_call{funv=F} -> I#pseudo_call{funv=funv_temps(T,F)}; +      #pseudo_call_prepare{} -> I; +      #pseudo_move{src=S,dst=D} -> I#pseudo_move{src=T(S),dst=T(D)}; +      #pseudo_ret{} -> I; +      #pseudo_set{dst=D}-> I#pseudo_set{dst=T(D)}; +      #pseudo_tailcall{funv=F,stkargs=Stk} -> +	  I#pseudo_tailcall{funv=funv_temps(T,F),stkargs=lists:map(Arg,Stk)}; +      #pseudo_tailcall_prepare{} -> I; +      #rdy{dst=D} -> I#rdy{dst=T(D)}; +      #sethi{dst=D} -> I#sethi{dst=T(D)}; +      #store{src=S,base=B,disp=D} -> I#store{src=T(S),base=T(B),disp=S2(D)}; +      #fp_binary{src1=L,src2=R,dst=D} -> +	  I#fp_binary{src1=T(L),src2=T(R),dst=T(D)}; +      #fp_unary{src=S,dst=D} -> I#fp_unary{src=T(S),dst=T(D)}; +      #pseudo_fload{base=B,disp=Di,dst=Ds} -> +	  I#pseudo_fload{base=T(B),disp=S2(Di),dst=T(Ds)}; +      #pseudo_fmove{src=S,dst=D} -> I#pseudo_fmove{src=T(S),dst=T(D)}; +      #pseudo_fstore{src=S,base=B,disp=D} -> +	  I#pseudo_fstore{src=T(S),base=T(B),disp=S2(D)} +  end. + +-spec src2_temps(subst_fun(), src2()) -> src2(). +src2_temps(_SubstTemp, I=#sparc_simm13{}) -> I; +src2_temps(SubstTemp,  T=#sparc_temp{}) -> SubstTemp(T). + +-spec src2b_temps(subst_fun(), src2b()) -> src2b(). +src2b_temps(_SubstTemp, I=#sparc_uimm5{}) -> I; +src2b_temps(SubstTemp, Op) -> src2_temps(SubstTemp, Op). + +-spec funv_temps(subst_fun(), funv()) -> funv(). 
+funv_temps(_SubstTemp, M=#sparc_mfa{}) -> M; +funv_temps(_SubstTemp, P=#sparc_prim{}) -> P; +funv_temps(SubstTemp,  T=#sparc_temp{}) -> SubstTemp(T). + +-spec arg_temps(subst_fun(), arg()) -> arg(). +arg_temps(_SubstTemp, Imm) when is_integer(Imm) -> Imm; +arg_temps(SubstTemp,  T=#sparc_temp{}) -> SubstTemp(T). diff --git a/lib/hipe/ssa/hipe_ssa_liveness.inc b/lib/hipe/ssa/hipe_ssa_liveness.inc index 78488c65fc..46df8b66ad 100644 --- a/lib/hipe/ssa/hipe_ssa_liveness.inc +++ b/lib/hipe/ssa/hipe_ssa_liveness.inc @@ -40,6 +40,15 @@  	 ssa_liveness__livein/2]).  %%	 ssa_liveness__livein/3],  %%	 ssa_liveness__liveout/2]). +-type set(E) :: gb_sets:set(E). +-type liveness(Label, Var) :: +	#{Label => {{Gen          :: set(Var), +		     Kill         :: set(Var), +		     {TotalDirGen :: set(Var), +		      DirGen      :: gb_trees:tree(Label, set(Var))}}, +		    LiveIn        :: set(Var), +		    LiveOut       :: set(Var), +		    Successors    :: [Label]}}.  -endif.  %% -ifdef(DEBUG_LIVENESS).  %% -export([pp_liveness/1]). @@ -262,21 +271,15 @@ update_directed_gen({Pred, Var}, Map)->  %%  %% liveness  %% +-compile({inline, [liveness_lookup/2, liveness_update/3]}).  liveness_init(List) -> -  liveness_init1(List, gb_trees:empty()). +  maps:from_list(List). -liveness_init1([{Label, Info}|Left], Map) -> -  liveness_init1(Left, gb_trees:insert(Label, Info, Map)); -liveness_init1([], Map) -> -  Map. - -liveness_lookup(Label, Map) -> -  {value, Info} = gb_trees:lookup(Label, Map), -  Info. - -liveness_update(Label, NewInfo, Map) -> -  gb_trees:update(Label, NewInfo, Map). +liveness_lookup(Label, Liveness) -> +  maps:get(Label, Liveness). +liveness_update(Label, Val, Liveness) -> +  maps:update(Label, Val, Liveness).  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/lib/hipe/test/hipe_testsuite_driver.erl b/lib/hipe/test/hipe_testsuite_driver.erl index a3048d907e..88576775ca 100644 --- a/lib/hipe/test/hipe_testsuite_driver.erl +++ b/lib/hipe/test/hipe_testsuite_driver.erl @@ -99,7 +99,7 @@ write_suite(Suite) ->  write_header(#suite{suitename = SuiteName, outputfile = OutputFile,  		    testcases = TestCases}) ->      Exports = format_export(TestCases), -    TimeLimit = 5,	%% with 1 or 2 it fails on some slow machines... +    TimeLimit = 6,	%% with 1, 2, or 3 it fails on some slow machines...      io:format(OutputFile,  	      "%% ATTENTION!\n"  	      "%% This is an automatically generated file. Do not edit.\n\n" @@ -168,6 +168,10 @@ run(TestCase, Dir, _OutDir) ->      HiPEOpts = try TestCase:hipe_options() catch error:undef -> [] end,      {ok, TestCase} = hipe:c(TestCase, HiPEOpts),      ok = TestCase:test(), +    {ok, TestCase} = hipe:c(TestCase, [o1|HiPEOpts]), +    ok = TestCase:test(), +    {ok, TestCase} = hipe:c(TestCase, [o0|HiPEOpts]), +    ok = TestCase:test(),      ToLLVM = try TestCase:to_llvm() catch error:undef -> true end,      case ToLLVM andalso hipe:llvm_support_available() of  	true -> diff --git a/lib/hipe/util/Makefile b/lib/hipe/util/Makefile index 66e9421c25..04de7f7823 100644 --- a/lib/hipe/util/Makefile +++ b/lib/hipe/util/Makefile @@ -113,4 +113,3 @@ release_docs_spec:  $(EBIN)/hipe_timing.beam: ../main/hipe.hrl -$(EBIN)/hipe_vectors.beam: hipe_vectors.hrl diff --git a/lib/hipe/util/hipe_vectors.erl b/lib/hipe/util/hipe_vectors.erl index 7f6c8e91c2..90d736d02c 100644 --- a/lib/hipe/util/hipe_vectors.erl +++ b/lib/hipe/util/hipe_vectors.erl @@ -33,11 +33,25 @@  	 %% list_to_vector/1,  	 list/1]). --include("hipe_vectors.hrl"). +%%-define(USE_TUPLES, true). +%%-define(USE_GBTREES, true). +-define(USE_ARRAYS, true). + +-type vector() :: vector(_). +-export_type([vector/0, vector/1]). 
+ +-spec new(non_neg_integer(), V) -> vector(E) when V :: E. +-spec set(vector(E), non_neg_integer(), V :: E) -> vector(E). +-spec get(vector(E), non_neg_integer()) -> E. +-spec size(vector(_)) -> non_neg_integer(). +-spec vector_to_list(vector(E)) -> [E]. +%% -spec list_to_vector([E]) -> vector(E). +-spec list(vector(E)) -> [{non_neg_integer(), E}].  %% ---------------------------------------------------------------------  -ifdef(USE_TUPLES). +-opaque vector(_) :: tuple().  new(N, V) ->      erlang:make_tuple(N, V). @@ -68,8 +82,8 @@ get(Vec, Ix) -> element(Ix+1, Vec).  %% ---------------------------------------------------------------------  -ifdef(USE_GBTREES). +-opaque vector(E) :: gb_trees:tree(non_neg_integer(), E). --spec new(non_neg_integer(), _) -> hipe_vector().  new(N, V) when is_integer(N), N >= 0 ->      gb_trees:from_orddict(mklist(N, V)). @@ -81,14 +95,11 @@ mklist(M, N, V) when M < N ->  mklist(_, _, _) ->      []. --spec size(hipe_vector()) -> non_neg_integer().  size(V) -> gb_trees:size(V). --spec list(hipe_vector()) -> [{_, _}].  list(Vec) ->      gb_trees:to_list(Vec). -%% -spec list_to_vector([_]) -> hipe_vector().  %% list_to_vector(Xs) ->  %%     gb_trees:from_orddict(index(Xs, 0)).  %%  @@ -97,16 +108,29 @@ list(Vec) ->  %% index([],_) ->  %%     []. --spec vector_to_list(hipe_vector()) -> [_].  vector_to_list(V) ->      gb_trees:values(V). --spec set(hipe_vector(), non_neg_integer(), _) -> hipe_vector().  set(Vec, Ix, V) ->      gb_trees:update(Ix, V, Vec). --spec get(hipe_vector(), non_neg_integer()) -> any().  get(Vec, Ix) ->      gb_trees:get(Ix, Vec).  -endif. %% ifdef USE_GBTREES + +%% --------------------------------------------------------------------- + +-ifdef(USE_ARRAYS). +%%-opaque vector(E) :: array:array(E). +-type vector(E) :: array:array(E). % Work around dialyzer bug + +new(N, V) -> array:new(N, {default, V}). +size(V) -> array:size(V). +list(Vec) -> array:to_orddict(Vec). +%% list_to_vector(Xs) -> array:from_list(Xs). 
+vector_to_list(V) -> array:to_list(V). +set(Vec, Ix, V) -> array:set(Ix, V, Vec). +get(Vec, Ix) -> array:get(Ix, Vec). + +-endif. %% ifdef USE_ARRAYS diff --git a/lib/hipe/util/hipe_vectors.hrl b/lib/hipe/util/hipe_vectors.hrl deleted file mode 100644 index d4556e9dc4..0000000000 --- a/lib/hipe/util/hipe_vectors.hrl +++ /dev/null @@ -1,29 +0,0 @@ -%% -%% %CopyrightBegin% -%%  -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. -%%  -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%%     http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%%  -%% %CopyrightEnd% -%% -%%-define(USE_TUPLES, true). --define(USE_GBTREES, true). - --ifdef(USE_TUPLES). --type hipe_vector() :: tuple(). --endif. - --ifdef(USE_GBTREES). --type hipe_vector() :: gb_trees:tree(). --endif. 
diff --git a/lib/hipe/x86/Makefile b/lib/hipe/x86/Makefile index 93f8b955dd..84edeaebe7 100644 --- a/lib/hipe/x86/Makefile +++ b/lib/hipe/x86/Makefile @@ -60,9 +60,9 @@ MODULES=hipe_rtl_to_x86 \  	hipe_x86_ra_ls \  	hipe_x86_ra_naive \  	hipe_x86_ra_postconditions \ -	hipe_x86_ra_x87_ls \  	hipe_x86_registers \  	hipe_x86_spill_restore \ +	hipe_x86_subst \  	hipe_x86_x87  HRL_FILES=hipe_x86.hrl @@ -133,7 +133,6 @@ $(EBIN)/hipe_x86_ra: ../main/hipe.hrl  $(EBIN)/hipe_x86_ra_dummy.beam: ../main/hipe.hrl  $(EBIN)/hipe_x86_ra_ls.beam: ../main/hipe.hrl  $(EBIN)/hipe_x86_ra_postconditions.beam: ../main/hipe.hrl -$(EBIN)/hipe_x86_ra_x87_ls.beam: ../main/hipe.hrl  $(EBIN)/hipe_x86_registers.beam: ../rtl/hipe_literals.hrl  $(EBIN)/hipe_x86_spill_restore.beam: ../main/hipe.hrl ../flow/cfg.hrl  $(EBIN)/hipe_x86_x87.beam: ../main/hipe.hrl diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl index d13f63b1d9..4c8c98551c 100644 --- a/lib/hipe/x86/hipe_rtl_to_x86.erl +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -85,7 +85,7 @@ conv_insn(I, Map, Data) ->  	  true ->  	    conv_shift(Dst, Src1, BinOp, Src2);  	  false -> -	    conv_alu(Dst, Src1, BinOp, Src2, []) +	    conv_alu_nocc(Dst, Src1, BinOp, Src2, [])  	end,        {FixSrc1++FixSrc2++I2, Map2, Data};      #alub{} -> @@ -144,7 +144,7 @@ conv_insn(I, Map, Data) ->        {I2, Map, Data};      #load{} ->        {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map), -      {FixSrc, Src, Map1} = conv_src(hipe_rtl:load_src(I), Map0), +      {FixSrc, Src, Map1} = conv_src_noimm(hipe_rtl:load_src(I), Map0),        {FixOff, Off, Map2} = conv_src(hipe_rtl:load_offset(I), Map1),        I2 = case {hipe_rtl:load_size(I), hipe_rtl:load_sign(I)} of  	     {byte, signed} -> @@ -171,6 +171,7 @@ conv_insn(I, Map, Data) ->        Src = hipe_x86:mk_imm_from_atom(hipe_rtl:load_atom_atom(I)),        I2 = [hipe_x86:mk_move(Src, Dst)],        {I2, Map0, Data}; +    #move{src=Dst, dst=Dst} -> {[], Map, Data};      #move{} ->     
   {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map),        {FixSrc, Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0), @@ -182,11 +183,11 @@ conv_insn(I, Map, Data) ->        I2 = move_retvals(Args, [hipe_x86:mk_ret(-1)]),        {FixArgs++I2, Map0, Data};      #store{} -> -      {Ptr, Map0} = conv_dst(hipe_rtl:store_base(I), Map), +      {FixPtr, Ptr, Map0} = conv_src_noimm(hipe_rtl:store_base(I), Map),        {FixSrc, Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0),        {FixOff, Off, Map2} = conv_src(hipe_rtl:store_offset(I), Map1),        I2 = mk_store(hipe_rtl:store_size(I), Src, Ptr, Off), -      {FixSrc++FixOff++I2, Map2, Data}; +      {FixPtr++FixSrc++FixOff++I2, Map2, Data};      #switch{} ->	% this one also updates Data :-(        %% from hipe_rtl2sparc, but we use a hairy addressing mode        %% instead of doing the arithmetic manually @@ -206,7 +207,7 @@ conv_insn(I, Map, Data) ->        {I2, Map1, NewData};      #fload{} ->        {Dst, Map0} = conv_dst(hipe_rtl:fload_dst(I), Map), -      {[], Src, Map1} = conv_src(hipe_rtl:fload_src(I), Map0), +      {[], Src, Map1} = conv_src_noimm(hipe_rtl:fload_src(I), Map0),        {[], Off, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1),        I2 = [hipe_x86:mk_fmove(hipe_x86:mk_mem(Src, Off, 'double'),Dst)],        {I2, Map2, Data}; @@ -249,6 +250,22 @@ conv_insn(I, Map, Data) ->  %%% Finalise the conversion of a 3-address ALU operation, taking  %%% care to not introduce more temps and moves than necessary. 
+conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) -> +  case (not same_opnd(Dst, Src1)) andalso (not same_opnd(Dst, Src2)) +    andalso (hipe_x86:is_temp(Src1) orelse hipe_x86:is_temp(Src2)) +  of +    false -> conv_alu(Dst, Src1, 'add', Src2, Tail); +    true -> % Use LEA +      Type = typeof_dst(Dst), +      Mem = case hipe_x86:is_temp(Src1) of +	      true  -> hipe_x86:mk_mem(Src1, Src2, Type); +	      false -> hipe_x86:mk_mem(Src2, Src1, Type) +	    end, +      [hipe_x86:mk_lea(Mem, Dst) | Tail] +  end; +conv_alu_nocc(Dst, Src1, BinOp, Src2, Tail) -> +  conv_alu(Dst, Src1, BinOp, Src2, Tail). +  conv_alu(Dst, Src1, 'imul', Src2, Tail) ->    mk_imul(Src1, Src2, Dst, Tail);  conv_alu(Dst, Src1, BinOp, Src2, Tail) -> @@ -572,6 +589,16 @@ conv_fun(Fun, Map) ->        end    end. +conv_src_noimm(Opnd, Map) -> +  R={FixSrc0, Src, NewMap} = conv_src(Opnd, Map), +  case hipe_x86:is_imm(Src) of +    false -> R; +    true -> +      Tmp = new_untagged_temp(), +      {FixSrc0 ++ [hipe_x86:mk_move(Src, Tmp)], +       Tmp, NewMap} +  end. +  %%% Convert an RTL source operand (imm/var/reg).  conv_src(Opnd, Map) -> diff --git a/lib/hipe/x86/hipe_x86_cfg.erl b/lib/hipe/x86/hipe_x86_cfg.erl index ab40b68580..b9f9c711f3 100644 --- a/lib/hipe/x86/hipe_x86_cfg.erl +++ b/lib/hipe/x86/hipe_x86_cfg.erl @@ -24,7 +24,7 @@  -export([init/1,           labels/1, start_label/1,           succ/2, pred/2, -         bb/2, bb_add/3]). +         bb/2, bb_add/3, map_bbs/2, fold_bbs/3]).  -export([postorder/1, reverse_postorder/1]).  -export([linearise/1, params/1, arity/1, redirect_jmp/3]). @@ -33,6 +33,7 @@  -define(BREADTH_ORDER,true).  -define(PARAMS_NEEDED,true).  -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true).  -include("hipe_x86.hrl").  -include("../flow/cfg.hrl"). @@ -107,7 +108,7 @@ mk_goto(Label) ->    hipe_x86:mk_jmp_label(Label).  is_label(I) -> -  hipe_x86:is_label(I). +  case I of #label{} -> true; _ -> false end.  
label_name(Label) ->    hipe_x86:label_label(Label). diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl index 9cba6cbe4b..4455def74e 100644 --- a/lib/hipe/x86/hipe_x86_defuse.erl +++ b/lib/hipe/x86/hipe_x86_defuse.erl @@ -35,7 +35,7 @@  -endif.  -module(?HIPE_X86_DEFUSE). --export([insn_def/1, insn_use/1]). %% src_use/1]). +-export([insn_def/1, insn_defs_all/1, insn_use/1]). %% src_use/1]).  -include("../x86/hipe_x86.hrl").  %%% @@ -64,6 +64,16 @@ insn_def(I) ->      _ -> []    end. + +%% @doc Answers whether instruction I defines all allocatable registers. Used by +%% hipe_regalloc_prepass. +-spec insn_defs_all(_) -> boolean(). +insn_defs_all(I) -> +  case I of +    #pseudo_call{} -> true; +    _ -> false +  end. +  dst_def(Dst) ->    case Dst of      #x86_temp{} -> [Dst]; diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl index 8851ead250..fc782571bf 100644 --- a/lib/hipe/x86/hipe_x86_frame.erl +++ b/lib/hipe/x86/hipe_x86_frame.erl @@ -46,15 +46,13 @@  -include("../x86/hipe_x86.hrl").  -include("../rtl/hipe_literals.hrl"). -frame(Defun, _Options) -> -  Formals = fix_formals(hipe_x86:defun_formals(Defun)), -  Temps0 = all_temps(hipe_x86:defun_code(Defun), Formals), -  MinFrame = defun_minframe(Defun), +frame(CFG0, _Options) -> +  Formals = fix_formals(hipe_x86_cfg:params(CFG0)), +  Temps0 = all_temps(CFG0, Formals), +  MinFrame = defun_minframe(CFG0),    Temps = ensure_minframe(MinFrame, Temps0), -  CFG0 = hipe_x86_cfg:init(Defun),    Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), -  CFG1 = do_body(CFG0, Liveness, Formals, Temps), -  hipe_x86_cfg:linearise(CFG1). +  do_body(CFG0, Liveness, Formals, Temps).  fix_formals(Formals) ->    fix_formals(?HIPE_X86_REGISTERS:nr_args(), Formals). @@ -69,23 +67,14 @@ do_body(CFG0, Liveness, Formals, Temps) ->    do_prologue(CFG1, Context).  do_blocks(CFG, Context) -> -  Labels = hipe_x86_cfg:labels(CFG), -  do_blocks(Labels, CFG, Context). 
+  hipe_x86_cfg:map_bbs(fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG). -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) ->    Liveness = context_liveness(Context),    LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label), -  Block = hipe_x86_cfg:bb(CFG, Label),    Code = hipe_bb:code(Block), -  NewCode = do_block(Code, LiveOut, Context), -  NewBlock = hipe_bb:code_update(Block, NewCode), -  NewCFG = hipe_x86_cfg:bb_add(CFG, Label, NewBlock), -  do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> -  CFG. - -do_block(Insns, LiveOut, Context) -> -  do_block(Insns, LiveOut, Context, context_framesize(Context), []). +  NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), +  hipe_bb:code_update(Block, NewCode).  do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) ->    {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), @@ -609,39 +598,46 @@ temp_is_pseudo(Temp) ->  %%% Build the set of all temps used in a Defun's body.  %%% -all_temps(Code, Formals) -> -  S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> +  S0 = fold_insns(fun find_temps/2, tset_empty(), CFG),    S1 = tset_del_list(S0, Formals),    S2 = tset_filter(S1, fun(T) -> temp_is_pseudo(T) end),    S2. -find_temps([I|Insns], S0) -> +find_temps(I, S0) ->    S1 = tset_add_list(S0, hipe_x86_defuse:insn_def(I)), -  S2 = tset_add_list(S1, hipe_x86_defuse:insn_use(I)), -  find_temps(Insns, S2); -find_temps([], S) -> -  S. +  tset_add_list(S1, hipe_x86_defuse:insn_use(I)). + +fold_insns(Fun, InitAcc, CFG) -> +  hipe_x86_cfg:fold_bbs( +    fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, +    InitAcc, CFG). + +-compile({inline, [tset_empty/0, tset_size/1, tset_insert/2, +		   tset_filter/2, tset_to_list/1]}).  tset_empty() -> -  gb_sets:new(). +  #{}.  tset_size(S) -> -  gb_sets:size(S). +  map_size(S).  tset_insert(S, T) -> -  gb_sets:add_element(T, S). +  S#{T => []}. 
-tset_add_list(S, Ts) -> -  gb_sets:union(S, gb_sets:from_list(Ts)). +tset_add_list(S, []) -> S; +tset_add_list(S, [T|Ts]) -> +  tset_add_list(S#{T => []}, Ts). -tset_del_list(S, Ts) -> -  gb_sets:subtract(S, gb_sets:from_list(Ts)). +tset_del_list(S, []) -> S; +tset_del_list(S, [T|Ts]) -> +  tset_del_list(maps:remove(T,S), Ts).  tset_filter(S, F) -> -  gb_sets:filter(F, S). +  maps:filter(fun(K, _V) -> F(K) end, S).  tset_to_list(S) -> -  gb_sets:to_list(S). +  maps:keys(S).  %%%  %%% Compute minimum permissible frame size, ignoring spilled temps. @@ -649,16 +645,11 @@ tset_to_list(S) ->  %%% in the middle of a tailcall.  %%% -defun_minframe(Defun) -> -  MaxTailArity = body_mta(hipe_x86:defun_code(Defun), 0), -  MyArity = length(fix_formals(hipe_x86:defun_formals(Defun))), +defun_minframe(CFG) -> +  MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), +  MyArity = length(fix_formals(hipe_x86_cfg:params(CFG))),    erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> -  body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> -  MTA. 
-  insn_mta(I, MTA) ->    case I of      #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/x86/hipe_x86_main.erl b/lib/hipe/x86/hipe_x86_main.erl index 13b0bb6b28..341269b698 100644 --- a/lib/hipe/x86/hipe_x86_main.erl +++ b/lib/hipe/x86/hipe_x86_main.erl @@ -53,19 +53,23 @@  ?RTL_TO_X86(MFA, RTL, Options) ->    Translated = ?option_time(?HIPE_RTL_TO_X86:translate(RTL),  			    "RTL-to-"?X86STR, Options), -  SpillRest =  +  TransCFG = ?option_time(hipe_x86_cfg:init(Translated), +			  ?X86STR" to cfg", Options), +  SpillRestCFG =      case proplists:get_bool(caller_save_spill_restore, Options) of        true -> -	?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(Translated, Options), +	?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(TransCFG, Options),  		     ?X86STR" spill restore", Options);        false -> -	Translated +	TransCFG      end, -  Allocated  = ?option_time(?HIPE_X86_RA:ra(SpillRest, Options), -			    ?X86STR" register allocation", Options), -  Framed     = ?option_time(?HIPE_X86_FRAME:frame(Allocated, Options),  -			    ?X86STR" frame", Options), -  Finalised  = ?option_time(hipe_x86_postpass:postpass(Framed, Options), -			    ?X86STR" finalise", Options), +  AllocatedCFG = ?option_time(?HIPE_X86_RA:ra(SpillRestCFG, Options), +			       ?X86STR" register allocation", Options), +  FramedCFG    = ?option_time(?HIPE_X86_FRAME:frame(AllocatedCFG, Options), +			       ?X86STR" frame", Options), +  Framed       = ?option_time(hipe_x86_cfg:linearise(FramedCFG), +			      ?X86STR" linearise", Options), +  Finalised    = ?option_time(hipe_x86_postpass:postpass(Framed, Options), +			      ?X86STR" finalise", Options),    ?HIPE_X86_PP:optional_pp(Finalised, MFA, Options),    {native, ?X86TAG, {unprofiled, Finalised}}. 
diff --git a/lib/hipe/x86/hipe_x86_pp.erl b/lib/hipe/x86/hipe_x86_pp.erl index 9352cf5dbf..ff26a31877 100644 --- a/lib/hipe/x86/hipe_x86_pp.erl +++ b/lib/hipe/x86/hipe_x86_pp.erl @@ -171,7 +171,7 @@ pp_insn(Dev, I, Pre) ->      #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage} ->        io:format(Dev, "\tpseudo_tailcall ", []),        pp_fun(Dev, Fun), -      io:format(Dev, "~w (", [Arity]), +      io:format(Dev, " ~w (", [Arity]),        pp_args(Dev, StkArgs),        io:format(Dev, ") ~w\n", [Linkage]);      #pseudo_tailcall_prepare{} -> diff --git a/lib/hipe/x86/hipe_x86_ra.erl b/lib/hipe/x86/hipe_x86_ra.erl index f66961a7a7..b64c22a76c 100644 --- a/lib/hipe/x86/hipe_x86_ra.erl +++ b/lib/hipe/x86/hipe_x86_ra.erl @@ -41,60 +41,83 @@  %%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation.  -include("../main/hipe.hrl"). -ra(Defun0, Options) -> -  %% ?HIPE_X86_PP:pp(Defun0), -  {Defun1, Coloring_fp, SpillIndex} = ra_fp(Defun0, Options), -  %% ?HIPE_X86_PP:pp(Defun1), +-ifdef(HIPE_INSTRUMENT_COMPILER). +code_size(CFG) -> +  hipe_x86_cfg:fold_bbs(fun(_, BB, Acc) -> Acc + length(hipe_bb:code(BB)) end, +			0, CFG). +-endif. 
%% ifdef(HIPE_INSTRUMENT_COMPILER) + +ra(CFG0, Options) -> +  %% hipe_x86_cfg:pp(CFG0), +  Liveness0 = ?HIPE_X86_SPECIFIC:analyze(CFG0, no_context), +  {CFG1, Liveness, Coloring_fp, SpillIndex} = ra_fp(CFG0, Liveness0, Options), +  %% hipe_x86_cfg:pp(CFG1),    ?start_ra_instrumentation(Options, -			    length(hipe_x86:defun_code(Defun1)), -			    element(2,hipe_x86:defun_var_range(Defun1))), -  {Defun2, Coloring} +			    code_size(CFG1), +			    element(2,hipe_gensym:var_range(x86))), +  {CFG2, _, Coloring}      = case proplists:get_value(regalloc, Options, coalescing) of  	coalescing -> -	  ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); +	  ra(CFG1, Liveness, SpillIndex, Options, hipe_coalescing_regalloc);  	optimistic -> -	  ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); +	  ra(CFG1, Liveness, SpillIndex, Options, hipe_optimistic_regalloc);  	graph_color -> -	  ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); +	  ra(CFG1, Liveness, SpillIndex, Options, hipe_graph_coloring_regalloc);  	linear_scan -> -	  ?HIPE_X86_RA_LS:ra(Defun1, SpillIndex, Options); +	  ?HIPE_X86_RA_LS:ra(CFG1, Liveness, SpillIndex, Options);  	naive -> -	  ?HIPE_X86_RA_NAIVE:ra(Defun1, Coloring_fp, Options); +	  ?HIPE_X86_RA_NAIVE:ra(CFG1, Liveness, Coloring_fp, Options);          _ ->  	  exit({unknown_regalloc_compiler_option,  		proplists:get_value(regalloc,Options)})        end,    ?stop_ra_instrumentation(Options, -			   length(hipe_x86:defun_code(Defun2)), -			   element(2,hipe_x86:defun_var_range(Defun2))), -  %% ?HIPE_X86_PP:pp(Defun2), -  ?HIPE_X86_RA_FINALISE:finalise(Defun2, Coloring, Coloring_fp, Options). +			   code_size(CFG2), +  			   element(2,hipe_gensym:var_range(x86))), +  %% hipe_x86_cfg:pp(CFG2), +  ?HIPE_X86_RA_FINALISE:finalise(CFG2, Coloring, Coloring_fp, Options). -ra(Defun, SpillIndex, Options, RegAllocMod) -> -  hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, ?HIPE_X86_SPECIFIC). 
+ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> +  hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, +			?HIPE_X86_SPECIFIC, no_context).  -ifdef(HIPE_AMD64). -ra_fp(Defun, Options) -> -  case proplists:get_bool(inline_fp, Options) and -       (proplists:get_value(regalloc, Options) =/= naive) of -    true -> -      case proplists:get_bool(x87, Options) of -	true -> -	  hipe_amd64_ra_x87_ls:ra(Defun, Options); -	false -> -	  hipe_regalloc_loop:ra_fp(Defun, Options, -				   hipe_coalescing_regalloc, -				   hipe_amd64_specific_sse2) -      end; -    false -> -      {Defun,[],0} +ra_fp(CFG, Liveness, Options) -> +  Regalloc0 = proplists:get_value(regalloc, Options), +  {Regalloc, TargetMod} = +    case proplists:get_bool(inline_fp, Options) and (Regalloc0 =/= naive) of +      false -> {naive, undefined}; +      true -> +	case proplists:get_bool(x87, Options) of +	  true ->  {linear_scan, hipe_amd64_specific_x87}; +	  false -> {Regalloc0,   hipe_amd64_specific_sse2} +	end +    end, +  case Regalloc of +    coalescing -> +      ra_fp(CFG, Liveness, Options, hipe_coalescing_regalloc, TargetMod); +    optimistic -> +      ra_fp(CFG, Liveness, Options, hipe_optimistic_regalloc, TargetMod); +    graph_color -> +      ra_fp(CFG, Liveness, Options, hipe_graph_coloring_regalloc, TargetMod); +    linear_scan -> hipe_amd64_ra_ls:ra_fp(CFG, Liveness, Options, TargetMod, +					  no_context); +    naive -> {CFG,Liveness,[],0}; +    _ -> +      exit({unknown_regalloc_compiler_option, +	    proplists:get_value(regalloc,Options)})    end. + +ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod) -> +  hipe_regalloc_loop:ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod, +			   no_context).  -else. 
-ra_fp(Defun, Options) -> +ra_fp(CFG, Liveness, Options) ->    case proplists:get_bool(inline_fp, Options) of      true -> -      hipe_x86_ra_x87_ls:ra(Defun, Options); +      hipe_x86_ra_ls:ra_fp(CFG, Liveness, Options, hipe_x86_specific_x87, +			   no_context);      false -> -      {Defun,[],0} +      {CFG,Liveness,[],0}    end.  -endif. diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl index 5dd75cb7ae..edfd7b332c 100644 --- a/lib/hipe/x86/hipe_x86_ra_finalise.erl +++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl @@ -25,23 +25,36 @@  -define(HIPE_X86_RA_FINALISE,	hipe_amd64_ra_finalise).  -define(HIPE_X86_REGISTERS,	hipe_amd64_registers).  -define(HIPE_X86_X87,		hipe_amd64_x87). +-define(HIPE_X86_SSE2,		hipe_amd64_sse2). +-define(IF_HAS_SSE2(Expr),	Expr).  -else.  -define(HIPE_X86_RA_FINALISE,	hipe_x86_ra_finalise).  -define(HIPE_X86_REGISTERS,	hipe_x86_registers).  -define(HIPE_X86_X87,		hipe_x86_x87). +-define(IF_HAS_SSE2(Expr),).  -endif.  -module(?HIPE_X86_RA_FINALISE).  -export([finalise/4]).  -include("../x86/hipe_x86.hrl"). -finalise(Defun, TempMap, FpMap, Options) -> -  Defun1 = finalise_ra(Defun, TempMap, FpMap, Options), +finalise(CFG0, TempMap, FpMap, Options) -> +  CFG1 = finalise_ra(CFG0, TempMap, FpMap, Options),    case proplists:get_bool(x87, Options) of      true -> -      ?HIPE_X86_X87:map(Defun1); +      ?HIPE_X86_X87:map(CFG1);      _ -> -      Defun1 +      case +	proplists:get_bool(inline_fp, Options) +	and (proplists:get_value(regalloc, Options) =:= linear_scan) +      of +	%% Ugly, but required to avoid Dialyzer complaints about "Unknown +	%% function" hipe_x86_sse2:map/1 +	?IF_HAS_SSE2(true -> +			?HIPE_X86_SSE2:map(CFG1);) +	false -> +	  CFG1 +      end    end.  
%%% @@ -50,15 +63,16 @@ finalise(Defun, TempMap, FpMap, Options) ->  %%% but I just want this to work now)  %%% -finalise_ra(Defun, [], [], _Options) -> -  Defun; -finalise_ra(Defun, TempMap, FpMap, Options) -> -  Code = hipe_x86:defun_code(Defun), -  {_, SpillLimit} = hipe_x86:defun_var_range(Defun), +finalise_ra(CFG, [], [], _Options) -> +  CFG; +finalise_ra(CFG, TempMap, FpMap, Options) -> +  {_, SpillLimit} = hipe_gensym:var_range(x86),    Map = mk_ra_map(TempMap, SpillLimit),    FpMap0 = mk_ra_map_fp(FpMap, SpillLimit, Options), -  NewCode = ra_code(Code, Map, FpMap0), -  Defun#defun{code=NewCode}. +  hipe_x86_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FpMap0) end, CFG). + +ra_bb(BB, Map, FpMap) -> +  hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap)).  ra_code(Code, Map, FpMap) ->    [ra_insn(I, Map, FpMap) || I <- Code]. @@ -230,49 +244,27 @@ mk_ra_map(TempMap, SpillLimit) ->  	      gb_trees:empty(),  	      TempMap). -conv_ra_maplet(MapLet = {From,To}, SpillLimit, IsPrecoloured) -> +conv_ra_maplet({From,To}, SpillLimit, IsPrecoloured) +  when is_integer(From), From =< SpillLimit ->    %% From should be a pseudo, or a hard reg mapped to itself. -  if is_integer(From), From =< SpillLimit -> -      case ?HIPE_X86_REGISTERS:IsPrecoloured(From) of -	false -> []; -	_ -> -	  case To of -	    {reg, From} -> []; -	    _ -> exit({?MODULE,conv_ra_maplet,MapLet}) -	  end -      end; -     true -> exit({?MODULE,conv_ra_maplet,MapLet}) +  case ?HIPE_X86_REGISTERS:IsPrecoloured(From) of +    false -> ok; +    _ -> To = {reg, From}, ok    end,    %% end of From check    case To of -    {reg, NewReg} -> +    {reg, NewReg} when is_integer(NewReg) ->        %% NewReg should be a hard reg, or a pseudo mapped        %% to itself (formals are handled this way). 
-      if is_integer(NewReg) -> -	  case ?HIPE_X86_REGISTERS:IsPrecoloured(NewReg) of -	    true -> []; -	    _ -> if From =:= NewReg -> []; -		    true -> -		     exit({?MODULE,conv_ra_maplet,MapLet}) -		 end -	  end; -	 true -> exit({?MODULE,conv_ra_maplet,MapLet}) -      end, -      %% end of NewReg check +      true = (?HIPE_X86_REGISTERS:IsPrecoloured(NewReg) orelse From =:= NewReg),        {From, NewReg}; -    {spill, SpillIndex} -> -      %% SpillIndex should be >= 0. -      if is_integer(SpillIndex), SpillIndex >= 0 -> []; -	 true -> exit({?MODULE,conv_ra_maplet,MapLet}) -      end, -      %% end of SpillIndex check +    {spill, SpillIndex} when is_integer(SpillIndex), SpillIndex >= 0 ->        ToTempNum = SpillLimit+SpillIndex+1,        MaxTempNum = hipe_gensym:get_var(x86),        if MaxTempNum >= ToTempNum -> ok;  	 true -> hipe_gensym:set_var(x86, ToTempNum)        end, -      {From, ToTempNum}; -    _ -> exit({?MODULE,conv_ra_maplet,MapLet}) +      {From, ToTempNum}    end.  mk_ra_map_x87(FpMap, SpillLimit) -> diff --git a/lib/hipe/x86/hipe_x86_ra_ls.erl b/lib/hipe/x86/hipe_x86_ra_ls.erl index 3e34433111..34ce50d494 100644 --- a/lib/hipe/x86/hipe_x86_ra_ls.erl +++ b/lib/hipe/x86/hipe_x86_ra_ls.erl @@ -35,41 +35,64 @@  -endif.  -module(?HIPE_X86_RA_LS). --export([ra/3,regalloc/7]). +-export([ra/4,ra_fp/5]).  -define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation.  -include("../main/hipe.hrl"). -ra(Defun, SpillIndex, Options) -> -  NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), -  CFG = hipe_x86_cfg:init(NewDefun), - +ra(CFG, Liveness, SpillIndex, Options) ->    SpillLimit = ?HIPE_X86_SPECIFIC:number_of_temporaries( -		 CFG), +		 CFG, no_context), +  ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), +  alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). 
+ +ra_fp(CFG, Liveness, Options, TargetMod, TargetCtx) -> +  ?inc_counter(ra_calls_counter,1), +  %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), +  SpillIndex = 0, +  SpillLimit = TargetMod:number_of_temporaries(CFG, TargetCtx),    ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), -  alloc(NewDefun, SpillIndex, SpillLimit, Options). +  ?inc_counter(ra_iteration_counter,1), +  %% ?HIPE_X86_PP:pp(Defun), + +  {Coloring,NewSpillIndex} = +    regalloc(CFG, Liveness, +	     TargetMod:allocatable('linearscan', TargetCtx), +	     [hipe_x86_cfg:start_label(CFG)], +	     SpillIndex, SpillLimit, Options, +	     TargetMod, TargetCtx), + +  {NewCFG, _DidSpill} = +    TargetMod:check_and_rewrite(CFG, Coloring, 'linearscan', TargetCtx), +  TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod, TargetCtx), +  {TempMap2, NewSpillIndex2} = +    hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, +			     TargetMod, TargetCtx, TempMap), +  Coloring2 = +    hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), +  ?add_spills(Options, NewSpillIndex), +  {NewCFG, Liveness, Coloring2, NewSpillIndex2}. 
-alloc(Defun, SpillIndex, SpillLimit, Options) -> +alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) ->    ?inc_counter(ra_iteration_counter,1),     %% ?HIPE_X86_PP:pp(Defun),	 -  CFG = hipe_x86_cfg:init(Defun), -  {Coloring, NewSpillIndex} =  +  {Coloring, NewSpillIndex} =      regalloc( -      CFG,  +      CFG, Liveness,        ?HIPE_X86_REGISTERS:allocatable()--        [?HIPE_X86_REGISTERS:temp1(),         ?HIPE_X86_REGISTERS:temp0()],        [hipe_x86_cfg:start_label(CFG)],        SpillIndex, SpillLimit, Options, -      ?HIPE_X86_SPECIFIC), -  {NewDefun, _DidSpill} = +      ?HIPE_X86_SPECIFIC, no_context), +  {NewCFG, _DidSpill} =      ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite( -      Defun, Coloring, 'linearscan'), +      CFG, Coloring, 'linearscan'),    %% ?HIPE_X86_PP:pp(NewDefun), -  TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), -  {TempMap2,NewSpillIndex2} =  -    hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, -			     ?HIPE_X86_SPECIFIC, TempMap), +  TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC, no_context), +  {TempMap2,NewSpillIndex2} = +    hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, +			     ?HIPE_X86_SPECIFIC, no_context, TempMap),    Coloring2 =       hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2),    case proplists:get_bool(verbose_spills, Options) of @@ -79,8 +102,9 @@ alloc(Defun, SpillIndex, SpillLimit, Options) ->        ok    end,    ?add_spills(Options, NewSpillIndex), -  {NewDefun, Coloring2}. +  {NewCFG, Liveness, Coloring2}. -regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, DontSpill, Options, Target) -> -  hipe_ls_regalloc:regalloc(CFG,PhysRegs,Entrypoints, SpillIndex,  -			    DontSpill, Options, Target). +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, +	 TgtMod, TgtCtx) -> +  hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, +			    DontSpill, Options, TgtMod, TgtCtx). 
diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl index 0ef4ef0a04..35de692e07 100644 --- a/lib/hipe/x86/hipe_x86_ra_naive.erl +++ b/lib/hipe/x86/hipe_x86_ra_naive.erl @@ -33,15 +33,14 @@  -endif.  -module(?HIPE_X86_RA_NAIVE). --export([ra/3]). +-export([ra/4]).  -include("../x86/hipe_x86.hrl").  -define(HIPE_INSTRUMENT_COMPILER, true). % enable instrumentation  -include("../main/hipe.hrl"). -ra(X86Defun, Coloring_fp, Options) -> -  #defun{code=Code0} = X86Defun, -  Code1 = do_insns(Code0), +ra(CFG0, Liveness, Coloring_fp, Options) -> +  CFG = hipe_x86_cfg:map_bbs(fun do_bb/2, CFG0),    NofSpilledFloats = count_non_float_spills(Coloring_fp),    NofFloats = length(Coloring_fp),    ?add_spills(Options, hipe_gensym:get_var(x86) - @@ -49,15 +48,17 @@ ra(X86Defun, Coloring_fp, Options) ->  	      NofSpilledFloats -  	      NofFloats),    TempMap = [], -  {X86Defun#defun{code=Code1, -		  var_range={0, hipe_gensym:get_var(x86)}}, +  {CFG, Liveness,     TempMap}. +do_bb(_Lbl, BB) -> +  hipe_bb:code_update(BB, do_insns(hipe_bb:code(BB))). +  count_non_float_spills(Coloring_fp) ->    count_non_float_spills(Coloring_fp, 0).  count_non_float_spills([{_,To}|Tail], Num) -> -  case ?HIPE_X86_SPECIFIC_FP:is_precoloured(To) of +  case ?HIPE_X86_SPECIFIC_FP:is_precoloured(To, no_context) of      true ->        count_non_float_spills(Tail, Num);      false -> diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl index 0a70bd1d22..f496b71828 100644 --- a/lib/hipe/x86/hipe_x86_ra_postconditions.erl +++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl @@ -40,14 +40,18 @@  -include("../main/hipe.hrl").  -define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). 
-check_and_rewrite(Defun, Coloring, Strategy) -> +check_and_rewrite(CFG, Coloring, Strategy) ->    %% io:format("Converting\n"), -  TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), +  TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC, no_context),    %% io:format("Rewriting\n"), -  #defun{code=Code0} = Defun, -  {Code1, DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), -  {Defun#defun{code=Code1,var_range={0,hipe_gensym:get_var(x86)}}, -   DidSpill}. +  do_bbs(hipe_x86_cfg:labels(CFG), TempMap, Strategy, CFG, false). + +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> +  Code0 = hipe_bb:code(BB = hipe_x86_cfg:bb(CFG0, Lbl)), +  {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), +  CFG = hipe_x86_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), +  do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill).  do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) ->    {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), @@ -169,14 +173,22 @@ do_jmp_switch(I, TempMap, Strategy) ->  %%% Fix a lea op.  do_lea(I, TempMap, Strategy) -> -  #lea{temp=Temp} = I, -  case is_spilled(Temp, TempMap) of -    false -> -      {[I], false}; -    true -> -      NewTmp = spill_temp('untagged', Strategy), -      {[I#lea{temp=NewTmp}, hipe_x86:mk_move(NewTmp, Temp)], -       true} +  #lea{mem=Mem0,temp=Temp0} = I, +  {FixMem, Mem, DidSpill1} = fix_mem_operand(Mem0, TempMap, temp1(Strategy)), +  case Mem of +    #x86_mem{base=Base, off=#x86_imm{value=0}} -> +      %% We've decayed into a move due to both operands being memory (there's an +      %% 'add' in FixMem). 
+      {FixMem ++ [hipe_x86:mk_move(Base, Temp0)], DidSpill1}; +    #x86_mem{} -> +      {StoreTemp, Temp, DidSpill2} = +	case is_mem_opnd(Temp0, TempMap) of +	  false -> {[], Temp0, false}; +	  true -> +	    Temp1 = clone2(Temp0, temp0(Strategy)), +	    {[hipe_x86:mk_move(Temp1, Temp0)], Temp1, true} +	end, +      {FixMem ++ [I#lea{mem=Mem,temp=Temp} | StoreTemp], DidSpill1 or DidSpill2}    end.  %%% Fix a move op. @@ -377,19 +389,12 @@ is_mem_opnd(Opnd, TempMap) ->  	Reg = hipe_x86:temp_reg(Opnd),  	case hipe_x86:temp_is_allocatable(Opnd) of  	  true -> -	    case tuple_size(TempMap) > Reg of +	    case +	      hipe_temp_map:is_spilled(Reg, TempMap) of  	      true -> -		case -		  hipe_temp_map:is_spilled(Reg, TempMap) of -		  true -> -		    ?count_temp(Reg), -		    true; -		  false -> false -		end; -	      _ -> -		%% impossible, but was true in ls post and false in normal post -		exit({?MODULE,is_mem_opnd,Reg}), -		false +		?count_temp(Reg), +		true; +	      false -> false  	    end;  	  false -> true  	end; @@ -404,15 +409,10 @@ is_spilled(Temp, TempMap) ->    case hipe_x86:temp_is_allocatable(Temp) of      true ->        Reg = hipe_x86:temp_reg(Temp), -      case tuple_size(TempMap) > Reg of +      case hipe_temp_map:is_spilled(Reg, TempMap) of  	true -> -	  case hipe_temp_map:is_spilled(Reg, TempMap) of -	    true -> -	      ?count_temp(Reg), -	      true; -	    false -> -	      false -	  end; +	  ?count_temp(Reg), +	  true;  	false ->  	  false        end; @@ -429,14 +429,14 @@ clone(Dst, Strategy) ->      end,    spill_temp(Type, Strategy). -spill_temp0(Type, 'normal') -> +spill_temp0(Type, 'normal') when Type =/= double ->    hipe_x86:mk_new_temp(Type); -spill_temp0(Type, 'linearscan') -> +spill_temp0(Type, 'linearscan') when Type =/= double ->    hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), Type). 
-spill_temp(Type, 'normal') -> +spill_temp(Type, 'normal') when Type =/= double ->    hipe_x86:mk_new_temp(Type); -spill_temp(Type, 'linearscan') -> +spill_temp(Type, 'linearscan') when Type =/= double ->    hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), Type).  %%% Make a certain reg into a clone of Dst @@ -448,6 +448,6 @@ clone2(Dst, RegOpt) ->        #x86_temp{} -> hipe_x86:temp_type(Dst)      end,    case RegOpt of -    [] -> hipe_x86:mk_new_temp(Type); +    [] when Type =/= double -> hipe_x86:mk_new_temp(Type);      Reg -> hipe_x86:mk_temp(Reg, Type)    end. diff --git a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl b/lib/hipe/x86/hipe_x86_ra_x87_ls.erl deleted file mode 100644 index 1ee76e5948..0000000000 --- a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl +++ /dev/null @@ -1,64 +0,0 @@ -%% $Id$ -%% -%% %CopyrightBegin% -%%  -%% Copyright Ericsson AB 2006-2016. All Rights Reserved. -%%  -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%%     http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%%  -%% %CopyrightEnd% -%% - -%% Linear Scan register allocator for x87 - --ifdef(HIPE_AMD64). --define(HIPE_X86_RA_X87_LS, hipe_amd64_ra_x87_ls). --define(HIPE_X86_SPECIFIC_X87, hipe_amd64_specific_x87). --define(HIPE_X86_PP, hipe_amd64_pp). --define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). --else. --define(HIPE_X86_RA_X87_LS, hipe_x86_ra_x87_ls). --define(HIPE_X86_SPECIFIC_X87, hipe_x86_specific_x87). --define(HIPE_X86_PP, hipe_x86_pp). --define(HIPE_X86_RA_LS, hipe_x86_ra_ls). --endif. - --module(?HIPE_X86_RA_X87_LS). --export([ra/2]). 
- -%%-define(DEBUG,1). - --define(HIPE_INSTRUMENT_COMPILER, false). %% Turn off instrumentation. --include("../main/hipe.hrl"). - -ra(Defun, Options) -> -    ?inc_counter(ra_calls_counter,1), -    CFG = hipe_x86_cfg:init(Defun), -    %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), -    SpillIndex = 0, -    SpillLimit = ?HIPE_X86_SPECIFIC_X87:number_of_temporaries(CFG), -    ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), - -    ?inc_counter(ra_iteration_counter,1), -    %% ?HIPE_X86_PP:pp(Defun), -    Cfg = hipe_x86_cfg:init(Defun), % XXX: didn't we just compute this above? - -    {Coloring,NewSpillIndex} = -	?HIPE_X86_RA_LS:regalloc(Cfg, -				 ?HIPE_X86_SPECIFIC_X87:allocatable(), -				 [hipe_x86_cfg:start_label(Cfg)], -				 SpillIndex, SpillLimit, Options, -				 ?HIPE_X86_SPECIFIC_X87), - -    ?add_spills(Options, NewSpillIndex), -    {Defun, Coloring, NewSpillIndex}. diff --git a/lib/hipe/x86/hipe_x86_registers.erl b/lib/hipe/x86/hipe_x86_registers.erl index 179d734501..f00bbfb280 100644 --- a/lib/hipe/x86/hipe_x86_registers.erl +++ b/lib/hipe/x86/hipe_x86_registers.erl @@ -224,6 +224,8 @@ ret(N) ->  	    exit({?MODULE, ret, N})      end. +%% Note: the fact that (allocatable() UNION allocatable_x87()) is a subset of +%% call_clobbered() is hard-coded in hipe_x86_defuse:insn_defs_all/1  call_clobbered() ->      [{?EAX,tagged},{?EAX,untagged},	% does the RA strip the type or not?       {?EDX,tagged},{?EDX,untagged}, diff --git a/lib/hipe/x86/hipe_x86_spill_restore.erl b/lib/hipe/x86/hipe_x86_spill_restore.erl index f17b91f33b..32b1eb7b40 100644 --- a/lib/hipe/x86/hipe_x86_spill_restore.erl +++ b/lib/hipe/x86/hipe_x86_spill_restore.erl @@ -25,13 +25,11 @@  -ifdef(HIPE_AMD64).  -define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore).  -define(HIPE_X86_LIVENESS,      hipe_amd64_liveness). --define(HIPE_X86_SPECIFIC,      hipe_amd64_specific).  -define(HIPE_X86_REGISTERS,	hipe_amd64_registers).  -define(X86STR, "amd64").  -else.  
-define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore).  -define(HIPE_X86_LIVENESS,      hipe_x86_liveness). --define(HIPE_X86_SPECIFIC,      hipe_x86_specific).  -define(HIPE_X86_REGISTERS,     hipe_x86_registers).  -define(X86STR, "x86").  -endif. @@ -51,15 +49,13 @@  -include("../flow/cfg.hrl").     % Added for the definition of #cfg{}  %% Main function -spill_restore(Defun, Options) -> -  CFG = ?option_time(firstPass(Defun), ?X86STR" First Pass", Options), -  CFGFinal = ?option_time(secondPass(CFG), ?X86STR" Second Pass", Options), -  hipe_x86_cfg:linearise(CFGFinal). +spill_restore(CFG0, Options) -> +  CFG1 = ?option_time(firstPass(CFG0), ?X86STR" First Pass", Options), +  ?option_time(secondPass(CFG1), ?X86STR" Second Pass", Options).  %% Performs the first pass of the algorithm.  %% By working bottom up, introduce the pseudo_spills. -firstPass(Defun) -> -  CFG0 = ?HIPE_X86_SPECIFIC:defun_to_cfg(Defun), +firstPass(CFG0) ->    %% get the labels bottom up    Labels = hipe_x86_cfg:postorder(CFG0),    Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), diff --git a/lib/hipe/x86/hipe_x86_subst.erl b/lib/hipe/x86/hipe_x86_subst.erl new file mode 100644 index 0000000000..5e642d1d06 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_subst.erl @@ -0,0 +1,94 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%%  +%% Copyright Ericsson AB 2016. All Rights Reserved. +%%  +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%%     http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. 
+%%  +%% %CopyrightEnd% +%% + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_SUBST, hipe_amd64_subst). +-else. +-define(HIPE_X86_SUBST, hipe_x86_subst). +-endif. + +-module(?HIPE_X86_SUBST). +-export([insn_temps/2]). +-include("../x86/hipe_x86.hrl"). + +%% These should be moved to hipe_x86 and exported +-type temp()   :: #x86_temp{}. +-type oper()   :: temp() | #x86_imm{} | #x86_mem{}. +-type mfarec() :: #x86_mfa{}. +-type prim()   :: #x86_prim{}. +-type funv()   :: mfarec() | prim() | temp(). +-type insn()   :: tuple(). % for now + +-type subst_fun() :: fun((temp()) -> temp()). + +%% @doc Maps over the temporaries in an instruction +-spec insn_temps(subst_fun(), insn()) -> insn(). +insn_temps(SubstTemp, I) -> +  O = fun(O) -> oper_temps(SubstTemp, O) end, +  case I of +    #alu     {src=S, dst=D}  -> I#alu     {src=O(S), dst=O(D)}; +    #cmovcc  {src=S, dst=D}  -> I#cmovcc  {src=O(S), dst=O(D)}; +    #cmp     {src=S, dst=D}  -> I#cmp     {src=O(S), dst=O(D)}; +    #fmove   {src=S, dst=D}  -> I#fmove   {src=O(S), dst=O(D)}; +    #fp_binop{src=S, dst=D}  -> I#fp_binop{src=O(S), dst=O(D)}; +    #imul    {src=S, temp=T} -> I#imul    {src=O(S), temp=O(T)}; +    #lea     {mem=M, temp=T} -> I#lea     {mem=O(M), temp=O(T)}; +    #move    {src=S, dst=D}  -> I#move    {src=O(S), dst=O(D)}; +    #movsx   {src=S, dst=D}  -> I#movsx   {src=O(S), dst=O(D)}; +    #movzx   {src=S, dst=D}  -> I#movzx   {src=O(S), dst=O(D)}; +    #shift   {src=S, dst=D}  -> I#shift   {src=O(S), dst=O(D)}; +    #test    {src=S, dst=D}  -> I#test    {src=O(S), dst=O(D)}; +    #fp_unop{arg=A} -> I#fp_unop{arg=O(A)}; +    #move64 {dst=D} -> I#move64 {dst=O(D)}; +    #push   {src=S} -> I#push   {src=O(S)}; +    #pop    {dst=D} -> I#pop    {dst=O(D)}; +    #jmp_switch{temp=T, jtab=J} -> +      I#jmp_switch{temp=O(T), jtab=jtab_temps(SubstTemp, J)}; +    #pseudo_call{'fun'=F} -> +      I#pseudo_call{'fun'=funv_temps(SubstTemp, F)}; +    #pseudo_tailcall{'fun'=F, stkargs=Stk} -> +      
I#pseudo_tailcall{'fun'=funv_temps(SubstTemp, F), +			stkargs=lists:map(O, Stk)}; +    #comment{} -> I; +    #jmp_label{} -> I; +    #pseudo_tailcall_prepare{} -> I; +    #pseudo_jcc{} -> I; +    #ret{} -> I +  end. + +-spec oper_temps(subst_fun(), oper()) -> oper(). +oper_temps(_SubstTemp, I=#x86_imm{}) -> I; +oper_temps(SubstTemp,  T=#x86_temp{}) -> SubstTemp(T); +oper_temps(SubstTemp,  M=#x86_mem{base=Base,off=Off}) -> +  M#x86_mem{base=oper_temps(SubstTemp, Base), +	    off =oper_temps(SubstTemp, Off)}. + +-spec funv_temps(subst_fun(), funv()) -> funv(). +funv_temps(_SubstTemp, MFA=#x86_mfa{}) -> MFA; +funv_temps(_SubstTemp, P=#x86_prim{}) -> P; +funv_temps(SubstTemp,  T=#x86_temp{}) -> SubstTemp(T). + +%% TODO: Undo this ifdeffery at the source (make jtab an #x86_imm{} on x86) +-ifdef(HIPE_AMD64). +jtab_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T). +-else. +jtab_temps(_SubstTemp, DataLbl) when is_integer(DataLbl) -> DataLbl. +-endif. diff --git a/lib/hipe/x86/hipe_x86_x87.erl b/lib/hipe/x86/hipe_x86_x87.erl index e874490252..10bb6aa75c 100644 --- a/lib/hipe/x86/hipe_x86_x87.erl +++ b/lib/hipe/x86/hipe_x86_x87.erl @@ -41,13 +41,12 @@  %%---------------------------------------------------------------------- -map(Defun) -> -  CFG0 = hipe_x86_cfg:init(Defun), +map(CFG0) ->    %% hipe_x86_cfg:pp(CFG0),    Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0),    StartLabel = hipe_x86_cfg:start_label(CFG0),    {CFG1,_} = do_blocks([], [StartLabel], CFG0, Liveness, [], gb_trees:empty()), -  hipe_x86_cfg:linearise(CFG1). +  CFG1.  do_blocks(Pred, [Lbl|Lbls], CFG, Liveness, Map, BlockMap) ->    case gb_trees:lookup(Lbl, BlockMap) of  | 
