diff options
Diffstat (limited to 'lib/hipe')
261 files changed, 8435 insertions, 4439 deletions
diff --git a/lib/hipe/amd64/Makefile b/lib/hipe/amd64/Makefile index 8dc2af2679..1b6e4ea947 100644 --- a/lib/hipe/amd64/Makefile +++ b/lib/hipe/amd64/Makefile @@ -1,7 +1,7 @@ # # %CopyrightBegin% # -# Copyright Ericsson AB 2004-2016. All Rights Reserved. +# Copyright Ericsson AB 2004-2017. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -57,10 +57,11 @@ MODULES=hipe_amd64_assemble \ hipe_amd64_ra_naive \ hipe_amd64_ra_postconditions \ hipe_amd64_ra_sse2_postconditions \ - hipe_amd64_ra_x87_ls \ hipe_amd64_registers \ hipe_amd64_spill_restore \ + hipe_amd64_subst \ hipe_amd64_x87 \ + hipe_amd64_sse2 \ hipe_rtl_to_amd64 ERL_FILES=$(MODULES:%=%.erl) @@ -125,10 +126,11 @@ $(EBIN)/hipe_amd64_ra_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_ls.erl $(EBIN)/hipe_amd64_ra_naive.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_naive.erl $(EBIN)/hipe_amd64_ra_postconditions.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_postconditions.erl $(EBIN)/hipe_amd64_ra_sse2_postconditions.beam: ../main/hipe.hrl -$(EBIN)/hipe_amd64_ra_x87_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_x87_ls.erl $(EBIN)/hipe_amd64_registers.beam: ../rtl/hipe_literals.hrl $(EBIN)/hipe_amd64_spill_restore.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../flow/cfg.hrl ../x86/hipe_x86_spill_restore.erl +$(EBIN)/hipe_amd64_subst.beam: ../x86/hipe_x86_subst.erl $(EBIN)/hipe_amd64_x87.beam: ../x86/hipe_x86_x87.erl +$(EBIN)/hipe_amd64_sse2.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl $(EBIN)/hipe_rtl_to_amd64.beam: ../x86/hipe_rtl_to_x86.erl ../rtl/hipe_rtl.hrl $(TARGET_FILES): ../x86/hipe_x86.hrl ../misc/hipe_consttab.hrl diff --git a/lib/hipe/amd64/hipe_amd64_assemble.erl b/lib/hipe/amd64/hipe_amd64_assemble.erl index a7b11f7c72..1379850515 100644 --- a/lib/hipe/amd64/hipe_amd64_assemble.erl +++ b/lib/hipe/amd64/hipe_amd64_assemble.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,7 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% + -include("../x86/hipe_x86_assemble.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_defuse.erl b/lib/hipe/amd64/hipe_amd64_defuse.erl index 907f078f3f..9074c3e05e 100644 --- a/lib/hipe/amd64/hipe_amd64_defuse.erl +++ b/lib/hipe/amd64/hipe_amd64_defuse.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -include("../x86/hipe_x86_defuse.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_encode.erl b/lib/hipe/amd64/hipe_amd64_encode.erl index df15732cea..bda2824ffc 100644 --- a/lib/hipe/amd64/hipe_amd64_encode.erl +++ b/lib/hipe/amd64/hipe_amd64_encode.erl @@ -1,8 +1,3 @@ -%%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% Copyright (C) 2000-2004 Mikael Pettersson %%% Copyright (C) 2004 Daniel Luna @@ -63,7 +56,7 @@ -export([% condition codes cc/1, % 8-bit registers - %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0, + %% al/0, cl/0, dl/0, bl/0, % 32-bit registers %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0, % operands @@ -127,19 +120,15 @@ cc(g) -> ?CC_G. -define(CL, 2#001). -define(DL, 2#010). -define(BL, 2#011). --define(AH, 2#100). --define(CH, 2#101). --define(DH, 2#110). --define(BH, 2#111). +-define(SPL, 2#100). +-define(BPL, 2#101). +-define(SIL, 2#110). +-define(DIL, 2#111). %% al() -> ?AL. %% cl() -> ?CL. %% dl() -> ?DL. %% bl() -> ?BL. -%% ah() -> ?AH. -%% ch() -> ?CH. -%% dh() -> ?DH. -%% bh() -> ?BH. %%% 32-bit registers @@ -208,6 +197,7 @@ rex_([]) -> 0; rex_([{r8, Reg8}| Rest]) -> % 8 bit registers case Reg8 of {rm_mem, _} -> rex_(Rest); + {rm_reg, R} -> rex_([{r8, R} | Rest]); 4 -> (1 bsl 8) bor rex_(Rest); 5 -> (1 bsl 8) bor rex_(Rest); 6 -> (1 bsl 8) bor rex_(Rest); @@ -825,12 +815,26 @@ shd_op_encode(Opcode, Opnds) -> test_encode(Opnds) -> case Opnds of + {al, {imm8,Imm8}} -> + [16#A8, Imm8]; + {ax, {imm16,Imm16}} -> + [?PFX_OPND_16BITS, 16#A9 | le16(Imm16, [])]; {eax, {imm32,Imm32}} -> [16#A9 | le32(Imm32, [])]; + {rax, {imm32,Imm32}} -> + [rex([{w,1}]), 16#A9 | le32(Imm32, [])]; + {{rm8,RM8}, {imm8,Imm8}} -> + [rex([{r8,RM8}]), 16#F6 | encode_rm(RM8, 2#000, [Imm8])]; + {{rm16,RM16}, {imm16,Imm16}} -> + [?PFX_OPND_16BITS, 16#F7 | encode_rm(RM16, 2#000, le16(Imm16, []))]; {{rm32,RM32}, {imm32,Imm32}} -> [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))]; + {{rm64,RM64}, {imm32,Imm32}} -> + [rex([{w,1}]), 16#F7 | encode_rm(RM64, 2#000, le32(Imm32, []))]; {{rm32,RM32}, {reg32,Reg32}} -> - [16#85 | encode_rm(RM32, Reg32, [])] + [16#85 | encode_rm(RM32, Reg32, [])]; + {{rm64,RM64}, {reg64,Reg64}} -> + [rex([{w,1}]), 16#85 | encode_rm(RM64, Reg64, [])] end. %% test_sizeof(Opnds) -> @@ -1309,18 +1313,22 @@ dotest1(OS) -> Imm32 = {imm32,Word32}, Imm16 = {imm16,Word16}, Imm8 = {imm8,Word8}, + RM64 = {rm64,rm_reg(?EDX)}, RM32 = {rm32,rm_reg(?EDX)}, RM16 = {rm16,rm_reg(?EDX)}, + RM16REX = {rm16,rm_reg(?R13)}, RM8 = {rm8,rm_reg(?EDX)}, + RM8REX = {rm8,rm_reg(?SIL)}, Rel32 = {rel32,Word32}, Rel8 = {rel8,Word8}, Moffs32 = {moffs32,Word32}, Moffs16 = {moffs16,Word32}, Moffs8 = {moffs8,Word32}, CC = {cc,?CC_G}, + Reg64 = {reg64,?EAX}, Reg32 = {reg32,?EAX}, Reg16 = {reg16,?EAX}, - Reg8 = {reg8,?AH}, + Reg8 = {reg8,?SPL}, EA = {ea,ea_base(?ECX)}, % exercise each instruction definition t(OS,'adc',{eax,Imm32}), @@ -1465,9 +1473,18 @@ dotest1(OS) -> t(OS,'sub',{RM32,Imm8}), t(OS,'sub',{RM32,Reg32}), t(OS,'sub',{Reg32,RM32}), + t(OS,'test',{al,Imm8}), + t(OS,'test',{ax,Imm16}), t(OS,'test',{eax,Imm32}), + t(OS,'test',{rax,Imm32}), + t(OS,'test',{RM8,Imm8}), + t(OS,'test',{RM8REX,Imm8}), + t(OS,'test',{RM16,Imm16}), + t(OS,'test',{RM16REX,Imm16}), t(OS,'test',{RM32,Imm32}), + t(OS,'test',{RM64,Imm32}), t(OS,'test',{RM32,Reg32}), + t(OS,'test',{RM64,Reg64}), t(OS,'xor',{eax,Imm32}), t(OS,'xor',{RM32,Imm32}), t(OS,'xor',{RM32,Imm8}), diff --git a/lib/hipe/amd64/hipe_amd64_frame.erl b/lib/hipe/amd64/hipe_amd64_frame.erl index f3bcdf302a..800f1c1a08 100644 --- a/lib/hipe/amd64/hipe_amd64_frame.erl +++ b/lib/hipe/amd64/hipe_amd64_frame.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -include("../x86/hipe_x86_frame.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_liveness.erl b/lib/hipe/amd64/hipe_amd64_liveness.erl index 5cfdbb0f3e..a1e8403df1 100644 --- a/lib/hipe/amd64/hipe_amd64_liveness.erl +++ b/lib/hipe/amd64/hipe_amd64_liveness.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -include("../x86/hipe_x86_liveness.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_main.erl b/lib/hipe/amd64/hipe_amd64_main.erl index c22c6cd73b..75b7746500 100644 --- a/lib/hipe/amd64/hipe_amd64_main.erl +++ b/lib/hipe/amd64/hipe_amd64_main.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -include("../x86/hipe_x86_main.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_pp.erl b/lib/hipe/amd64/hipe_amd64_pp.erl index 7c3ee8458a..9dfe571122 100644 --- a/lib/hipe/amd64/hipe_amd64_pp.erl +++ b/lib/hipe/amd64/hipe_amd64_pp.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -include("../x86/hipe_x86_pp.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_ra.erl b/lib/hipe/amd64/hipe_amd64_ra.erl index 1d8453d54d..052e9c1e63 100644 --- a/lib/hipe/amd64/hipe_amd64_ra.erl +++ b/lib/hipe/amd64/hipe_amd64_ra.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -include("../x86/hipe_x86_ra.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_ra_finalise.erl b/lib/hipe/amd64/hipe_amd64_ra_finalise.erl index d835c3ec14..82d462fad7 100644 --- a/lib/hipe/amd64/hipe_amd64_ra_finalise.erl +++ b/lib/hipe/amd64/hipe_amd64_ra_finalise.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -include("../x86/hipe_x86_ra_finalise.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_ra_ls.erl b/lib/hipe/amd64/hipe_amd64_ra_ls.erl index 9361b91f04..9fa0edca47 100644 --- a/lib/hipe/amd64/hipe_amd64_ra_ls.erl +++ b/lib/hipe/amd64/hipe_amd64_ra_ls.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -include("../x86/hipe_x86_ra_ls.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_ra_naive.erl b/lib/hipe/amd64/hipe_amd64_ra_naive.erl index 38218a65dc..1aa40121c9 100644 --- a/lib/hipe/amd64/hipe_amd64_ra_naive.erl +++ b/lib/hipe/amd64/hipe_amd64_ra_naive.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -include("../x86/hipe_x86_ra_naive.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_ra_postconditions.erl b/lib/hipe/amd64/hipe_amd64_ra_postconditions.erl index 2d03239ea6..9359e0db0a 100644 --- a/lib/hipe/amd64/hipe_amd64_ra_postconditions.erl +++ b/lib/hipe/amd64/hipe_amd64_ra_postconditions.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -include("../x86/hipe_x86_ra_postconditions.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl index b1f7bd7572..891c874a15 100644 --- a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl +++ b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,13 +11,10 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_amd64_ra_sse2_postconditions). --export([check_and_rewrite/2]). +-export([check_and_rewrite/2, check_and_rewrite/3]). -include("../x86/hipe_x86.hrl"). -define(HIPE_INSTRUMENT_COMPILER, true). @@ -29,40 +22,50 @@ -define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). -check_and_rewrite(AMD64Defun, Coloring) -> +check_and_rewrite(AMD64CFG, Coloring) -> + check_and_rewrite(AMD64CFG, Coloring, 'normal'). + +check_and_rewrite(AMD64CFG, Coloring, Strategy) -> %%io:format("Converting\n"), - TempMap = hipe_temp_map:cols2tuple(Coloring,hipe_amd64_specific_sse2), + TempMap = hipe_temp_map:cols2tuple(Coloring,hipe_amd64_specific_sse2,no_context), %%io:format("Rewriting\n"), - #defun{code=Code0} = AMD64Defun, - {Code1, DidSpill} = do_insns(Code0, TempMap, [], false), - {AMD64Defun#defun{code=Code1, var_range={0, hipe_gensym:get_var(x86)}}, - DidSpill}. - -do_insns([I|Insns], TempMap, Accum, DidSpill0) -> - {NewIs, DidSpill1} = do_insn(I, TempMap), - do_insns(Insns, TempMap, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); -do_insns([], _TempMap, Accum, DidSpill) -> + do_bbs(hipe_x86_cfg:labels(AMD64CFG), TempMap, Strategy, AMD64CFG, false). + +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_x86_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_x86_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). + +do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> + {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), + do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), + DidSpill0 or DidSpill1); +do_insns([], _TempMap, _Strategy, Accum, DidSpill) -> {lists:reverse(Accum), DidSpill}. -do_insn(I, TempMap) -> % Insn -> {Insn list, DidSpill} +do_insn(I, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill} case I of #fmove{} -> - do_fmove(I, TempMap); + do_fmove(I, TempMap, Strategy); #fp_unop{} -> - do_fp_unop(I, TempMap); + do_fp_unop(I, TempMap, Strategy); #fp_binop{} -> - do_fp_binop(I, TempMap); + do_fp_binop(I, TempMap, Strategy); + #pseudo_spill_fmove{} -> + do_pseudo_spill_fmove(I, TempMap, Strategy); _ -> %% All non sse2 ops {[I], false} end. %%% Fix an fp_binop. -do_fp_binop(I, TempMap) -> +do_fp_binop(I, TempMap, Strategy) -> #fp_binop{src=Src,dst=Dst} = I, case is_mem_opnd(Dst, TempMap) of true -> - Tmp = clone(Dst), + Tmp = clone(Dst, Strategy), {[#fmove{src=Dst, dst=Tmp}, I#fp_binop{src=Src,dst=Tmp}, #fmove{src=Tmp,dst=Dst}], @@ -71,11 +74,11 @@ do_fp_binop(I, TempMap) -> {[I], false} end. -do_fp_unop(I, TempMap) -> +do_fp_unop(I, TempMap, Strategy) -> #fp_unop{arg=Arg} = I, case is_mem_opnd(Arg, TempMap) of true -> - Tmp = clone(Arg), + Tmp = clone(Arg, Strategy), {[#fmove{src=Arg, dst=Tmp}, I#fp_unop{arg=Tmp}, #fmove{src=Tmp,dst=Arg}], @@ -85,7 +88,7 @@ do_fp_unop(I, TempMap) -> end. %%% Fix an fmove op. -do_fmove(I, TempMap) -> +do_fmove(I, TempMap, Strategy) -> #fmove{src=Src,dst=Dst} = I, case (is_mem_opnd(Src, TempMap) andalso is_mem_opnd(Dst, TempMap)) @@ -93,9 +96,14 @@ do_fmove(I, TempMap) -> orelse ((not is_float_temp(Src)) andalso is_mem_opnd(Dst, TempMap)) of true -> - Tmp = spill_temp(double), - {[#fmove{src=Src, dst=Tmp},I#fmove{src=Tmp,dst=Dst}], - true}; + Tmp = spill_temp(double, Strategy), + %% pseudo_spill_fmove allows spill slot move coalescing, but must not + %% contain memory operands (except for spilled temps) + Is = case is_float_temp(Src) andalso is_float_temp(Dst) of + true -> [#pseudo_spill_fmove{src=Src, temp=Tmp, dst=Dst}]; + false -> [#fmove{src=Src, dst=Tmp},I#fmove{src=Tmp,dst=Dst}] + end, + {Is, true}; false -> {[I], false} end. @@ -103,89 +111,49 @@ do_fmove(I, TempMap) -> is_float_temp(#x86_temp{type=Type}) -> Type =:= double; is_float_temp(#x86_mem{}) -> false. +%%% Fix an pseudo_spill_fmove op. +do_pseudo_spill_fmove(I = #pseudo_spill_fmove{temp=Temp}, TempMap, _Strategy) -> + %% Temp is above the low water mark and must not have been spilled + false = is_mem_opnd(Temp, TempMap), + {[I], false}. % nothing to do + %%% Check if an operand denotes a memory cell (mem or pseudo). is_mem_opnd(Opnd, TempMap) -> - R = - case Opnd of - #x86_mem{} -> true; - #x86_temp{type=double} -> - Reg = hipe_x86:temp_reg(Opnd), - case hipe_x86:temp_is_allocatable(Opnd) of - true -> - case tuple_size(TempMap) > Reg of - true -> - case - hipe_temp_map:is_spilled(Reg, TempMap) of - true -> - ?count_temp(Reg), - true; - false -> false - end; - _ -> false - end; - false -> true - end; - _ -> false - end, - %% io:format("Op ~w mem: ~w\n",[Opnd,R]), - R. - -%%% Check if an operand is a spilled Temp. - -%%src_is_spilled(Src, TempMap) -> -%% case hipe_x86:is_temp(Src) of -%% true -> -%% Reg = hipe_x86:temp_reg(Src), -%% case hipe_x86:temp_is_allocatable(Src) of -%% true -> -%% case tuple_size(TempMap) > Reg of -%% true -> -%% case hipe_temp_map:is_spilled(Reg, TempMap) of -%% true -> -%% ?count_temp(Reg), -%% true; -%% false -> -%% false -%% end; -%% false -> -%% false -%% end; -%% false -> true -%% end; -%% false -> false -%% end. - -%% is_spilled(Temp, TempMap) -> -%% case hipe_x86:temp_is_allocatable(Temp) of -%% true -> -%% Reg = hipe_x86:temp_reg(Temp), -%% case tuple_size(TempMap) > Reg of -%% true -> -%% case hipe_temp_map:is_spilled(Reg, TempMap) of -%% true -> -%% ?count_temp(Reg), -%% true; -%% false -> -%% false -%% end; -%% false -> -%% false -%% end; -%% false -> true -%% end. + case Opnd of + #x86_mem{} -> true; + #x86_temp{type=double} -> + Reg = hipe_x86:temp_reg(Opnd), + case hipe_x86:temp_is_allocatable(Opnd) of + true -> + case hipe_temp_map:is_spilled(Reg, TempMap) of + true -> + ?count_temp(Reg), + true; + false -> false + end; + false -> true + end; + _ -> false + end. %%% Make Reg a clone of Dst (attach Dst's type to Reg). -clone(Dst) -> +clone(Dst, Strategy) -> Type = case Dst of #x86_mem{} -> hipe_x86:mem_type(Dst); #x86_temp{} -> hipe_x86:temp_type(Dst) end, - spill_temp(Type). - -spill_temp(Type) -> + spill_temp(Type, Strategy). + +spill_temp(Type, 'normal') -> + hipe_x86:mk_new_temp(Type); +spill_temp(double, 'linearscan') -> + hipe_x86:mk_temp(hipe_amd64_specific_sse2:temp0(no_context), double); +spill_temp(Type, 'linearscan') when Type =:= tagged; Type =/= untagged -> + %% We can make a new temp here since we have yet to allocate registers for + %% these types hipe_x86:mk_new_temp(Type). %%% Make a certain reg into a clone of Dst diff --git a/lib/hipe/amd64/hipe_amd64_registers.erl b/lib/hipe/amd64/hipe_amd64_registers.erl index 780c2cc547..a5cecef5a1 100644 --- a/lib/hipe/amd64/hipe_amd64_registers.erl +++ b/lib/hipe/amd64/hipe_amd64_registers.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_amd64_registers). @@ -52,6 +45,7 @@ tailcall_clobbered/0, temp0/0, temp1/0, + sse2_temp0/0, %% fixed/0, wordsize/0 ]). @@ -107,6 +101,8 @@ heap_limit_offset() -> ?P_HP_LIMIT. -define(TEMP0, ?R14). -define(TEMP1, ?R13). +-define(SSE2_TEMP0, 00). + -define(PROC_POINTER, ?RBP). reg_name(R) -> @@ -204,24 +200,21 @@ allocatable() -> allocatable_sse2() -> [00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15]. %% xmm0 - xmm15 +sse2_temp0() -> ?SSE2_TEMP0. + allocatable_x87() -> [0,1,2,3,4,5,6]. nr_args() -> ?AMD64_NR_ARG_REGS. -arg(N) -> - if N < ?AMD64_NR_ARG_REGS -> - case N of - 0 -> ?ARG0; - 1 -> ?ARG1; - 2 -> ?ARG2; - 3 -> ?ARG3; - 4 -> ?ARG4; - 5 -> ?ARG5; - _ -> exit({?MODULE, arg, N}) - end; - true -> - exit({?MODULE, arg, N}) +arg(N) when N < ?AMD64_NR_ARG_REGS -> + case N of + 0 -> ?ARG0; + 1 -> ?ARG1; + 2 -> ?ARG2; + 3 -> ?ARG3; + 4 -> ?ARG4; + 5 -> ?ARG5 end. is_arg(R) -> @@ -242,12 +235,11 @@ args(Arity) when is_integer(Arity), Arity >= 0 -> args(I, Rest) when I < 0 -> Rest; args(I, Rest) -> args(I-1, [arg(I) | Rest]). -ret(N) -> - case N of - 0 -> ?RAX; - _ -> exit({?MODULE, ret, N}) - end. +ret(0) -> ?RAX. +%% Note: the fact that (allocatable() UNION allocatable_x87() UNION +%% allocatable_sse2()) is a subset of call_clobbered() is hard-coded in +%% hipe_x86_defuse:insn_defs_all/1 call_clobbered() -> [{?RAX,tagged},{?RAX,untagged}, % does the RA strip the type or not? {?RDX,tagged},{?RDX,untagged}, diff --git a/lib/hipe/amd64/hipe_amd64_spill_restore.erl b/lib/hipe/amd64/hipe_amd64_spill_restore.erl index 61e2dfa26d..915ac1adc4 100644 --- a/lib/hipe/amd64/hipe_amd64_spill_restore.erl +++ b/lib/hipe/amd64/hipe_amd64_spill_restore.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -include("../x86/hipe_x86_spill_restore.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_sse2.erl b/lib/hipe/amd64/hipe_amd64_sse2.erl new file mode 100644 index 0000000000..1a2d3eac48 --- /dev/null +++ b/lib/hipe/amd64/hipe_amd64_sse2.erl @@ -0,0 +1,76 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% Fix {mem, mem} floating point operations that result from linear scan +%% allocated floats. + +-module(hipe_amd64_sse2). + +-export([map/1]). + +-include("../x86/hipe_x86.hrl"). +-include("../main/hipe.hrl"). + +%%---------------------------------------------------------------------- + +map(CFG) -> + hipe_x86_cfg:map_bbs(fun do_bb/2, CFG). + +do_bb(_Lbl, BB) -> + Code = do_insns(hipe_bb:code(BB), []), + hipe_bb:code_update(BB, Code). + +do_insns([I|Insns], Accum) -> + NewIs = do_insn(I), + do_insns(Insns, lists:reverse(NewIs, Accum)); +do_insns([], Accum) -> + lists:reverse(Accum). + +do_insn(I) -> + case I of + #fp_binop{} -> do_fp_binop(I); + #fmove{} -> do_fmove(I); + _ -> [I] + end. + +do_fp_binop(I = #fp_binop{src=Src0,dst=Dst}) -> + {FixSrc, Src} = fix_binary(Src0, Dst), + FixSrc ++ [I#fp_binop{src=Src}]. + +do_fmove(I = #fmove{src=Src0,dst=Dst}) -> + {FixSrc, Src} = fix_binary(Src0, Dst), + FixSrc ++ [I#fmove{src=Src}]. + +fix_binary(Src0, Dst) -> + case is_mem_opnd(Src0) of + false -> {[], Src0}; + true -> + case is_mem_opnd(Dst) of + false -> {[], Src0}; + true -> + Src1 = spill_temp(), + {[hipe_x86:mk_fmove(Src0, Src1)], Src1} + end + end. + +is_mem_opnd(#x86_fpreg{reg=Reg}) -> + not hipe_amd64_registers:is_precoloured_sse2(Reg); +is_mem_opnd(#x86_temp{type=double, reg=Reg}) -> + not hipe_amd64_registers:is_precoloured_sse2(Reg); +is_mem_opnd(#x86_temp{type=_, reg=Reg}) -> + not hipe_amd64_registers:is_precoloured(Reg); +is_mem_opnd(#x86_mem{}) -> true. + +spill_temp() -> + hipe_x86:mk_temp(hipe_amd64_registers:sse2_temp0(), double). diff --git a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl b/lib/hipe/amd64/hipe_amd64_subst.erl index 6da3f44cd3..b0b9ccbe38 100644 --- a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl +++ b/lib/hipe/amd64/hipe_amd64_subst.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% --include("../x86/hipe_x86_ra_x87_ls.erl"). +-include("../x86/hipe_x86_subst.erl"). diff --git a/lib/hipe/amd64/hipe_amd64_x87.erl b/lib/hipe/amd64/hipe_amd64_x87.erl index 1f42e4749d..afb3aa63e7 100644 --- a/lib/hipe/amd64/hipe_amd64_x87.erl +++ b/lib/hipe/amd64/hipe_amd64_x87.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -include("../x86/hipe_x86_x87.erl"). diff --git a/lib/hipe/amd64/hipe_rtl_to_amd64.erl b/lib/hipe/amd64/hipe_rtl_to_amd64.erl index d55b5b2c22..7243e75f84 100644 --- a/lib/hipe/amd64/hipe_rtl_to_amd64.erl +++ b/lib/hipe/amd64/hipe_rtl_to_amd64.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,5 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -include("../x86/hipe_rtl_to_x86.erl"). diff --git a/lib/hipe/arm/Makefile b/lib/hipe/arm/Makefile index 00b6732afa..ed2eccf428 100644 --- a/lib/hipe/arm/Makefile +++ b/lib/hipe/arm/Makefile @@ -61,6 +61,7 @@ MODULES=hipe_arm \ hipe_arm_ra_naive \ hipe_arm_ra_postconditions \ hipe_arm_registers \ + hipe_arm_subst \ hipe_rtl_to_arm HRL_FILES=hipe_arm.hrl diff --git a/lib/hipe/arm/hipe_arm.erl b/lib/hipe/arm/hipe_arm.erl index f34525fa3b..3b090b501a 100644 --- a/lib/hipe/arm/hipe_arm.erl +++ b/lib/hipe/arm/hipe_arm.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm). -export([ @@ -86,6 +79,9 @@ pseudo_move_dst/1, pseudo_move_src/1, + mk_pseudo_spill_move/3, + is_pseudo_spill_move/1, + mk_pseudo_switch/3, mk_pseudo_tailcall/4, @@ -257,6 +253,10 @@ is_pseudo_move(I) -> case I of #pseudo_move{} -> true; _ -> false end. pseudo_move_dst(#pseudo_move{dst=Dst}) -> Dst. pseudo_move_src(#pseudo_move{src=Src}) -> Src. +mk_pseudo_spill_move(Dst, Temp, Src) -> + #pseudo_spill_move{dst=Dst, temp=Temp, src=Src}. +is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move). + mk_pseudo_switch(JTab, Index, Labels) -> #pseudo_switch{jtab=JTab, index=Index, labels=Labels}. diff --git a/lib/hipe/arm/hipe_arm.hrl b/lib/hipe/arm/hipe_arm.hrl index 558174e3fc..be06b1ebd7 100644 --- a/lib/hipe/arm/hipe_arm.hrl +++ b/lib/hipe/arm/hipe_arm.hrl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% %%%-------------------------------------------------------------------- %%% Basic Values: @@ -108,6 +101,7 @@ -record(pseudo_call_prepare, {nrstkargs}). -record(pseudo_li, {dst, imm, label}). % pre-generated label for use by the assembler -record(pseudo_move, {dst, src}). +-record(pseudo_spill_move, {dst, temp, src}). -record(pseudo_switch, {jtab, index, labels}). -record(pseudo_tailcall, {funv, arity, stkargs, linkage}). -record(pseudo_tailcall_prepare, {}). diff --git a/lib/hipe/arm/hipe_arm_assemble.erl b/lib/hipe/arm/hipe_arm_assemble.erl index 4a245cd853..9aa730afa9 100644 --- a/lib/hipe/arm/hipe_arm_assemble.erl +++ b/lib/hipe/arm/hipe_arm_assemble.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_assemble). -export([assemble/4]). @@ -38,7 +31,7 @@ assemble(CompiledCode, Closures, Exports, Options) -> || {MFA, Defun} <- CompiledCode], %% {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = - hipe_pack_constants:pack_constants(Code, 4), + hipe_pack_constants:pack_constants(Code), %% {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = encode(translate(Code, ConstMap), Options), diff --git a/lib/hipe/arm/hipe_arm_cfg.erl b/lib/hipe/arm/hipe_arm_cfg.erl index f2fa0a5164..0bc3df30b9 100644 --- a/lib/hipe/arm/hipe_arm_cfg.erl +++ b/lib/hipe/arm/hipe_arm_cfg.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,26 +11,26 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_cfg). -export([init/1, labels/1, start_label/1, succ/2, + map_bbs/2, fold_bbs/3, bb/2, bb_add/3]). -export([postorder/1]). -export([linearise/1]). -export([params/1, reverse_postorder/1]). -export([arity/1]). % for linear scan %%-export([redirect_jmp/3]). +-export([branch_preds/1]). %%% these tell cfg.inc what to define (ugly as hell) -define(BREADTH_ORDER,true). % for linear scan -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -include("hipe_arm.hrl"). -include("../flow/cfg.hrl"). @@ -80,6 +76,26 @@ branch_successors(Branch) -> #pseudo_tailcall{} -> [] end. +branch_preds(Branch) -> + case Branch of + #pseudo_bc{true_label=TrueLab,false_label=FalseLab,pred=Pred} -> + [{FalseLab, 1.0-Pred}, {TrueLab, Pred}]; + #pseudo_call{contlab=ContLab, sdesc=#arm_sdesc{exnlab=[]}} -> + %% A function can still cause an exception, even if we won't catch it + [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}]; + #pseudo_call{contlab=ContLab, sdesc=#arm_sdesc{exnlab=ExnLab}} -> + CallExnPred = hipe_bb_weights:call_exn_pred(), + [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}]; + #pseudo_switch{labels=Labels} -> + Prob = 1.0/length(Labels), + [{L, Prob} || L <- Labels]; + _ -> + case branch_successors(Branch) of + [] -> []; + [Single] -> [{Single, 1.0}] + end + end. + -ifdef(REMOVE_TRIVIAL_BBS_NEEDED). fails_to(_Instr) -> []. -endif. diff --git a/lib/hipe/arm/hipe_arm_defuse.erl b/lib/hipe/arm/hipe_arm_defuse.erl index f57b0e601c..652299a514 100644 --- a/lib/hipe/arm/hipe_arm_defuse.erl +++ b/lib/hipe/arm/hipe_arm_defuse.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,13 +11,11 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_defuse). -export([insn_def_all/1, insn_use_all/1]). -export([insn_def_gpr/1, insn_use_gpr/1]). +-export([insn_defs_all_gpr/1]). -include("hipe_arm.hrl"). %%% @@ -46,6 +40,7 @@ insn_def_gpr(I) -> #pseudo_call{} -> call_clobbered_gpr(); #pseudo_li{dst=Dst} -> [Dst]; #pseudo_move{dst=Dst} -> [Dst]; + #pseudo_spill_move{dst=Dst, temp=Temp} -> [Dst, Temp]; #pseudo_tailcall_prepare{} -> tailcall_clobbered_gpr(); #smull{dstlo=DstLo,dsthi=DstHi,src1=Src1} -> %% ARM requires DstLo, DstHi, and Src1 to be distinct. @@ -55,6 +50,12 @@ insn_def_gpr(I) -> _ -> [] end. +insn_defs_all_gpr(I) -> + case I of + #pseudo_call{} -> true; + _ -> false + end. + call_clobbered_gpr() -> [hipe_arm:mk_temp(R, T) || {R,T} <- hipe_arm_registers:call_clobbered() ++ all_fp_pseudos()]. @@ -83,6 +84,7 @@ insn_use_gpr(I) -> #pseudo_call{funv=FunV,sdesc=#arm_sdesc{arity=Arity}} -> funv_use(FunV, arity_use_gpr(Arity)); #pseudo_move{src=Src} -> [Src]; + #pseudo_spill_move{src=Src} -> [Src]; #pseudo_switch{jtab=JTabR,index=IndexR} -> addtemp(JTabR, [IndexR]); #pseudo_tailcall{funv=FunV,arity=Arity,stkargs=StkArgs} -> addargs(StkArgs, addtemps(tailcall_clobbered_gpr(), funv_use(FunV, arity_use_gpr(Arity)))); diff --git a/lib/hipe/arm/hipe_arm_encode.erl b/lib/hipe/arm/hipe_arm_encode.erl index 9368cbf628..dedb6547bb 100644 --- a/lib/hipe/arm/hipe_arm_encode.erl +++ b/lib/hipe/arm/hipe_arm_encode.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% Encode symbolic ARM instructions to binary form. %%% Copyright (C) 2005 Mikael Pettersson diff --git a/lib/hipe/arm/hipe_arm_finalise.erl b/lib/hipe/arm/hipe_arm_finalise.erl index a4b2f9c73c..3a6fd5a2dd 100644 --- a/lib/hipe/arm/hipe_arm_finalise.erl +++ b/lib/hipe/arm/hipe_arm_finalise.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,18 +11,19 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_finalise). --export([finalise/1]). +-export([finalise/2]). -include("hipe_arm.hrl"). -finalise(Defun) -> +finalise(Defun, Options) -> #defun{code=Code0} = Defun, - Code1 = peep(expand(Code0)), - Defun#defun{code=Code1}. + Code1Rev = expand(Code0), + Code2 = case proplists:get_bool(peephole, Options) of + true -> peep(Code1Rev); + false -> lists:reverse(Code1Rev) + end, + Defun#defun{code=Code2}. expand(Insns) -> expand_list(Insns, []). @@ -34,7 +31,7 @@ expand(Insns) -> expand_list([I|Insns], Accum) -> expand_list(Insns, expand_insn(I, Accum)); expand_list([], Accum) -> - lists:reverse(Accum). + Accum. expand_insn(I, Accum) -> case I of @@ -63,12 +60,67 @@ expand_insn(I, Accum) -> [I|Accum] end. -peep(Insns) -> - peep_list(Insns, []). +%% We do peephole "bottom-up" (in reverse, but applying rules to the correctly +%% ordered list). This way, we can do replacements that would take multiple +%% passes with an in-order peephole optimiser. +%% +%% N.B., if a rule wants to produce multiple instructions (even if some of them +%% are unchanged, it should push the additional instructions on the More list, +%% so that only the top instruction on Insns is new or changed, i.e. tl(Insns) +%% should have been peepholed previously. +peep(RevInsns) -> + peep_list_skip([], RevInsns). + +peep_list([#b_label{'cond'='al',label=Label} + | (Insns = [#label{label=Label}|_])], More) -> + peep_list_skip(Insns, More); + +peep_list([#move{movop='mov',s=false,dst=#arm_temp{reg=Dst} + ,am1=#arm_temp{reg=Dst}}|Insns], More) -> + peep_list_skip(Insns, More); + +peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsr,Imm}}, + #move{movop='mov',s=false,dst=Dst,am1={Dst,lsl,Imm}} + |Insns], More) when Imm > 0, Imm =< 8 -> + peep_list([#alu{aluop='bic',s=false,dst=Dst,src=Src,am1={(1 bsl Imm)-1,0}} + |Insns], More); +peep_list([#move{movop='mov',s=false,dst=Dst,am1={Src,lsl,Imm}}, + #move{movop='mov',s=false,dst=Dst,am1={Dst,lsr,Imm}} + |Insns], More) when Imm >= 24, Imm < 32 -> + peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src + ,am1={(1 bsl (32-Imm))-1,0}} | Insns], More); + +%% XXX: Load-after-store optimisation should also be applied to RTL, where it +%% can be more general, expose opportunities for constant propagation, etc. +peep_list([#store{stop='strb',src=Src,am2=Mem}=Str, + #load {ldop='ldrb',dst=Dst,am2=Mem} | Insns], More) -> + peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={16#ff,0}}|Insns], + [Str|More]); +peep_list([#store{stop='str',src=Src,am2=Mem}=Str, + #load {ldop='ldr',dst=Dst,am2=Mem} | Insns], More) -> + peep_list([#move{movop='mov',s=false,dst=Dst,am1=Src}|Insns], [Str|More]); + +peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src,am1={Mask,0}}, + #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={InvMask,0}} + |Insns], More) -> + peep_list([#alu{aluop='and',s=false,dst=Dst,src=Src + ,am1={Mask band (bnot InvMask),0}} | Insns], More); + +%% XXX: The place that generates brain-dead code like the following should be +%% fixed rather than trying to patch it over here. +peep_list([#load{ldop='ldrb',dst=Dst,am2=_Mem}, + #alu{aluop='bic',s=false,dst=Dst,src=Dst,am1={16#ff,0}} + | Insns], More) -> + peep_list([#move{movop='mov',s=false,dst=Dst,am1={0,0}}|Insns], More); + +peep_list(Insns, [I|More]) -> + peep_list([I|Insns], More); +peep_list(Accum, []) -> + Accum. -peep_list([#b_label{'cond'='al',label=Label} | (Insns = [#label{label=Label}|_])], Accum) -> - peep_list(Insns, Accum); -peep_list([I|Insns], Accum) -> - peep_list(Insns, [I|Accum]); -peep_list([], Accum) -> - lists:reverse(Accum). +%% Used as an optimisation instead of tailcalling peep_list/2 when Insns has +%% already been peeped or is otherwise uninteresting (such as empty). +peep_list_skip(Insns, [I|More]) -> + peep_list([I|Insns], More); +peep_list_skip(Accum, []) -> + Accum. diff --git a/lib/hipe/arm/hipe_arm_frame.erl b/lib/hipe/arm/hipe_arm_frame.erl index e1e441a967..a1004fb609 100644 --- a/lib/hipe/arm/hipe_arm_frame.erl +++ b/lib/hipe/arm/hipe_arm_frame.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_frame). -export([frame/1]). @@ -27,16 +20,14 @@ -define(LIVENESS_ALL, hipe_arm_liveness_gpr). % since we have no FP yet -frame(Defun) -> - Formals = fix_formals(hipe_arm:defun_formals(Defun)), - Temps0 = all_temps(hipe_arm:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG) -> + Formals = fix_formals(hipe_arm_cfg:params(CFG)), + Temps0 = all_temps(CFG, Formals), + MinFrame = defun_minframe(CFG), Temps = ensure_minframe(MinFrame, Temps0), - ClobbersLR = clobbers_lr(hipe_arm:defun_code(Defun)), - CFG0 = hipe_arm_cfg:init(Defun), - Liveness = ?LIVENESS_ALL:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersLR), - hipe_arm_cfg:linearise(CFG1). + ClobbersLR = clobbers_lr(CFG), + Liveness = ?LIVENESS_ALL:analyse(CFG), + do_body(CFG, Liveness, Formals, Temps, ClobbersLR). fix_formals(Formals) -> fix_formals(hipe_arm_registers:nr_args(), Formals). @@ -51,32 +42,21 @@ do_body(CFG0, Liveness, Formals, Temps, ClobbersLR) -> do_prologue(CFG1, Context). do_blocks(CFG, Context) -> - Labels = hipe_arm_cfg:labels(CFG), - do_blocks(Labels, CFG, Context). + hipe_arm_cfg:map_bbs(fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG). -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) -> Liveness = context_liveness(Context), LiveOut = ?LIVENESS_ALL:liveout(Liveness, Label), - Block = hipe_arm_cfg:bb(CFG, Label), Code = hipe_bb:code(Block), - NewCode = do_block(Code, LiveOut, Context), - NewBlock = hipe_bb:code_update(Block, NewCode), - NewCFG = hipe_arm_cfg:bb_add(CFG, Label, NewBlock), - do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> - CFG. - -do_block(Insns, LiveOut, Context) -> - do_block(Insns, LiveOut, Context, context_framesize(Context), []). + NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), + hipe_bb:code_update(Block, NewCode). do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), do_block(Insns, LiveOut, Context, FPoff1, lists:reverse(NewIs, RevCode)); do_block([], _, Context, FPoff, RevCode) -> FPoff0 = context_framesize(Context), - if FPoff =:= FPoff0 -> []; - true -> exit({?MODULE,do_block,FPoff}) - end, + FPoff0 = FPoff, lists:reverse(RevCode, []). do_insn(I, LiveOut, Context, FPoff) -> @@ -89,6 +69,8 @@ do_insn(I, LiveOut, Context, FPoff) -> do_pseudo_call_prepare(I, FPoff); #pseudo_move{} -> {do_pseudo_move(I, Context, FPoff), FPoff}; + #pseudo_spill_move{} -> + {do_pseudo_spill_move(I, Context, FPoff), FPoff}; #pseudo_tailcall{} -> {do_pseudo_tailcall(I, Context), context_framesize(Context)}; _ -> @@ -120,6 +102,26 @@ pseudo_offset(Temp, FPoff, Context) -> FPoff + context_offset(Context, Temp). %%% +%%% Moves from one spill slot to another +%%% + +do_pseudo_spill_move(I, Context, FPoff) -> + #pseudo_spill_move{dst=Dst, temp=Temp, src=Src} = I, + case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of + false -> % Register allocator changed its mind, turn back to move + do_pseudo_move(hipe_arm:mk_pseudo_move(Dst, Src), Context, FPoff); + true -> + SrcOffset = pseudo_offset(Src, FPoff, Context), + DstOffset = pseudo_offset(Dst, FPoff, Context), + case SrcOffset =:= DstOffset of + true -> []; % omit move-to-self + false -> + mk_load('ldr', Temp, SrcOffset, mk_sp(), + mk_store('str', Temp, DstOffset, mk_sp(), [])) + end + end. + +%%% %%% Return - deallocate frame and emit 'ret $N' insn. %%% @@ -543,39 +545,46 @@ temp_is_pseudo(Temp) -> %%% Detect if a Defun's body clobbers LR. %%% -clobbers_lr(Insns) -> +clobbers_lr(CFG) -> LRreg = hipe_arm_registers:lr(), LRtagged = hipe_arm:mk_temp(LRreg, 'tagged'), LRuntagged = hipe_arm:mk_temp(LRreg, 'untagged'), - clobbers_lr(Insns, LRtagged, LRuntagged). - -clobbers_lr([I|Insns], LRtagged, LRuntagged) -> - Defs = hipe_arm_defuse:insn_def_gpr(I), - case lists:member(LRtagged, Defs) of - true -> true; - false -> - case lists:member(LRuntagged, Defs) of - true -> true; - false -> clobbers_lr(Insns, LRtagged, LRuntagged) - end - end; -clobbers_lr([], _LRtagged, _LRuntagged) -> false. + any_insn(fun(I) -> + Defs = hipe_arm_defuse:insn_def_gpr(I), + lists:member(LRtagged, Defs) + orelse lists:member(LRuntagged, Defs) + end, CFG). + +any_insn(Pred, CFG) -> + %% Abuse fold to do an efficient "any"-operation using nonlocal control flow + FoundSatisfying = make_ref(), + try fold_insns(fun (I, _) -> + case Pred(I) of + true -> throw(FoundSatisfying); + false -> false + end + end, false, CFG) + of _ -> false + catch FoundSatisfying -> true + end. %%% %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_arm_defuse:insn_def_all(I)), - S2 = tset_add_list(S1, hipe_arm_defuse:insn_use_all(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_arm_defuse:insn_use_all(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_arm_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). tset_empty() -> gb_sets:new(). @@ -604,16 +613,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_arm:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_arm:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_arm_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/arm/hipe_arm_liveness_gpr.erl b/lib/hipe/arm/hipe_arm_liveness_gpr.erl index 82cc5a7a67..ae845e5385 100644 --- a/lib/hipe/arm/hipe_arm_liveness_gpr.erl +++ b/lib/hipe/arm/hipe_arm_liveness_gpr.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_liveness_gpr). -export([analyse/1]). diff --git a/lib/hipe/arm/hipe_arm_main.erl b/lib/hipe/arm/hipe_arm_main.erl index dce1193b24..b87a300a9d 100644 --- a/lib/hipe/arm/hipe_arm_main.erl +++ b/lib/hipe/arm/hipe_arm_main.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,24 +11,23 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_main). -export([rtl_to_arm/3]). rtl_to_arm(MFA, RTL, Options) -> Defun1 = hipe_rtl_to_arm:translate(RTL), + CFG1 = hipe_arm_cfg:init(Defun1), %% io:format("~w: after translate\n", [?MODULE]), %% hipe_arm_pp:pp(Defun1), - Defun2 = hipe_arm_ra:ra(Defun1, Options), + CFG2 = hipe_arm_ra:ra(CFG1, Options), %% io:format("~w: after regalloc\n", [?MODULE]), - %% hipe_arm_pp:pp(Defun2), - Defun3 = hipe_arm_frame:frame(Defun2), + %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG2)), + CFG3 = hipe_arm_frame:frame(CFG2), + Defun3 = hipe_arm_cfg:linearise(CFG3), %% io:format("~w: after frame\n", [?MODULE]), %% hipe_arm_pp:pp(Defun3), - Defun4 = hipe_arm_finalise:finalise(Defun3), + Defun4 = hipe_arm_finalise:finalise(Defun3, Options), %% io:format("~w: after finalise\n", [?MODULE]), pp(Defun4, MFA, Options), {native, arm, {unprofiled, Defun4}}. diff --git a/lib/hipe/arm/hipe_arm_pp.erl b/lib/hipe/arm/hipe_arm_pp.erl index 18aca1fc6b..f49e998d06 100644 --- a/lib/hipe/arm/hipe_arm_pp.erl +++ b/lib/hipe/arm/hipe_arm_pp.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_pp). -export([pp/1, pp/2, pp_insn/1]). diff --git a/lib/hipe/arm/hipe_arm_ra.erl b/lib/hipe/arm/hipe_arm_ra.erl index 2f65e864fd..b360fc05c4 100644 --- a/lib/hipe/arm/hipe_arm_ra.erl +++ b/lib/hipe/arm/hipe_arm_ra.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,43 +11,44 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_ra). -export([ra/2]). -ra(Defun0, Options) -> - %% hipe_arm_pp:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} +ra(CFG0, Options) -> + %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG0)), + {CFG1, _FPLiveness1, Coloring_fp, SpillIndex} = case proplists:get_bool(inline_fp, Options) of %% true -> -%% hipe_regalloc_loop:ra_fp(Defun0, Options, +%% FPLiveness0 = hipe_arm_specific_fp:analyze(CFG0, no_context), +%% hipe_regalloc_loop:ra_fp(CFG0, FPLiveness0, Options, %% hipe_coalescing_regalloc, -%% hipe_arm_specific_fp); +%% hipe_arm_specific_fp, no_context); false -> - {Defun0,[],0} + {CFG0,undefined,[],0} end, - %% hipe_arm_pp:pp(Defun1), - {Defun2, Coloring} + %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG1)), + GPLiveness1 = hipe_arm_specific:analyze(CFG1, no_context), + {CFG2, _GPLiveness2, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, + hipe_graph_coloring_regalloc); linear_scan -> - hipe_arm_ra_ls:ra(Defun1, SpillIndex, Options); + hipe_arm_ra_ls:ra(CFG1, GPLiveness1, SpillIndex, Options); naive -> - hipe_arm_ra_naive:ra(Defun1, Coloring_fp, Options); + hipe_arm_ra_naive:ra(CFG1, GPLiveness1, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, - %% hipe_arm_pp:pp(Defun2), - hipe_arm_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). + %% hipe_arm_pp:pp(hipe_arm_cfg:linearise(CFG2)), + hipe_arm_ra_finalise:finalise(CFG2, Coloring, Coloring_fp). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_arm_specific). +ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, + hipe_arm_specific, no_context). diff --git a/lib/hipe/arm/hipe_arm_ra_finalise.erl b/lib/hipe/arm/hipe_arm_ra_finalise.erl index 4faeadcd7f..80cd470708 100644 --- a/lib/hipe/arm/hipe_arm_ra_finalise.erl +++ b/lib/hipe/arm/hipe_arm_ra_finalise.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,27 +11,31 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_ra_finalise). -export([finalise/3]). -include("hipe_arm.hrl"). -finalise(Defun, TempMap, _FPMap0=[]) -> - Code = hipe_arm:defun_code(Defun), - {_, SpillLimit} = hipe_arm:defun_var_range(Defun), +finalise(CFG, TempMap, _FPMap0=[]) -> + {_, SpillLimit} = hipe_gensym:var_range(arm), Map = mk_ra_map(TempMap, SpillLimit), - NewCode = ra_code(Code, Map, []), - Defun#defun{code=NewCode}. + hipe_arm_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map) end, CFG). + +ra_bb(BB, Map) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, [])). ra_code([I|Insns], Map, Accum) -> - ra_code(Insns, Map, [ra_insn(I, Map) | Accum]); + ra_code(Insns, Map, ra_insn(I, Map, Accum)); ra_code([], _Map, Accum) -> lists:reverse(Accum). -ra_insn(I, Map) -> +ra_insn(I, Map, Accum) -> + case I of + #pseudo_move{} -> ra_pseudo_move(I, Map, Accum); + _ -> [ra_insn_1(I, Map) | Accum] + end. + +ra_insn_1(I, Map) -> case I of #alu{} -> ra_alu(I, Map); #cmp{} -> ra_cmp(I, Map); @@ -44,7 +44,7 @@ ra_insn(I, Map) -> #move{} -> ra_move(I, Map); #pseudo_call{} -> ra_pseudo_call(I, Map); #pseudo_li{} -> ra_pseudo_li(I, Map); - #pseudo_move{} -> ra_pseudo_move(I, Map); + #pseudo_spill_move{} -> ra_pseudo_spill_move(I, Map); #pseudo_switch{} -> ra_pseudo_switch(I, Map); #pseudo_tailcall{} -> ra_pseudo_tailcall(I, Map); #smull{} -> ra_smull(I, Map); @@ -86,10 +86,19 @@ ra_pseudo_li(I=#pseudo_li{dst=Dst}, Map) -> NewDst = ra_temp(Dst, Map), I#pseudo_li{dst=NewDst}. -ra_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, Map) -> +ra_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, Map, Accum) -> + NewDst = ra_temp(Dst, Map), + NewSrc = ra_temp(Src, Map), + case NewSrc#arm_temp.reg =:= NewDst#arm_temp.reg of + true -> Accum; + false -> [I#pseudo_move{dst=NewDst,src=NewSrc} | Accum] + end. + +ra_pseudo_spill_move(I=#pseudo_spill_move{dst=Dst,temp=Temp,src=Src}, Map) -> NewDst = ra_temp(Dst, Map), + NewTemp = ra_temp(Temp, Map), NewSrc = ra_temp(Src, Map), - I#pseudo_move{dst=NewDst,src=NewSrc}. + I#pseudo_spill_move{dst=NewDst, temp=NewTemp, src=NewSrc}. ra_pseudo_switch(I=#pseudo_switch{jtab=JTab,index=Index}, Map) -> NewJTab = ra_temp(JTab, Map), diff --git a/lib/hipe/arm/hipe_arm_ra_ls.erl b/lib/hipe/arm/hipe_arm_ra_ls.erl index d9a360d00c..bbb75f9c55 100644 --- a/lib/hipe/arm/hipe_arm_ra_ls.erl +++ b/lib/hipe/arm/hipe_arm_ra_ls.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2006-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,43 +11,39 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% Linear Scan register allocator for ARM -module(hipe_arm_ra_ls). --export([ra/3]). +-export([ra/4]). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_arm_cfg:init(NewDefun), - SpillLimit = hipe_arm_specific:number_of_temporaries(CFG), - alloc(NewDefun, SpillIndex, SpillLimit, Options). +ra(CFG, Liveness, SpillIndex, Options) -> + SpillLimit = hipe_arm_specific:number_of_temporaries(CFG, no_context), + alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). -alloc(Defun, SpillIndex, SpillLimit, Options) -> - CFG = hipe_arm_cfg:init(Defun), +alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) -> {Coloring, _NewSpillIndex} = regalloc( - CFG, + CFG, Liveness, hipe_arm_registers:allocatable_gpr()-- [hipe_arm_registers:temp3(), hipe_arm_registers:temp2(), hipe_arm_registers:temp1()], [hipe_arm_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, - hipe_arm_specific), - {NewDefun, _DidSpill} = + hipe_arm_specific, no_context), + {NewCFG, _DidSpill} = hipe_arm_ra_postconditions:check_and_rewrite( - Defun, Coloring, 'linearscan'), - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific), + CFG, Coloring, 'linearscan'), + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific, no_context), {SpillMap, _NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, - hipe_arm_specific, TempMap), + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + hipe_arm_specific, no_context, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), SpillMap), - {NewDefun, Coloring2}. + {NewCFG, Liveness, Coloring2}. -regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> - hipe_ls_regalloc:regalloc( - CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target). +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, + TgtMod, TgtCtx) -> + hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, + DontSpill, Options, TgtMod, TgtCtx). diff --git a/lib/hipe/arm/hipe_arm_ra_naive.erl b/lib/hipe/arm/hipe_arm_ra_naive.erl index 6201269f44..e3fe9877ad 100644 --- a/lib/hipe/arm/hipe_arm_ra_naive.erl +++ b/lib/hipe/arm/hipe_arm_ra_naive.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,16 +11,13 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_ra_naive). --export([ra/3]). +-export([ra/4]). -include("hipe_arm.hrl"). -ra(Defun, _Coloring_fp, _Options) -> % -> {Defun, Coloring} - {NewDefun,_DidSpill} = - hipe_arm_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), - {NewDefun, []}. +ra(CFG, Liveness, _Coloring_fp, _Options) -> % -> {CFG, Liveness, Coloring} + {NewCFG,_DidSpill} = + hipe_arm_ra_postconditions:check_and_rewrite2(CFG, [], 'naive'), + {NewCFG, Liveness, []}. diff --git a/lib/hipe/arm/hipe_arm_ra_postconditions.erl b/lib/hipe/arm/hipe_arm_ra_postconditions.erl index 40978e65f6..23c305511f 100644 --- a/lib/hipe/arm/hipe_arm_ra_postconditions.erl +++ b/lib/hipe/arm/hipe_arm_ra_postconditions.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_ra_postconditions). @@ -25,17 +18,13 @@ -include("hipe_arm.hrl"). -check_and_rewrite(Defun, Coloring, Allocator) -> - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific), - check_and_rewrite2(Defun, TempMap, Allocator). +check_and_rewrite(CFG, Coloring, Allocator) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_arm_specific, no_context), + check_and_rewrite2(CFG, TempMap, Allocator). -check_and_rewrite2(Defun, TempMap, Allocator) -> +check_and_rewrite2(CFG, TempMap, Allocator) -> Strategy = strategy(Allocator), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - VarRange = {0, hipe_gensym:get_var(arm)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. + do_bbs(hipe_arm_cfg:labels(CFG), TempMap, Strategy, CFG, false). strategy(Allocator) -> case Allocator of @@ -44,6 +33,13 @@ strategy(Allocator) -> 'naive' -> 'fixed' end. +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_arm_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_arm_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). + do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); @@ -60,6 +56,7 @@ do_insn(I, TempMap, Strategy) -> #pseudo_call{} -> do_pseudo_call(I, TempMap, Strategy); #pseudo_li{} -> do_pseudo_li(I, TempMap, Strategy); #pseudo_move{} -> do_pseudo_move(I, TempMap, Strategy); + #pseudo_spill_move{} -> do_pseudo_spill_move(I, TempMap, Strategy); #pseudo_switch{} -> do_pseudo_switch(I, TempMap, Strategy); #pseudo_tailcall{} -> do_pseudo_tailcall(I, TempMap, Strategy); #smull{} -> do_smull(I, TempMap, Strategy); @@ -112,18 +109,25 @@ do_pseudo_li(I=#pseudo_li{dst=Dst}, TempMap, Strategy) -> do_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, TempMap, Strategy) -> %% Either Dst or Src (but not both) may be a pseudo temp. - %% pseudo_move and pseudo_tailcall are special cases: in - %% all other instructions, all temps must be non-pseudos - %% after register allocation. - case temp_is_spilled(Dst, TempMap) of - true -> % Src must not be a pseudo - {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy), - NewI = I#pseudo_move{src=NewSrc}, - {FixSrc ++ [NewI], DidSpill}; + %% pseudo_move, pseudo_spill_move, and pseudo_tailcall + %% are special cases: in all other instructions, all + %% temps must be non-pseudos after register allocation. + case temp_is_spilled(Dst, TempMap) + andalso temp_is_spilled(Dst, TempMap) + of + true -> % Turn into pseudo_spill_move + Temp = clone(Src, temp1(Strategy)), + NewI = #pseudo_spill_move{dst=Dst, temp=Temp, src=Src}, + {[NewI], true}; _ -> {[I], false} end. +do_pseudo_spill_move(I = #pseudo_spill_move{temp=Temp}, TempMap, _Strategy) -> + %% Temp is above the low water mark and must not have been spilled + false = temp_is_spilled(Temp, TempMap), + {[I], false}. % nothing to do + do_pseudo_switch(I=#pseudo_switch{jtab=JTab,index=Index}, TempMap, Strategy) -> {FixJTab,NewJTab,DidSpill1} = fix_src1(JTab, TempMap, Strategy), {FixIndex,NewIndex,DidSpill2} = fix_src2(Index, TempMap, Strategy), diff --git a/lib/hipe/arm/hipe_arm_registers.erl b/lib/hipe/arm/hipe_arm_registers.erl index dcf039676b..59545c2e64 100644 --- a/lib/hipe/arm/hipe_arm_registers.erl +++ b/lib/hipe/arm/hipe_arm_registers.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_registers). @@ -180,6 +173,8 @@ is_arg(R) -> _ -> false end. +%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is +%% hard-coded in hipe_arm_defuse:insn_defs_all_gpr/1 call_clobbered() -> % does the RA strip the type or not? [{?R0,tagged},{?R0,untagged}, {?R1,tagged},{?R1,untagged}, diff --git a/lib/hipe/arm/hipe_arm_subst.erl b/lib/hipe/arm/hipe_arm_subst.erl new file mode 100644 index 0000000000..4ff245f414 --- /dev/null +++ b/lib/hipe/arm/hipe_arm_subst.erl @@ -0,0 +1,127 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +-module(hipe_arm_subst). +-export([insn_temps/2, insn_lbls/2]). +-include("hipe_arm.hrl"). + +%% These should be moved to hipe_arm and exported +-type temp() :: #arm_temp{}. +-type shiftop() :: lsl | lsr | asr | ror. +-type imm4() :: 0..15. +-type imm5() :: 0..31. +-type imm8() :: 0..255. +-type am1() :: {imm8(),imm4()} + | temp() + | {temp(), rrx} + | {temp(), shiftop(), imm5()} + | {temp(), shiftop(), temp()}. +-type am2() :: #am2{}. +-type am3() :: #am3{}. +-type arg() :: temp() | integer(). +-type funv() :: #arm_mfa{} | #arm_prim{} | temp(). +-type label() :: non_neg_integer(). +-type insn() :: tuple(). % for now + +-type subst_fun() :: fun((temp()) -> temp()). + +%% @doc Maps over the temporaries in an instruction +-spec insn_temps(subst_fun(), insn()) -> insn(). +insn_temps(T, I) -> + AM1 = fun(O) -> am1_temps(T, O) end, + AM2 = fun(O) -> am2_temps(T, O) end, + AM3 = fun(O) -> am3_temps(T, O) end, + Arg = fun(O) -> arg_temps(T, O) end, + case I of + #alu {dst=D,src=L,am1=R} -> I#alu{dst=T(D),src=T(L),am1=AM1(R)}; + #cmp {src=L,am1=R} -> I#cmp {src=T(L),am1=AM1(R)}; + #load {dst=D,am2=S} -> I#load {dst=T(D),am2=AM2(S)}; + #ldrsb {dst=D,am3=S} -> I#ldrsb {dst=T(D),am3=AM3(S)}; + #move {dst=D,am1=S} -> I#move {dst=T(D),am1=AM1(S)}; + #pseudo_move{dst=D,src=S} -> I#pseudo_move {dst=T(D),src=T(S)}; + #store {src=S,am2=D} -> I#store {src=T(S),am2=AM2(D)}; + #b_label{} -> I; + #comment{} -> I; + #label{} -> I; + #pseudo_bc{} -> I; + #pseudo_blr{} -> I; + #pseudo_call{funv=F} -> I#pseudo_call{funv=funv_temps(T, F)}; + #pseudo_call_prepare{} -> I; + #pseudo_li{dst=D} -> I#pseudo_li{dst=T(D)}; + #pseudo_spill_move{dst=D,temp=U,src=S} -> + I#pseudo_spill_move{dst=T(D),temp=T(U),src=T(S)}; + #pseudo_switch{jtab=J=#arm_temp{},index=Ix=#arm_temp{}} -> + I#pseudo_switch{jtab=T(J),index=T(Ix)}; + #pseudo_tailcall{funv=F,stkargs=Stk} -> + I#pseudo_tailcall{funv=funv_temps(T,F),stkargs=lists:map(Arg,Stk)}; + #pseudo_tailcall_prepare{} -> I; + #smull{dstlo=DL,dsthi=DH,src1=L,src2=R} -> + I#smull{dstlo=T(DL),dsthi=T(DH),src1=T(L),src2=T(R)} + end. + +-spec am1_temps(subst_fun(), am1()) -> am1(). +am1_temps(_SubstTemp, T={C,R}) when is_integer(C), is_integer(R) -> T; +am1_temps(SubstTemp, T=#arm_temp{}) -> SubstTemp(T); +am1_temps(SubstTemp, {T=#arm_temp{},rrx}) -> {SubstTemp(T),rrx}; +am1_temps(SubstTemp, {A=#arm_temp{},Op,B=#arm_temp{}}) when is_atom(Op) -> + {SubstTemp(A),Op,SubstTemp(B)}; +am1_temps(SubstTemp, {T=#arm_temp{},Op,I}) when is_atom(Op), is_integer(I) -> + {SubstTemp(T),Op,I}. + +-spec am2_temps(subst_fun(), am2()) -> am2(). +am2_temps(SubstTemp, T=#am2{src=A=#arm_temp{},offset=O0}) -> + O = case O0 of + _ when is_integer(O0) -> O0; + #arm_temp{} -> SubstTemp(O0); + {B=#arm_temp{},rrx} -> {SubstTemp(B),rrx}; + {B=#arm_temp{},Op,I} when is_atom(Op), is_integer(I) -> + {SubstTemp(B),Op,I} + end, + T#am2{src=SubstTemp(A),offset=O}. + +-spec am3_temps(subst_fun(), am3()) -> am3(). +am3_temps(SubstTemp, T=#am3{src=A=#arm_temp{},offset=O0}) -> + O = case O0 of + _ when is_integer(O0) -> O0; + #arm_temp{} -> SubstTemp(O0) + end, + T#am3{src=SubstTemp(A),offset=O}. + +-spec funv_temps(subst_fun(), funv()) -> funv(). +funv_temps(_SubstTemp, M=#arm_mfa{}) -> M; +funv_temps(_SubstTemp, P=#arm_prim{}) -> P; +funv_temps(SubstTemp, T=#arm_temp{}) -> SubstTemp(T). + +-spec arg_temps(subst_fun(), arg()) -> arg(). +arg_temps(_SubstTemp, Imm) when is_integer(Imm) -> Imm; +arg_temps(SubstTemp, T=#arm_temp{}) -> SubstTemp(T). + +-type lbl_subst_fun() :: fun((label()) -> label()). + +%% @doc Maps over the branch targets in an instruction +-spec insn_lbls(lbl_subst_fun(), insn()) -> insn(). +insn_lbls(SubstLbl, I) -> + case I of + #b_label{label=Label} -> + I#b_label{label=SubstLbl(Label)}; + #pseudo_bc{true_label=T, false_label=F} -> + I#pseudo_bc{true_label=SubstLbl(T), false_label=SubstLbl(F)}; + #pseudo_call{sdesc=Sdesc, contlab=Contlab} -> + I#pseudo_call{sdesc=sdesc_lbls(SubstLbl, Sdesc), + contlab=SubstLbl(Contlab)} + end. + +sdesc_lbls(_SubstLbl, Sdesc=#arm_sdesc{exnlab=[]}) -> Sdesc; +sdesc_lbls(SubstLbl, Sdesc=#arm_sdesc{exnlab=Exnlab}) -> + Sdesc#arm_sdesc{exnlab=SubstLbl(Exnlab)}. diff --git a/lib/hipe/arm/hipe_rtl_to_arm.erl b/lib/hipe/arm/hipe_rtl_to_arm.erl index 93342aba33..59e0a79b0d 100644 --- a/lib/hipe/arm/hipe_rtl_to_arm.erl +++ b/lib/hipe/arm/hipe_rtl_to_arm.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_rtl_to_arm). -export([translate/1]). @@ -62,7 +55,6 @@ conv_insn(I, Map, Data) -> case I of #alu{} -> conv_alu(I, Map, Data); #alub{} -> conv_alub(I, Map, Data); - #branch{} -> conv_branch(I, Map, Data); #call{} -> conv_call(I, Map, Data); #comment{} -> conv_comment(I, Map, Data); #enter{} -> conv_enter(I, Map, Data); @@ -111,6 +103,17 @@ commute_arithop(ArithOp) -> _ -> ArithOp end. +conv_cmpop('add') -> 'cmn'; +conv_cmpop('sub') -> 'cmp'; +conv_cmpop('and') -> 'tst'; +conv_cmpop('xor') -> 'teq'; +conv_cmpop(_) -> none. + +cmpop_commutes('cmp') -> false; +cmpop_commutes('cmn') -> true; +cmpop_commutes('tst') -> true; +cmpop_commutes('teq') -> true. + mk_alu(S, Dst, Src1, RtlAluOp, Src2) -> case hipe_rtl:is_shift_op(RtlAluOp) of true -> @@ -138,7 +141,6 @@ mk_shift(S, Dst, Src1, ShiftOp, Src2) -> end. mk_shift_ii(S, Dst, Src1, ShiftOp, Src2) -> - io:format("~w: RTL alu with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Src1, mk_shift_ri(S, Dst, Tmp, ShiftOp, Src2)). @@ -179,7 +181,6 @@ mk_arith(S, Dst, Src1, ArithOp, Src2) -> end. mk_arith_ii(S, Dst, Src1, ArithOp, Src2) -> - io:format("~w: RTL alu with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Src1, mk_arith_ri(S, Dst, Tmp, ArithOp, Src2)). @@ -225,72 +226,77 @@ fix_aluop_imm(AluOp, Imm) -> % {FixAm1,NewAluOp,Am1} conv_alub(I, Map, Data) -> %% dst = src1 aluop src2; if COND goto label - {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), - {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), - {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), + {Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map), + {Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0), RtlAluOp = hipe_rtl:alub_op(I), - Cond0 = conv_alub_cond(RtlAluOp, hipe_rtl:alub_cond(I)), - Cond = - case {RtlAluOp,Cond0} of - {'mul','vs'} -> 'ne'; % overflow becomes not-equal - {'mul','vc'} -> 'eq'; % no-overflow becomes equal - {'mul',_} -> exit({?MODULE,I}); - {_,_} -> Cond0 - end, - I2 = mk_pseudo_bc( - Cond, - hipe_rtl:alub_true_label(I), - hipe_rtl:alub_false_label(I), - hipe_rtl:alub_pred(I)), - S = true, - I1 = mk_alu(S, Dst, Src1, RtlAluOp, Src2), - {I1 ++ I2, Map2, Data}. - -conv_branch(I, Map, Data) -> - %% <unused> = src1 - src2; if COND goto label - {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), - {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), - Cond = conv_branch_cond(hipe_rtl:branch_cond(I)), - I2 = mk_branch(Src1, Cond, Src2, - hipe_rtl:branch_true_label(I), - hipe_rtl:branch_false_label(I), - hipe_rtl:branch_pred(I)), - {I2, Map1, Data}. + RtlCond = hipe_rtl:alub_cond(I), + HasDst = hipe_rtl:alub_has_dst(I), + CmpOp = conv_cmpop(RtlAluOp), + Cond0 = conv_alub_cond(RtlAluOp, RtlCond), + case (not HasDst) andalso CmpOp =/= none of + true -> + I1 = mk_branch(Src1, CmpOp, Src2, Cond0, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I)), + {I1, Map1, Data}; + false -> + {Dst, Map2} = + case HasDst of + false -> {new_untagged_temp(), Map1}; + true -> conv_dst(hipe_rtl:alub_dst(I), Map1) + end, + Cond = + case {RtlAluOp,Cond0} of + {'mul','vs'} -> 'ne'; % overflow becomes not-equal + {'mul','vc'} -> 'eq'; % no-overflow becomes equal + {'mul',_} -> exit({?MODULE,I}); + {_,_} -> Cond0 + end, + I2 = mk_pseudo_bc( + Cond, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I)), + S = true, + I1 = mk_alu(S, Dst, Src1, RtlAluOp, Src2), + {I1 ++ I2, Map2, Data} + end. -mk_branch(Src1, Cond, Src2, TrueLab, FalseLab, Pred) -> +mk_branch(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred) -> case hipe_arm:is_temp(Src1) of true -> case hipe_arm:is_temp(Src2) of true -> - mk_branch_rr(Src1, Src2, Cond, TrueLab, FalseLab, Pred); + mk_branch_rr(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred); _ -> - mk_branch_ri(Src1, Cond, Src2, TrueLab, FalseLab, Pred) + mk_branch_ri(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred) end; _ -> case hipe_arm:is_temp(Src2) of true -> - NewCond = commute_cond(Cond), - mk_branch_ri(Src2, NewCond, Src1, TrueLab, FalseLab, Pred); + NewCond = + case cmpop_commutes(CmpOp) of + true -> Cond; + false -> commute_cond(Cond) + end, + mk_branch_ri(Src2, CmpOp, Src1, NewCond, TrueLab, FalseLab, Pred); _ -> - mk_branch_ii(Src1, Cond, Src2, TrueLab, FalseLab, Pred) + mk_branch_ii(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred) end end. -mk_branch_ii(Imm1, Cond, Imm2, TrueLab, FalseLab, Pred) -> - io:format("~w: RTL branch with two immediates\n", [?MODULE]), +mk_branch_ii(Imm1, CmpOp, Imm2, Cond, TrueLab, FalseLab, Pred) -> Tmp = new_untagged_temp(), mk_li(Tmp, Imm1, - mk_branch_ri(Tmp, Cond, Imm2, + mk_branch_ri(Tmp, CmpOp, Imm2, Cond, TrueLab, FalseLab, Pred)). -mk_branch_ri(Src, Cond, Imm, TrueLab, FalseLab, Pred) -> - {FixAm1,NewCmpOp,Am1} = fix_aluop_imm('cmp', Imm), - FixAm1 ++ mk_cmp_bc(NewCmpOp, Src, Am1, Cond, TrueLab, FalseLab, Pred). - -mk_branch_rr(Src1, Src2, Cond, TrueLab, FalseLab, Pred) -> - mk_cmp_bc('cmp', Src1, Src2, Cond, TrueLab, FalseLab, Pred). +mk_branch_ri(Src, CmpOp, Imm, Cond, TrueLab, FalseLab, Pred) -> + {FixAm1,NewCmpOp,Am1} = fix_aluop_imm(CmpOp, Imm), + FixAm1 ++ mk_branch_rr(Src, NewCmpOp, Am1, Cond, TrueLab, FalseLab, Pred). -mk_cmp_bc(CmpOp, Src, Am1, Cond, TrueLab, FalseLab, Pred) -> +mk_branch_rr(Src, CmpOp, Am1, Cond, TrueLab, FalseLab, Pred) -> [hipe_arm:mk_cmp(CmpOp, Src, Am1) | mk_pseudo_bc(Cond, TrueLab, FalseLab, Pred)]. @@ -472,7 +478,6 @@ mk_load(Dst, Base1, Base2, LoadSize, LoadSign) -> end. mk_load_ii(Dst, Base1, Base2, LdOp) -> - io:format("~w: RTL load with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Base1, mk_load_ri(Dst, Tmp, Base2, LdOp)). @@ -485,7 +490,6 @@ mk_load_rr(Dst, Base1, Base2, LdOp) -> [hipe_arm:mk_load(LdOp, Dst, Am2)]. mk_ldrsb_ii(Dst, Base1, Base2) -> - io:format("~w: RTL load signed byte with two immediates\n", [?MODULE]), Tmp = new_untagged_temp(), mk_li(Tmp, Base1, mk_ldrsb_ri(Dst, Tmp, Base2)). @@ -543,7 +547,7 @@ conv_return(I, Map, Data) -> {I2, Map0, Data}. conv_store(I, Map, Data) -> - {Base, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {Base, Map0} = conv_src(hipe_rtl:store_base(I), Map), {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {Offset, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), StoreSize = hipe_rtl:store_size(I), @@ -567,13 +571,28 @@ mk_store(Src, Base, Offset, StoreSize) -> end. mk_store2(Src, Base, Offset, StOp) -> - case hipe_arm:is_temp(Offset) of + case hipe_arm:is_temp(Base) of true -> - mk_store_rr(Src, Base, Offset, StOp); - _ -> - mk_store_ri(Src, Base, Offset, StOp) + case hipe_arm:is_temp(Offset) of + true -> + mk_store_rr(Src, Base, Offset, StOp); + _ -> + mk_store_ri(Src, Base, Offset, StOp) + end; + false -> + case hipe_arm:is_temp(Offset) of + true -> + mk_store_ri(Src, Offset, Base, StOp); + _ -> + mk_store_ii(Src, Base, Offset, StOp) + end end. - + +mk_store_ii(Src, Base, Offset, StOp) -> + Tmp = new_untagged_temp(), + mk_li(Tmp, Base, + mk_store_ri(Src, Tmp, Offset, StOp)). + mk_store_ri(Src, Base, Offset, StOp) -> hipe_arm:mk_store(StOp, Src, Base, Offset, 'new', []). @@ -627,6 +646,7 @@ conv_alub_cond(RtlAluOp, Cond) -> % may be unsigned, depends on aluop case {RtlAluOp, Cond} of % handle allowed alub unsigned conditions {'add', 'ltu'} -> 'hs'; % add+ltu == unsigned overflow == carry set == hs %% add more cases when needed + {'sub', _} -> conv_branch_cond(Cond); _ -> conv_cond(Cond) end. diff --git a/lib/hipe/cerl/cerl_cconv.erl b/lib/hipe/cerl/cerl_cconv.erl index ac9d01ab0e..122e6ef039 100644 --- a/lib/hipe/cerl/cerl_cconv.erl +++ b/lib/hipe/cerl/cerl_cconv.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2015. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,11 +9,9 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% -%% @author Richard Carlsson <[email protected]> %% @copyright 2000-2004 Richard Carlsson +%% @author Richard Carlsson <[email protected]> %% @doc Closure conversion of Core Erlang modules. This is done as a %% step in the translation from Core Erlang down to HiPE Icode, and is %% very much tied to the calling conventions used in HiPE native code. diff --git a/lib/hipe/cerl/cerl_closurean.erl b/lib/hipe/cerl/cerl_closurean.erl index d37c91e5c6..a2bd7fe0f0 100644 --- a/lib/hipe/cerl/cerl_closurean.erl +++ b/lib/hipe/cerl/cerl_closurean.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,15 +10,9 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% -%% ===================================================================== -%% Closure analysis of Core Erlang programs. -%% -%% Copyright (C) 2001-2002 Richard Carlsson -%% -%% Author contact: [email protected] -%% ===================================================================== +%% @copyright 2001-2002 Richard Carlsson +%% @author Richard Carlsson <[email protected]> +%% @doc Closure analysis of Core Erlang programs. %% TODO: might need a "top" (`any') element for any-length value lists. diff --git a/lib/hipe/cerl/cerl_hipe_primops.hrl b/lib/hipe/cerl/cerl_hipe_primops.hrl index 3efb9a3bdd..6e4d830b61 100644 --- a/lib/hipe/cerl/cerl_hipe_primops.hrl +++ b/lib/hipe/cerl/cerl_hipe_primops.hrl @@ -1,9 +1,3 @@ -%% ========================-*-erlang-*-================================= -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,15 +9,10 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -%% Predefined Core Erlang primitive operations used by HiPE -%% -%% Copyright (C) 2000 Richard Carlsson %% -%% Author contact: [email protected] -%% ===================================================================== +%% @copyright 2000 Richard Carlsson +%% @author Richard Carlsson <[email protected]> +%% @doc Predefined Core Erlang primitive operations used by HiPE. %% These definitions give the names of Core Erlang primops recognized by %% HiPE. Many of them (e.g., 'not'/'and'/'or', and the type tests), are diff --git a/lib/hipe/cerl/cerl_hipeify.erl b/lib/hipe/cerl/cerl_hipeify.erl index 6611abd204..137a54ba32 100644 --- a/lib/hipe/cerl/cerl_hipeify.erl +++ b/lib/hipe/cerl/cerl_hipeify.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2015. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,11 +9,9 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% -%% @author Richard Carlsson <[email protected]> %% @copyright 2000-2004 Richard Carlsson +%% @author Richard Carlsson <[email protected]> %% @doc HiPE-ification of Core Erlang code. Prepares Core Erlang code %% for translation to ICode. %% @see cerl_to_icode diff --git a/lib/hipe/cerl/cerl_lib.erl b/lib/hipe/cerl/cerl_lib.erl index 0bc77909d9..3a6fb1cf51 100644 --- a/lib/hipe/cerl/cerl_lib.erl +++ b/lib/hipe/cerl/cerl_lib.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,10 +9,9 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% - +%% @copyright 1999-2002 Richard Carlsson +%% @author Richard Carlsson <[email protected]> %% @doc Utility functions for Core Erlang abstract syntax trees. %% %% <p>Syntax trees are defined in the module <a diff --git a/lib/hipe/cerl/cerl_messagean.erl b/lib/hipe/cerl/cerl_messagean.erl index 7df0a245fb..c79e045bd0 100644 --- a/lib/hipe/cerl/cerl_messagean.erl +++ b/lib/hipe/cerl/cerl_messagean.erl @@ -1,8 +1,3 @@ -%% ===================================================================== -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,14 +10,9 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% -%% Message analysis of Core Erlang programs. -%% -%% Copyright (C) 2002 Richard Carlsson -%% -%% Author contact: [email protected] -%% ===================================================================== +%% @copyright 2002 Richard Carlsson +%% @author Richard Carlsson <[email protected]> +%% @doc Message analysis of Core Erlang programs. %% TODO: might need a "top" (`any') element for any-length value lists. diff --git a/lib/hipe/cerl/cerl_pmatch.erl b/lib/hipe/cerl/cerl_pmatch.erl index ca27fff1dd..fd7f589f08 100644 --- a/lib/hipe/cerl/cerl_pmatch.erl +++ b/lib/hipe/cerl/cerl_pmatch.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,11 +9,9 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% -%% @author Richard Carlsson <[email protected]> %% @copyright 2000-2006 Richard Carlsson +%% @author Richard Carlsson <[email protected]> %% %% @doc Core Erlang pattern matching compiler. %% diff --git a/lib/hipe/cerl/cerl_prettypr.erl b/lib/hipe/cerl/cerl_prettypr.erl index f0acab99e3..c1c7250bbd 100644 --- a/lib/hipe/cerl/cerl_prettypr.erl +++ b/lib/hipe/cerl/cerl_prettypr.erl @@ -1,8 +1,3 @@ -%% ===================================================================== -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,16 +9,9 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -%% Core Erlang prettyprinter, using the 'prettypr' module. -%% -%% Copyright (C) 1999-2002 Richard Carlsson -%% -%% Author contact: [email protected] -%% ===================================================================== %% +%% @copyright 1999-2002 Richard Carlsson +%% @author Richard Carlsson <[email protected]> %% @doc Core Erlang prettyprinter. %% %% <p>This module is a front end to the pretty-printing library module diff --git a/lib/hipe/cerl/cerl_to_icode.erl b/lib/hipe/cerl/cerl_to_icode.erl index ab131c2d01..e37eae8a03 100644 --- a/lib/hipe/cerl/cerl_to_icode.erl +++ b/lib/hipe/cerl/cerl_to_icode.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 4 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2015. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,11 +11,9 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% -%% @author Richard Carlsson <[email protected]> %% @copyright 2000-2006 Richard Carlsson +%% @author Richard Carlsson <[email protected]> %% @doc Translation from Core Erlang to HiPE Icode. %% TODO: annotate Icode leaf functions as such. @@ -2627,7 +2621,7 @@ icode_switch_val(Arg, Fail, Length, Cases) -> hipe_icode:mk_switch_val(Arg, Fail, Length, Cases). icode_switch_tuple_arity(Arg, Fail, Length, Cases) -> - SortedCases = lists:keysort(1, Cases), %% immitate BEAM compiler - Kostis + SortedCases = lists:keysort(1, Cases), %% imitate BEAM compiler - Kostis hipe_icode:mk_switch_tuple_arity(Arg, Fail, Length, SortedCases). diff --git a/lib/hipe/cerl/cerl_typean.erl b/lib/hipe/cerl/cerl_typean.erl index ddc48c7915..c5d84bdf2b 100644 --- a/lib/hipe/cerl/cerl_typean.erl +++ b/lib/hipe/cerl/cerl_typean.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 4 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,15 +11,9 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -%% Type analysis of Core Erlang programs. -%% -%% Copyright (C) 2001-2002 Richard Carlsson -%% -%% Author contact: [email protected] %% +%% @copyright 2001-2002 Richard Carlsson +%% @author Richard Carlsson <[email protected]> %% @doc Type analysis of Core Erlang programs. %% TODO: filters must handle conjunctions for better precision! diff --git a/lib/hipe/cerl/erl_bif_types.erl b/lib/hipe/cerl/erl_bif_types.erl index 230fce2e68..a3a936322a 100644 --- a/lib/hipe/cerl/erl_bif_types.erl +++ b/lib/hipe/cerl/erl_bif_types.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,16 +12,12 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% -%% ===================================================================== -%% Type information for Erlang Built-in functions (implemented in C) -%% -%% Copyright (C) 2002 Richard Carlsson -%% Copyright (C) 2006 Richard Carlsson, Tobias Lindahl and Kostis Sagonas -%% -%% ===================================================================== +%% @doc Type information for Erlang Built-in functions (implemented in C) +%% @copyright 2002 Richard Carlsson, 2006 Richard Carlsson, Tobias Lindahl +%% and Kostis Sagonas +%% @author Richard Carlsson <[email protected]> +%% @author Tobias Lindahl <[email protected]> +%% @author Kostis Sagonas <[email protected]> -module(erl_bif_types). @@ -560,6 +552,9 @@ type(erlang, byte_size, 1, Xs, Opaques) -> strict(erlang, byte_size, 1, Xs, fun (_) -> t_non_neg_integer() end, Opaques); %% Guard bif, needs to be here. +type(erlang, ceil, 1, Xs, Opaques) -> + strict(erlang, ceil, 1, Xs, fun (_) -> t_integer() end, Opaques); +%% Guard bif, needs to be here. %% Also much more expressive than anything you could write in a spec... type(erlang, element, 2, Xs, Opaques) -> strict(erlang, element, 2, Xs, @@ -588,6 +583,9 @@ type(erlang, element, 2, Xs, Opaques) -> type(erlang, float, 1, Xs, Opaques) -> strict(erlang, float, 1, Xs, fun (_) -> t_float() end, Opaques); %% Guard bif, needs to be here. +type(erlang, floor, 1, Xs, Opaques) -> + strict(erlang, floor, 1, Xs, fun (_) -> t_integer() end, Opaques); +%% Guard bif, needs to be here. type(erlang, hd, 1, Xs, Opaques) -> strict(erlang, hd, 1, Xs, fun ([X]) -> t_cons_hd(X) end, Opaques); type(erlang, info, 1, Xs, _) -> type(erlang, system_info, 1, Xs); % alias @@ -997,9 +995,9 @@ type(erlang, tuple_to_list, 1, Xs, Opaques) -> end, Opaques); %%-- hipe_bifs ---------------------------------------------------------------- type(hipe_bifs, add_ref, 2, Xs, Opaques) -> - strict(hipe_bifs, add_ref, 2, Xs, fun (_) -> t_nil() end, Opaques); -type(hipe_bifs, alloc_data, 2, Xs, Opaques) -> - strict(hipe_bifs, alloc_data, 2, Xs, + strict(hipe_bifs, add_ref, 2, Xs, fun (_) -> t_atom('ok') end, Opaques); +type(hipe_bifs, alloc_data, 3, Xs, Opaques) -> + strict(hipe_bifs, alloc_data, 3, Xs, fun (_) -> t_integer() end, Opaques); % address type(hipe_bifs, array, 2, Xs, Opaques) -> strict(hipe_bifs, array, 2, Xs, fun (_) -> t_immarray() end, Opaques); @@ -1046,16 +1044,16 @@ type(hipe_bifs, call_count_on, 1, Xs, Opaques) -> fun (_) -> t_sup(t_atom('true'), t_nil()) end, Opaques); type(hipe_bifs, check_crc, 1, Xs, Opaques) -> strict(hipe_bifs, check_crc, 1, Xs, fun (_) -> t_boolean() end, Opaques); -type(hipe_bifs, enter_code, 2, Xs, Opaques) -> - strict(hipe_bifs, enter_code, 2, Xs, +type(hipe_bifs, enter_code, 3, Xs, Opaques) -> + strict(hipe_bifs, enter_code, 3, Xs, fun (_) -> t_tuple([t_integer(), %% XXX: The tuple below contains integers and %% is of size same as the length of the MFA list t_sup(t_nil(), t_binary())]) end, Opaques); -type(hipe_bifs, enter_sdesc, 1, Xs, Opaques) -> - strict(hipe_bifs, enter_sdesc, 1, Xs, fun (_) -> t_nil() end, Opaques); -type(hipe_bifs, find_na_or_make_stub, 2, Xs, Opaques) -> - strict(hipe_bifs, find_na_or_make_stub, 2, Xs, +type(hipe_bifs, enter_sdesc, 2, Xs, Opaques) -> + strict(hipe_bifs, enter_sdesc, 2, Xs, fun (_) -> t_nil() end, Opaques); +type(hipe_bifs, find_na_or_make_stub, 1, Xs, Opaques) -> + strict(hipe_bifs, find_na_or_make_stub, 1, Xs, fun (_) -> t_integer() end, Opaques); % address type(hipe_bifs, fun_to_address, 1, Xs, Opaques) -> strict(hipe_bifs, fun_to_address, 1, Xs, @@ -1065,12 +1063,6 @@ type(hipe_bifs, get_fe, 2, Xs, Opaques) -> type(hipe_bifs, get_rts_param, 1, Xs, Opaques) -> strict(hipe_bifs, get_rts_param, 1, Xs, fun (_) -> t_sup(t_integer(), t_nil()) end, Opaques); -type(hipe_bifs, invalidate_funinfo_native_addresses, 1, Xs, Opaques) -> - strict(hipe_bifs, invalidate_funinfo_native_addresses, 1, Xs, - fun (_) -> t_nil() end, Opaques); -type(hipe_bifs, mark_referred_from, 1, Xs, Opaques) -> - strict(hipe_bifs, mark_referred_from, 1, Xs, - fun (_) -> t_nil() end, Opaques); type(hipe_bifs, merge_term, 1, Xs, Opaques) -> strict(hipe_bifs, merge_term, 1, Xs, fun ([X]) -> X end, Opaques); type(hipe_bifs, nstack_used_size, 0, _, _Opaques) -> @@ -1082,21 +1074,18 @@ type(hipe_bifs, patch_insn, 3, Xs, Opaques) -> type(hipe_bifs, primop_address, 1, Xs, Opaques) -> strict(hipe_bifs, primop_address, 1, Xs, fun (_) -> t_sup(t_integer(), t_atom('false')) end, Opaques); -type(hipe_bifs, redirect_referred_from, 1, Xs, Opaques) -> - strict(hipe_bifs, redirect_referred_from, 1, Xs, - fun (_) -> t_nil() end, Opaques); type(hipe_bifs, ref, 1, Xs, Opaques) -> strict(hipe_bifs, ref, 1, Xs, fun (_) -> t_immarray() end, Opaques); type(hipe_bifs, ref_get, 1, Xs, Opaques) -> strict(hipe_bifs, ref_get, 1, Xs, fun (_) -> t_immediate() end, Opaques); type(hipe_bifs, ref_set, 2, Xs, Opaques) -> strict(hipe_bifs, ref_set, 2, Xs, fun (_) -> t_nil() end, Opaques); -type(hipe_bifs, remove_refs_from, 1, Xs, Opaques) -> - strict(hipe_bifs, remove_refs_from, 1, Xs, - fun (_) -> t_atom('ok') end, Opaques); type(hipe_bifs, set_funinfo_native_address, 3, Xs, Opaques) -> strict(hipe_bifs, set_funinfo_native_address, 3, Xs, fun (_) -> t_nil() end, Opaques); +type(hipe_bifs, commit_patch_load, 1, Xs, Opaques) -> + strict(hipe_bifs, commit_patch_load, 1, Xs, + fun (_) -> t_atom() end, Opaques); type(hipe_bifs, set_native_address, 3, Xs, Opaques) -> strict(hipe_bifs, set_native_address, 3, Xs, fun (_) -> t_nil() end, Opaques); @@ -1108,15 +1097,14 @@ type(hipe_bifs, system_crc, 0, _, _Opaques) -> type(hipe_bifs, term_to_word, 1, Xs, Opaques) -> strict(hipe_bifs, term_to_word, 1, Xs, fun (_) -> t_integer() end, Opaques); -type(hipe_bifs, update_code_size, 3, Xs, Opaques) -> - strict(hipe_bifs, update_code_size, 3, Xs, - fun (_) -> t_nil() end, Opaques); type(hipe_bifs, write_u8, 2, Xs, Opaques) -> strict(hipe_bifs, write_u8, 2, Xs, fun (_) -> t_nil() end, Opaques); type(hipe_bifs, write_u32, 2, Xs, Opaques) -> strict(hipe_bifs, write_u32, 2, Xs, fun (_) -> t_nil() end, Opaques); type(hipe_bifs, write_u64, 2, Xs, Opaques) -> strict(hipe_bifs, write_u64, 2, Xs, fun (_) -> t_nil() end, Opaques); +type(hipe_bifs, alloc_loader_state, 1, Xs, Opaques) -> + strict(hipe_bifs, alloc_loader_state, 1, Xs, fun (_) -> t_binary() end, Opaques); %%-- lists -------------------------------------------------------------------- type(lists, all, 2, Xs, Opaques) -> strict(lists, all, 2, Xs, @@ -2038,17 +2026,14 @@ arith_rem(Min1, Max1, Min2, Max2) -> Min1_geq_zero = infinity_geq(Min1, 0), Max1_leq_zero = infinity_geq(0, Max1), Max_range2 = infinity_max([infinity_abs(Min2), infinity_abs(Max2)]), - Max_range2_leq_zero = infinity_geq(0, Max_range2), - New_min = + New_min = if Min1_geq_zero -> 0; Max_range2 =:= 0 -> 0; - Max_range2_leq_zero -> infinity_add(Max_range2, 1); true -> infinity_add(infinity_inv(Max_range2), 1) end, New_max = if Max1_leq_zero -> 0; Max_range2 =:= 0 -> 0; - Max_range2_leq_zero -> infinity_add(infinity_inv(Max_range2), -1); true -> infinity_add(Max_range2, -1) end, {New_min, New_max}. @@ -2341,6 +2326,9 @@ arg_types(erlang, bit_size, 1) -> %% Guard bif, needs to be here. arg_types(erlang, byte_size, 1) -> [t_bitstr()]; +%% Guard bif, needs to be here. +arg_types(erlang, ceil, 1) -> + [t_number()]; arg_types(erlang, halt, 0) -> []; arg_types(erlang, halt, 1) -> @@ -2361,6 +2349,9 @@ arg_types(erlang, element, 2) -> arg_types(erlang, float, 1) -> [t_number()]; %% Guard bif, needs to be here. +arg_types(erlang, floor, 1) -> + [t_number()]; +%% Guard bif, needs to be here. arg_types(erlang, hd, 1) -> [t_cons()]; arg_types(erlang, info, 1) -> @@ -2458,9 +2449,9 @@ arg_types(hipe_bifs, add_ref, 2) -> t_integer(), t_sup(t_atom('call'), t_atom('load_mfa')), t_trampoline(), - t_sup(t_atom('remote'), t_atom('local'))])]; -arg_types(hipe_bifs, alloc_data, 2) -> - [t_integer(), t_integer()]; + t_binary()])]; +arg_types(hipe_bifs, alloc_data, 3) -> + [t_integer(), t_integer(), t_binary()]; arg_types(hipe_bifs, array, 2) -> [t_non_neg_fixnum(), t_immediate()]; arg_types(hipe_bifs, array_length, 1) -> @@ -2495,22 +2486,19 @@ arg_types(hipe_bifs, call_count_on, 1) -> [t_mfa()]; arg_types(hipe_bifs, check_crc, 1) -> [t_crc32()]; -arg_types(hipe_bifs, enter_code, 2) -> - [t_binary(), t_sup(t_nil(), t_tuple())]; -arg_types(hipe_bifs, enter_sdesc, 1) -> - [t_tuple([t_integer(), t_integer(), t_integer(), t_integer(), t_integer(), t_mfa()])]; -arg_types(hipe_bifs, find_na_or_make_stub, 2) -> - [t_mfa(), t_boolean()]; +arg_types(hipe_bifs, enter_code, 3) -> + [t_binary(), t_sup(t_nil(), t_tuple()), t_binary()]; +arg_types(hipe_bifs, enter_sdesc, 2) -> + [t_tuple([t_integer(), t_integer(), t_integer(), t_integer(), t_integer(), t_mfa()]), + t_binary()]; +arg_types(hipe_bifs, find_na_or_make_stub, 1) -> + [t_mfa()]; arg_types(hipe_bifs, fun_to_address, 1) -> [t_mfa()]; arg_types(hipe_bifs, get_fe, 2) -> [t_atom(), t_tuple([t_integer(), t_integer(), t_integer()])]; arg_types(hipe_bifs, get_rts_param, 1) -> [t_fixnum()]; -arg_types(hipe_bifs, invalidate_funinfo_native_addresses, 1) -> - [t_list(t_mfa())]; -arg_types(hipe_bifs, mark_referred_from, 1) -> - [t_mfa()]; arg_types(hipe_bifs, merge_term, 1) -> [t_any()]; arg_types(hipe_bifs, nstack_used_size, 0) -> @@ -2521,18 +2509,16 @@ arg_types(hipe_bifs, patch_insn, 3) -> [t_integer(), t_integer(), t_insn_type()]; arg_types(hipe_bifs, primop_address, 1) -> [t_atom()]; -arg_types(hipe_bifs, redirect_referred_from, 1) -> - [t_mfa()]; arg_types(hipe_bifs, ref, 1) -> [t_immediate()]; arg_types(hipe_bifs, ref_get, 1) -> [t_hiperef()]; arg_types(hipe_bifs, ref_set, 2) -> [t_hiperef(), t_immediate()]; -arg_types(hipe_bifs, remove_refs_from, 1) -> - [t_sup([t_mfa(), t_atom('all')])]; arg_types(hipe_bifs, set_funinfo_native_address, 3) -> arg_types(hipe_bifs, set_native_address, 3); +arg_types(hipe_bifs, commit_patch_load, 1) -> + [t_binary()]; arg_types(hipe_bifs, set_native_address, 3) -> [t_mfa(), t_integer(), t_boolean()]; arg_types(hipe_bifs, set_native_address_in_fe, 2) -> @@ -2541,14 +2527,15 @@ arg_types(hipe_bifs, system_crc, 0) -> []; arg_types(hipe_bifs, term_to_word, 1) -> [t_any()]; -arg_types(hipe_bifs, update_code_size, 3) -> - [t_atom(), t_sup(t_nil(), t_binary()), t_integer()]; arg_types(hipe_bifs, write_u8, 2) -> [t_integer(), t_byte()]; arg_types(hipe_bifs, write_u32, 2) -> [t_integer(), t_integer()]; arg_types(hipe_bifs, write_u64, 2) -> [t_integer(), t_integer()]; +arg_types(hipe_bifs, alloc_loader_state, 1) -> + [t_atom()]; + %%------- lists --------------------------------------------------------------- arg_types(lists, all, 2) -> [t_fun([t_any()], t_boolean()), t_list()]; diff --git a/lib/hipe/cerl/erl_types.erl b/lib/hipe/cerl/erl_types.erl index a5a3e8c136..0883a69918 100644 --- a/lib/hipe/cerl/erl_types.erl +++ b/lib/hipe/cerl/erl_types.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2017. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,14 +12,13 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% -%% ====================================================================== -%% Copyright (C) 2000-2003 Richard Carlsson -%% -%% ====================================================================== -%% Provides a representation of Erlang types. -%% +%% @copyright 2000-2003 Richard Carlsson, 2006-2009 Tobias Lindahl +%% @author Richard Carlsson <[email protected]> +%% @author Tobias Lindahl <[email protected]> +%% @author Kostis Sagonas <[email protected]> +%% @author Manouk Manoukian +%% @doc Provides a representation of Erlang types. + %% The initial author of this file is Richard Carlsson (2000-2004). %% In July 2006, the type representation was totally re-designed by %% Tobias Lindahl. This is the representation which is used currently. @@ -31,9 +26,6 @@ %% opaque types to the structure-based representation of types. %% During February and March 2009, Kostis Sagonas significantly %% cleaned up the type representation and added spec declarations. -%% -%% ====================================================================== -module(erl_types). @@ -236,7 +228,7 @@ -export([t_is_identifier/1]). -endif. --export_type([erl_type/0, opaques/0, type_table/0, mod_records/0, +-export_type([erl_type/0, opaques/0, type_table/0, var_table/0, cache/0]). %%-define(DEBUG, true). @@ -374,15 +366,17 @@ -type opaques() :: [erl_type()] | 'universe'. +-type file_line() :: {file:name(), erl_anno:line()}. -type record_key() :: {'record', atom()}. -type type_key() :: {'type' | 'opaque', mfa()}. --type record_value() :: [{atom(), erl_parse:abstract_expr(), erl_type()}]. --type type_value() :: {{module(), {file:name(), erl_anno:line()}, +-type field() :: {atom(), erl_parse:abstract_expr(), erl_type()}. +-type record_value() :: {file_line(), + [{RecordSize :: non_neg_integer(), [field()]}]}. +-type type_value() :: {{module(), file_line(), erl_parse:abstract_type(), ArgNames :: [atom()]}, erl_type()}. -type type_table() :: #{record_key() | type_key() => record_value() | type_value()}. --type mod_records() :: dict:dict(module(), type_table()). -opaque var_table() :: #{atom() => erl_type()}. @@ -536,7 +530,9 @@ list_contains_opaque(List, Opaques) -> 'error' | {'ok', erl_type(), erl_type()}. t_find_opaque_mismatch(T1, T2, Opaques) -> - catch t_find_opaque_mismatch(T1, T2, T2, Opaques). + try t_find_opaque_mismatch(T1, T2, T2, Opaques) + catch throw:error -> error + end. t_find_opaque_mismatch(?any, _Type, _TopType, _Opaques) -> error; t_find_opaque_mismatch(?none, _Type, _TopType, _Opaques) -> throw(error); @@ -588,8 +584,9 @@ t_find_opaque_mismatch_ordlists(L1, L2, TopType, Opaques) -> t_find_opaque_mismatch_list(List). t_find_opaque_mismatch_lists(L1, L2, _TopType, Opaques) -> - List = [catch t_find_opaque_mismatch(T1, T2, T2, Opaques) || - T1 <- L1, T2 <- L2], + List = [try t_find_opaque_mismatch(T1, T2, T2, Opaques) + catch throw:error -> error + end || T1 <- L1, T2 <- L2], t_find_opaque_mismatch_list(List). t_find_opaque_mismatch_list([]) -> throw(error); @@ -619,7 +616,9 @@ t_find_unknown_opaque(T1, T2, Opaques) -> %% is assumed to be taken from the contract. t_decorate_with_opaque(T1, T2, Opaques) -> - case t_is_equal(T1, T2) orelse not t_contains_opaque(T2) of + case + Opaques =:= [] orelse t_is_equal(T1, T2) orelse not t_contains_opaque(T2) + of true -> T1; false -> T = t_inf(T1, T2), @@ -4257,8 +4256,8 @@ t_to_string(?opaque(Set), RecDict) -> <- set_to_list(Set)], " | "); t_to_string(?matchstate(Pres, Slots), RecDict) -> - flat_format("ms(~s,~s)", [t_to_string(Pres, RecDict), - t_to_string(Slots,RecDict)]); + flat_format("ms(~ts,~ts)", [t_to_string(Pres, RecDict), + t_to_string(Slots,RecDict)]); t_to_string(?nil, _RecDict) -> "[]"; t_to_string(?nonempty_list(Contents, Termination), RecDict) -> @@ -4436,10 +4435,10 @@ opaque_type(Mod, Name, Args, _S, RecDict) -> opaque_name(Mod, Name, Extra) -> S = mod_name(Mod, Name), - flat_format("~s(~s)", [S, Extra]). + flat_format("~ts(~ts)", [S, Extra]). mod_name(Mod, Name) -> - flat_format("~w:~w", [Mod, Name]). + flat_format("~w:~tw", [Mod, Name]). %%============================================================================= %% @@ -4454,9 +4453,17 @@ mod_name(Mod, Name) -> -type site() :: {'type', mta()} | {'spec', mfa()} | {'record', mra()}. -type cache_key() :: {module(), atom(), expand_depth(), [erl_type()], type_names()}. --opaque cache() :: #{cache_key() => {erl_type(), expand_limit()}}. +-type mod_type_table() :: ets:tid(). +-type mod_records() :: dict:dict(module(), type_table()). +-record(cache, + { + types = maps:new() :: #{cache_key() => {erl_type(), expand_limit()}}, + mod_recs = {mrecs, dict:new()} :: {'mrecs', mod_records()} + }). + +-opaque cache() :: #cache{}. --spec t_from_form(parse_form(), sets:set(mfa()), site(), mod_records(), +-spec t_from_form(parse_form(), sets:set(mfa()), site(), mod_type_table(), var_table(), cache()) -> {erl_type(), cache()}. t_from_form(Form, ExpTypes, Site, RecDict, VarTab, Cache) -> @@ -4468,11 +4475,12 @@ t_from_form(Form, ExpTypes, Site, RecDict, VarTab, Cache) -> t_from_form_without_remote(Form, Site, TypeTable) -> Module = site_module(Site), - RecDict = dict:from_list([{Module, TypeTable}]), + ModRecs = dict:from_list([{Module, TypeTable}]), ExpTypes = replace_by_none, VarTab = var_table__new(), - Cache = cache__new(), - t_from_form1(Form, ExpTypes, Site, RecDict, VarTab, Cache). + Cache0 = cache__new(), + Cache = Cache0#cache{mod_recs = {mrecs, ModRecs}}, + t_from_form1(Form, ExpTypes, Site, undefined, VarTab, Cache). %% REC_TYPE_LIMIT is used for limiting the depth of recursive types. %% EXPAND_LIMIT is used for limiting the size of types by @@ -4487,13 +4495,13 @@ t_from_form_without_remote(Form, Site, TypeTable) -> -record(from_form, {site :: site(), xtypes :: sets:set(mfa()) | 'replace_by_none', - mrecs :: mod_records(), + mrecs :: 'undefined' | mod_type_table(), vtab :: var_table(), tnames :: type_names()}). -spec t_from_form1(parse_form(), sets:set(mfa()) | 'replace_by_none', - site(), mod_records(), var_table(), cache()) -> - {erl_type(), cache()}. + site(), 'undefined' | mod_type_table(), var_table(), + cache()) -> {erl_type(), cache()}. t_from_form1(Form, ET, Site, MR, V, C) -> TypeNames = initial_typenames(Site), @@ -4739,13 +4747,13 @@ from_form({opaque, _L, Name, {Mod, Args, Rep}}, _S, _D, L, C) -> builtin_type(Name, Type, S, D, L, C) -> #from_form{site = Site, mrecs = MR} = S, M = site_module(Site), - case dict:find(M, MR) of - {ok, R} -> + case lookup_module_types(M, MR, C) of + {R, C1} -> case lookup_type(Name, 0, R) of {_, {{_M, _FL, _F, _A}, _T}} -> - type_from_form(Name, [], S, D, L, C); + type_from_form(Name, [], S, D, L, C1); error -> - {Type, L, C} + {Type, L, C1} end; error -> {Type, L, C} @@ -4758,9 +4766,9 @@ type_from_form(Name, Args, S, D, L, C) -> TypeName = {type, {Module, Name, ArgsLen}}, case can_unfold_more(TypeName, TypeNames) of true -> - {ok, R} = dict:find(Module, MR), + {R, C1} = lookup_module_types(Module, MR, C), type_from_form1(Name, Args, ArgsLen, R, TypeName, TypeNames, - S, D, L, C); + S, D, L, C1); false -> {t_any(), L, C} end. @@ -4799,7 +4807,7 @@ type_from_form1(Name, Args, ArgsLen, R, TypeName, TypeNames, S, D, L, C) -> {NewType, L3, C4} end; error -> - Msg = io_lib:format("Unable to find type ~w/~w\n", + Msg = io_lib:format("Unable to find type ~tw/~w\n", [Name, ArgsLen]), throw({error, Msg}) end. @@ -4812,24 +4820,24 @@ remote_from_form(RemMod, Name, Args, S, D, L, C) -> true -> ArgsLen = length(Args), MFA = {RemMod, Name, ArgsLen}, - case dict:find(RemMod, MR) of + case lookup_module_types(RemMod, MR, C) of error -> self() ! {self(), ext_types, MFA}, {t_any(), L, C}; - {ok, RemDict} -> + {RemDict, C1} -> case sets:is_element(MFA, ET) of true -> RemType = {type, MFA}, case can_unfold_more(RemType, TypeNames) of true -> remote_from_form1(RemMod, Name, Args, ArgsLen, RemDict, - RemType, TypeNames, S, D, L, C); + RemType, TypeNames, S, D, L, C1); false -> - {t_any(), L, C} + {t_any(), L, C1} end; false -> self() ! {self(), ext_types, {RemMod, Name, ArgsLen}}, - {t_any(), L, C} + {t_any(), L, C1} end end end. @@ -4871,7 +4879,7 @@ remote_from_form1(RemMod, Name, Args, ArgsLen, RemDict, RemType, TypeNames, {NewType, L3, C4} end; error -> - Msg = io_lib:format("Unable to find remote type ~w:~w()\n", + Msg = io_lib:format("Unable to find remote type ~w:~tw()\n", [RemMod, Name]), throw({error, Msg}) end. @@ -4904,33 +4912,33 @@ record_from_form({atom, _, Name}, ModFields, S, D0, L0, C) -> case can_unfold_more(RecordType, TypeNames) of true -> M = site_module(Site), - {ok, R} = dict:find(M, MR), + {R, C1} = lookup_module_types(M, MR, C), case lookup_record(Name, R) of {ok, DeclFields} -> NewTypeNames = [RecordType|TypeNames], Site1 = {record, {M, Name, length(DeclFields)}}, S1 = S#from_form{site = Site1, tnames = NewTypeNames}, Fun = fun(D, L) -> - {GetModRec, L1, C1} = - get_mod_record(ModFields, DeclFields, S1, D, L, C), + {GetModRec, L1, C2} = + get_mod_record(ModFields, DeclFields, S1, D, L, C1), case GetModRec of {error, FieldName} -> throw({error, - io_lib:format("Illegal declaration of #~w{~w}\n", + io_lib:format("Illegal declaration of #~tw{~tw}\n", [Name, FieldName])}); {ok, NewFields} -> S2 = S1#from_form{vtab = var_table__new()}, - {NewFields1, L2, C2} = - fields_from_form(NewFields, S2, D, L1, C1), + {NewFields1, L2, C3} = + fields_from_form(NewFields, S2, D, L1, C2), Rec = t_tuple( [t_atom(Name)|[Type || {_FieldName, Type} <- NewFields1]]), - {Rec, L2, C2} + {Rec, L2, C3} end end, recur_limit(Fun, D0, L0, RecordType, TypeNames); error -> - throw({error, io_lib:format("Unknown record #~w{}\n", [Name])}) + throw({error, io_lib:format("Unknown record #~tw{}\n", [Name])}) end; false -> {t_any(), L0, C} @@ -5056,7 +5064,7 @@ recur_limit(Fun, D, L, TypeName, TypeNames) -> end. -spec t_check_record_fields(parse_form(), sets:set(mfa()), site(), - mod_records(), var_table(), cache()) -> cache(). + mod_type_table(), var_table(), cache()) -> cache(). t_check_record_fields(Form, ExpTypes, Site, RecDict, VarTable, Cache) -> State = #from_form{site = Site, @@ -5100,13 +5108,13 @@ check_record_fields({user_type, _L, _Name, Args}, S, C) -> check_record({atom, _, Name}, ModFields, S, C) -> #from_form{site = Site, mrecs = MR} = S, M = site_module(Site), - {ok, R} = dict:find(M, MR), + {R, C1} = lookup_module_types(M, MR, C), {ok, DeclFields} = lookup_record(Name, R), - case check_fields(Name, ModFields, DeclFields, S, C) of + case check_fields(Name, ModFields, DeclFields, S, C1) of {error, FieldName} -> - throw({error, io_lib:format("Illegal declaration of #~w{~w}\n", - [Name, FieldName])}); - C1 -> C1 + throw({error, io_lib:format("Illegal declaration of #~tw{~tw}\n", + [Name, FieldName])}); + C2 -> C2 end. check_fields(RecName, [{type, _, field_type, [{atom, _, Name}, Abstr]}|Left], @@ -5136,7 +5144,7 @@ site_module({_, {Module, _, _}}) -> -spec cache__new() -> cache(). cache__new() -> - maps:new(). + #cache{}. -spec cache_key(module(), atom(), [erl_type()], type_names(), expand_depth()) -> cache_key(). @@ -5153,8 +5161,8 @@ cache_key(Module, Name, ArgTypes, TypeNames, D) -> -spec cache_find(cache_key(), cache()) -> {erl_type(), expand_limit()} | 'error'. -cache_find(Key, Cache) -> - case maps:find(Key, Cache) of +cache_find(Key, #cache{types = Types}) -> + case maps:find(Key, Types) of {ok, Value} -> Value; error -> @@ -5166,12 +5174,13 @@ cache_find(Key, Cache) -> cache_put(_Key, _Type, DeltaL, Cache) when DeltaL < 0 -> %% The type is truncated; do not reuse it. Cache; -cache_put(Key, Type, DeltaL, Cache) -> - maps:put(Key, {Type, DeltaL}, Cache). +cache_put(Key, Type, DeltaL, #cache{types = Types} = Cache) -> + NewTypes = maps:put(Key, {Type, DeltaL}, Types), + Cache#cache{types = NewTypes}. --spec t_var_names([erl_type()]) -> [atom()]. +-spec t_var_names([parse_form()]) -> [atom()]. -t_var_names([{var, _, Name}|L]) when L =/= '_' -> +t_var_names([{var, _, Name}|L]) when Name =/= '_' -> [Name|t_var_names(L)]; t_var_names([]) -> []. @@ -5197,10 +5206,10 @@ t_form_to_string({op, _L, _Op, _Arg1, _Arg2} = Op) -> t_form_to_string({ann_type, _L, [Var, Type]}) -> t_form_to_string(Var) ++ "::" ++ t_form_to_string(Type); t_form_to_string({paren_type, _L, [Type]}) -> - flat_format("(~s)", [t_form_to_string(Type)]); + flat_format("(~ts)", [t_form_to_string(Type)]); t_form_to_string({remote_type, _L, [{atom, _, Mod}, {atom, _, Name}, Args]}) -> ArgString = "(" ++ string:join(t_form_to_string_list(Args), ",") ++ ")", - flat_format("~w:~w", [Mod, Name]) ++ ArgString; + flat_format("~w:~tw", [Mod, Name]) ++ ArgString; t_form_to_string({type, _L, arity, []}) -> "arity()"; t_form_to_string({type, _L, binary, []}) -> "binary()"; t_form_to_string({type, _L, binary, [Base, Unit]} = Type) -> @@ -5250,12 +5259,12 @@ t_form_to_string({type, _L, range, [From, To]} = Type) -> _ -> flat_format("Badly formed type ~w",[Type]) end; t_form_to_string({type, _L, record, [{atom, _, Name}]}) -> - flat_format("#~w{}", [Name]); + flat_format("#~tw{}", [Name]); t_form_to_string({type, _L, record, [{atom, _, Name}|Fields]}) -> FieldString = string:join(t_form_to_string_list(Fields), ","), - flat_format("#~w{~s}", [Name, FieldString]); + flat_format("#~tw{~ts}", [Name, FieldString]); t_form_to_string({type, _L, field_type, [{atom, _, Name}, Type]}) -> - flat_format("~w::~s", [Name, t_form_to_string(Type)]); + flat_format("~tw::~ts", [Name, t_form_to_string(Type)]); t_form_to_string({type, _L, term, []}) -> "term()"; t_form_to_string({type, _L, timeout, []}) -> "timeout()"; t_form_to_string({type, _L, tuple, any}) -> "tuple()"; @@ -5266,14 +5275,12 @@ t_form_to_string({type, _L, union, Args}) -> t_form_to_string({type, _L, Name, []} = T) -> try M = mod, - D0 = maps:new(), - MR = dict:from_list([{M, D0}]), Site = {type, {M,Name,0}}, V = var_table__new(), C = cache__new(), State = #from_form{site = Site, xtypes = sets:new(), - mrecs = MR, + mrecs = 'undefined', vtab = V, tnames = []}, {T1, _, _} = from_form(T, State, _Deep=1000, _ALot=1000000, C), @@ -5281,7 +5288,7 @@ t_form_to_string({type, _L, Name, []} = T) -> catch throw:{error, _} -> atom_to_string(Name) ++ "()" end; t_form_to_string({user_type, _L, Name, List}) -> - flat_format("~w(~s)", + flat_format("~tw(~ts)", [Name, string:join(t_form_to_string_list(List), ",")]); t_form_to_string({type, L, Name, List}) -> %% Compatibility: modules compiled before Erlang/OTP 18.0. @@ -5298,7 +5305,7 @@ t_form_to_string_list([], Acc) -> -spec atom_to_string(atom()) -> string(). atom_to_string(Atom) -> - flat_format("~w", [Atom]). + flat_format("~tw", [Atom]). %%============================================================================= %% @@ -5327,6 +5334,24 @@ is_erl_type(?unit) -> true; is_erl_type(#c{}) -> true; is_erl_type(_) -> false. +-spec lookup_module_types(module(), mod_type_table(), cache()) -> + 'error' | {type_table(), cache()}. + +lookup_module_types(Module, CodeTable, Cache) -> + #cache{mod_recs = {mrecs, MRecs}} = Cache, + case dict:find(Module, MRecs) of + {ok, R} -> + {R, Cache}; + error -> + try ets:lookup_element(CodeTable, Module, 2) of + R -> + NewMRecs = dict:store(Module, R, MRecs), + {R, Cache#cache{mod_recs = {mrecs, NewMRecs}}} + catch + _:_ -> error + end + end. + -spec lookup_record(atom(), type_table()) -> 'error' | {'ok', [{atom(), parse_form(), erl_type()}]}. @@ -5529,7 +5554,7 @@ set_size(Set) -> set_to_string(Set) -> L = [case is_atom(X) of true -> io_lib:write_string(atom_to_list(X), $'); % stupid emacs ' - false -> flat_format("~w", [X]) + false -> flat_format("~tw", [X]) end || X <- set_to_list(Set)], string:join(L, " | "). diff --git a/lib/hipe/doc/src/notes.xml b/lib/hipe/doc/src/notes.xml index 627794aee8..9167d0aaec 100644 --- a/lib/hipe/doc/src/notes.xml +++ b/lib/hipe/doc/src/notes.xml @@ -4,7 +4,7 @@ <chapter> <header> <copyright> - <year>2006</year><year>2016</year> + <year>2006</year><year>2017</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -31,6 +31,101 @@ </header> <p>This document describes the changes made to HiPE.</p> +<section><title>Hipe 3.16</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Fix hipe compiler flags <c>o0</c> and <c>o1</c> that have + previously been ignored by mistake.</p> + <p> + Own Id: OTP-13862 Aux Id: PR-1154 </p> + </item> + <item> + <p> + Fix LLVM backend to not convert all remote calls to own + module, like <c>?MODULE:foo()</c>, into local calls.</p> + <p> + Own Id: OTP-13983</p> + </item> + <item> + <p> + Hipe optional LLVM backend does require LLVM version 3.9 + or later as older versions forced strong dependencies on + erts internals structures.</p> + <p> + Own Id: OTP-14238</p> + </item> + <item> + <p> + Fix a bug that has been seen causing failed loading of + hipe compiled modules on NetBSD due to unaligned data + pointers.</p> + <p> + Own Id: OTP-14302 Aux Id: ERL-376, PR-1386 </p> + </item> + <item> + <p> + Fix miscompilation bug in hipe that could cause wrong + function clause to be called from non-tail calls, where + the return value is unused, if the right function clause + is only reachable from those non-tail calls.</p> + <p> + Own Id: OTP-14306 Aux Id: ERL-278, PR-1392 </p> + </item> + </list> + </section> + + + <section><title>Improvements and New Features</title> + <list> + <item> + <p> + Improve hipe compilation time for large functions.</p> + <p> + Own Id: OTP-13810 Aux Id: PR-1124 </p> + </item> + <item> + <p>Replaced usage of deprecated symbolic <seealso + marker="erts:erlang#type-time_unit"><c>time + unit</c></seealso> representations.</p> + <p> + Own Id: OTP-13831 Aux Id: OTP-13735 </p> + </item> + <item> + <p> + Speed up hipe compile time register allocation for larger + function.</p> + <p> + Own Id: OTP-13879</p> + </item> + <item> + <p> + Various code generation improvements.</p> + <p> + Own Id: OTP-14261 Aux Id: PR-1360 </p> + </item> + <item> + <p> + Improve hipe compiler to generate code with better CPU + register utilization at runtime by the use of 'Live Range + Splitting' techniques.</p> + <p> + Own Id: OTP-14293 Aux Id: PR-1380 </p> + </item> + <item> + <p> + Allow HiPE to run on VM built with + <c>--enable-m32-build</c>.</p> + <p> + Own Id: OTP-14330 Aux Id: PR-1397 </p> + </item> + </list> + </section> + +</section> + <section><title>Hipe 3.15.4</title> <section><title>Fixed Bugs and Malfunctions</title> @@ -1317,7 +1412,7 @@ <list> <item> <p> - Miscellanous updates.</p> + Miscellaneous updates.</p> <p> Own Id: OTP-8038</p> </item> diff --git a/lib/hipe/flow/cfg.hrl b/lib/hipe/flow/cfg.hrl index 2575b9e38a..8d0f8855bb 100644 --- a/lib/hipe/flow/cfg.hrl +++ b/lib/hipe/flow/cfg.hrl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,15 +11,11 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%============================================================================ %% File : cfg.hrl %% Author : Kostis Sagonas <[email protected]> %% Purpose : Contains typed record declarations for the CFG data structures -%% -%% $Id$ %%============================================================================ -type cfg_lbl() :: non_neg_integer(). diff --git a/lib/hipe/flow/cfg.inc b/lib/hipe/flow/cfg.inc index 0bad2a8dd7..17342d3b60 100644 --- a/lib/hipe/flow/cfg.inc +++ b/lib/hipe/flow/cfg.inc @@ -2,10 +2,6 @@ %% -*- erlang-indent-level: 2 -*- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -17,8 +13,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -32,6 +26,8 @@ %% bb(CFG, Label) - returns the basic block named 'Label' from the CFG. %% bb_add(CFG, Label, NewBB) - makes NewBB the basic block associated %% with Label. +%% map_bbs(Fun, CFG) - map over all code without changing control flow. +%% fold_bbs(Fun, Acc, CFG) - fold over the basic blocks in a CFG. %% succ(Map, Label) - returns a list of successors of basic block 'Label'. %% pred(Map, Label) - returns the predecessors of basic block 'Label'. %% fallthrough(CFG, Label) - returns fall-through successor of basic @@ -89,6 +85,7 @@ -define(BREADTH_ORDER,true). % for linear scan -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -endif. %%===================================================================== @@ -215,7 +212,7 @@ info_update(CFG, I) -> -ifndef(GEN_CFG). -spec other_entrypoints(cfg()) -> [cfg_lbl()]. -%% @doc Returns a list of labels that are refered to from the data section. +%% @doc Returns a list of labels that are referred to from the data section. other_entrypoints(CFG) -> hipe_consttab:referred_labels(data(CFG)). @@ -307,11 +304,7 @@ redirect_phis([I|Rest], OldPred, NewPred, Acc) -> %% @doc Adds a new basic block to a CFG (or updates an existing block). bb_add(CFG, Label, NewBB) -> %% Asserting that the NewBB is a legal basic block - Last = hipe_bb:last(NewBB), - case is_branch(Last) of - true -> ok; - false -> throw({?MODULE, {"Basic block ends without branch", Last}}) - end, + Last = assert_bb(NewBB), %% The order of the elements from branch_successors/1 is %% significant. It determines the basic block order when the CFG is %% converted to linear form. That order may have been tuned for @@ -339,11 +332,53 @@ bb_add(CFG, Label, NewBB) -> HT2, OldSucc -- Succ), CFG#cfg{table = HT3}. +-ifdef(MAP_FOLD_NEEDED). +-spec map_bbs(fun((cfg_lbl(), hipe_bb:bb()) -> hipe_bb:bb()), cfg()) -> cfg(). +%% @doc Map over the code in a CFG without changing any control flow. +map_bbs(Fun, CFG = #cfg{table=HT0}) -> + HT = gb_trees:map( + fun(Lbl, {OldBB, OldSucc, OldPred}) -> + NewBB = Fun(Lbl, OldBB), + %% Assert preconditions + NewLast = assert_bb(NewBB), + OldSucc = remove_duplicates(branch_successors(NewLast)), + {NewBB, OldSucc, OldPred} + end, HT0), + CFG#cfg{table=HT}. + +-spec fold_bbs(fun((cfg_lbl(), hipe_bb:bb(), Acc) -> Acc), Acc, cfg()) -> Acc. +%% @doc Fold over the basic blocks in a CFG in unspecified order. +fold_bbs(Fun, InitAcc, #cfg{table=HT}) -> + gb_trees_fold(fun(Lbl, {BB, _, _}, Acc) -> Fun(Lbl, BB, Acc) end, + InitAcc, HT). + +gb_trees_fold(Fun, InitAcc, Tree) -> + gb_trees_fold_1(Fun, InitAcc, gb_trees:iterator(Tree)). + +gb_trees_fold_1(Fun, InitAcc, Iter0) -> + case gb_trees:next(Iter0) of + none -> InitAcc; + {Key, Value, Iter} -> + gb_trees_fold_1(Fun, Fun(Key, Value, InitAcc), Iter) + end. +-endif. % MAP_FOLD_NEEDED + +assert_bb(BB) -> + assert_bb_is(hipe_bb:code(BB)). + +assert_bb_is([Last]) -> + true = is_branch(Last), + Last; +assert_bb_is([I|Is]) -> + false = is_branch(I), + false = is_label(I), + assert_bb_is(Is). + remove_pred(HT, FromL, PredL) -> case gb_trees:lookup(FromL, HT) of {value, {Block, Succ, Preds}} -> Code = hipe_bb:code(Block), - NewCode = remove_pred_from_phis(Code, PredL, []), + NewCode = remove_pred_from_phis(PredL, Code), NewBlock = hipe_bb:code_update(Block, NewCode), gb_trees:update(FromL, {NewBlock,Succ,lists:delete(PredL,Preds)}, HT); none -> @@ -374,20 +409,20 @@ add_pred(HT, ToL, PredL) -> -ifdef(CFG_CAN_HAVE_PHI_NODES). %% phi-instructions in a removed block's successors must be aware of %% the change. -remove_pred_from_phis(List = [I|Left], Label, Acc) -> +remove_pred_from_phis(Label, List = [I|Left]) -> case is_phi(I) of - true -> - NewAcc = [phi_remove_pred(I, Label)|Acc], - remove_pred_from_phis(Left, Label, NewAcc); + true -> + NewI = phi_remove_pred(I, Label), + [NewI | remove_pred_from_phis(Label, Left)]; false -> - lists:reverse(Acc) ++ List + List end; -remove_pred_from_phis([], _Label, Acc) -> - lists:reverse(Acc). +remove_pred_from_phis(_Label, []) -> + []. -else. %% this is used for code representations like those of back-ends which %% do not have phi-nodes. -remove_pred_from_phis(Code, _Label, _Acc) -> +remove_pred_from_phis(_Label, Code) -> Code. -endif. @@ -927,24 +962,52 @@ merge(BB, BB2, BB2_Label) -> remove_unreachable_code(CFG) -> Start = start_label(CFG), - Reachable = find_reachable([Start], CFG, gb_sets:from_list([Start])), - %% Reachable is an ordset: it comes from gb_sets:to_list/1. - %% So use ordset:subtract instead of '--' below. - Labels = ordsets:from_list(labels(CFG)), - case ordsets:subtract(Labels, Reachable) of - [] -> - CFG; + %% No unreachable block will make another block reachable, so no fixpoint + %% looping is required + Reachable = find_reachable([], [Start], CFG, #{Start=>[]}), + case [L || L <- labels(CFG), not maps:is_key(L, Reachable)] of + [] -> CFG; Remove -> - NewCFG = lists:foldl(fun(X, Acc) -> bb_remove(Acc, X) end, CFG, Remove), - remove_unreachable_code(NewCFG) + HT0 = CFG#cfg.table, + HT1 = lists:foldl(fun gb_trees:delete/2, HT0, Remove), + ReachableP = fun(Lbl) -> maps:is_key(Lbl, Reachable) end, + HT = gb_trees:map(fun(_,B)->prune_preds(B, ReachableP)end, HT1), + CFG#cfg{table=HT} end. -find_reachable([Label|Left], CFG, Acc) -> - NewAcc = gb_sets:add(Label, Acc), - Succ = succ(CFG, Label), - find_reachable([X || X <- Succ, not gb_sets:is_member(X, Acc)] ++ Left, - CFG, NewAcc); -find_reachable([], _CFG, Acc) -> - gb_sets:to_list(Acc). +find_reachable([], [], _CFG, Acc) -> Acc; +find_reachable([Succ|Succs], Left, CFG, Acc) -> + case Acc of + #{Succ := _} -> find_reachable(Succs, Left, CFG, Acc); + #{} -> find_reachable(Succs, [Succ|Left], CFG, Acc#{Succ => []}) + end; +find_reachable([], [Label|Left], CFG, Acc) -> + find_reachable(succ(CFG, Label), Left, CFG, Acc). + +%% Batch prune unreachable predecessors. Asymptotically faster than deleting +%% unreachable blocks one at a time with bb_remove, at least when +%% CFG_CAN_HAVE_PHI_NODES is undefined. Otherwise a phi_remove_preds might be +%% needed to achieve that. +prune_preds(B={Block, Succ, Preds}, ReachableP) -> + case lists:partition(ReachableP, Preds) of + {_, []} -> B; + {NewPreds, Unreach} -> + NewCode = remove_preds_from_phis(Unreach, hipe_bb:code(Block)), + {hipe_bb:code_update(Block, NewCode), Succ, NewPreds} + end. +-ifdef(CFG_CAN_HAVE_PHI_NODES). +remove_preds_from_phis(_, []) -> []; +remove_preds_from_phis(Preds, List=[I|Left]) -> + case is_phi(I) of + false -> List; + true -> + NewI = lists:foldl(fun(L,IA)->phi_remove_pred(IA,L)end, + I, Preds), + [NewI | remove_preds_from_phis(Preds, Left)] + end. +-else. +remove_preds_from_phis(_, Code) -> Code. -endif. + +-endif. %% -ifdef(REMOVE_UNREACHABLE_CODE) diff --git a/lib/hipe/flow/ebb.inc b/lib/hipe/flow/ebb.inc index 529be72dc8..e4b7fd0efb 100644 --- a/lib/hipe/flow/ebb.inc +++ b/lib/hipe/flow/ebb.inc @@ -1,9 +1,5 @@ %% -*- Erlang -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -46,12 +40,14 @@ %% | {ebb_leaf, SuccesorLabel} %%-------------------------------------------------------------------- -%% XXX: Cheating big time! no recursive types --type ebb() :: {ebb_node, icode_lbl(), _} - | {ebb_leaf, icode_lbl()}. +-type ebb() :: ebb_node() + | ebb_leaf(). -record(ebb_node, {label :: icode_lbl(), successors :: [ebb()]}). +-type ebb_node() :: #ebb_node{}. + -record(ebb_leaf, {successor :: icode_lbl()}). +-type ebb_leaf() :: #ebb_leaf{}. %%-------------------------------------------------------------------- %% Returns a list of extended basic blocks. @@ -199,7 +195,7 @@ add_succ([Lbl|Lbls], Visited, Node, MkFun, EBBs, CFG) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% --spec mk_node(icode_lbl(), [ebb()]) -> #ebb_node{}. +-spec mk_node(icode_lbl(), [ebb()]) -> ebb_node(). mk_node(Label, Successors) -> #ebb_node{label=Label, successors=Successors}. -spec node_label(#ebb_node{}) -> icode_lbl(). @@ -208,11 +204,11 @@ node_label(#ebb_node{label=Label}) -> Label. -spec node_successors(#ebb_node{}) -> [ebb()]. node_successors(#ebb_node{successors=Successors}) -> Successors. --spec mk_leaf(icode_lbl()) -> #ebb_leaf{}. +-spec mk_leaf(icode_lbl()) -> ebb_leaf(). mk_leaf(NextEbb) -> #ebb_leaf{successor=NextEbb}. %% leaf_next(Leaf) -> Leaf#ebb_leaf.successor. --spec type(#ebb_node{}) -> 'node' ; (#ebb_leaf{}) -> 'leaf'. +-spec type(ebb_node()) -> 'node' ; (ebb_leaf()) -> 'leaf'. type(#ebb_node{}) -> node; type(#ebb_leaf{}) -> leaf. diff --git a/lib/hipe/flow/hipe_bb.erl b/lib/hipe/flow/hipe_bb.erl index 2da3a6dc99..f4dad59e61 100644 --- a/lib/hipe/flow/hipe_bb.erl +++ b/lib/hipe/flow/hipe_bb.erl @@ -1,8 +1,4 @@ %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +10,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -41,6 +35,8 @@ -include("hipe_bb.hrl"). +-export_type([bb/0]). + %% %% Constructs a basic block. %% Returns a basic block: {bb, Code} diff --git a/lib/hipe/flow/hipe_bb.hrl b/lib/hipe/flow/hipe_bb.hrl index cd4d788aef..5cb5c1b370 100644 --- a/lib/hipe/flow/hipe_bb.hrl +++ b/lib/hipe/flow/hipe_bb.hrl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%------------------------------------------------------------------- %%% File : bb.hrl diff --git a/lib/hipe/flow/hipe_dominators.erl b/lib/hipe/flow/hipe_dominators.erl index 72c16b5688..749edd4f72 100644 --- a/lib/hipe/flow/hipe_dominators.erl +++ b/lib/hipe/flow/hipe_dominators.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% %%------------------------------------------------------------------------ %% File : hipe_dominators.erl %% Author : Christoffer Vikström <[email protected]> @@ -323,7 +317,7 @@ updateCell(Value, Field, WD) -> %%>----------------------------------------------------------------------< %% Procedure : dfs/1 %% Purpose : The main purpose of this function is to traverse the CFG in -%% a depth first order. It is aslo used to initialize certain +%% a depth first order. It is also used to initialize certain %% elements defined in a workDataCell. %% Arguments : CFG - a Control Flow Graph representation %% Returns : A table (WorkData) and the total number of elements in diff --git a/lib/hipe/flow/hipe_gen_cfg.erl b/lib/hipe/flow/hipe_gen_cfg.erl index a6d053f505..cc3a1b5b73 100644 --- a/lib/hipe/flow/hipe_gen_cfg.erl +++ b/lib/hipe/flow/hipe_gen_cfg.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2002-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,9 +9,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_gen_cfg). @@ -35,4 +27,3 @@ -spec pred(cfg(), cfg_lbl()) -> [cfg_lbl()]. -include("cfg.inc"). - diff --git a/lib/hipe/flow/liveness.inc b/lib/hipe/flow/liveness.inc index a1caa3e0ad..3e9d7b3c96 100644 --- a/lib/hipe/flow/liveness.inc +++ b/lib/hipe/flow/liveness.inc @@ -1,10 +1,6 @@ %% -*- Erlang -*- %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -49,6 +43,10 @@ -endif. -include("../flow/cfg.hrl"). +-include("../main/hipe.hrl"). + +-opaque liveness() :: map(). +-export_type([liveness/0]). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -72,7 +70,7 @@ %% The generic liveness analysis %% --spec analyze(cfg()) -> gb_trees:tree(). +-spec analyze(cfg()) -> liveness(). -ifdef(HIPE_LIVENESS_CALC_LARGEST_LIVESET). analyze(CFG) -> @@ -188,6 +186,7 @@ update_livein(Label, NewLiveIn, Liveness) -> %% %% LiveOut for a block is the union of the successors LiveIn %% +-spec liveout(liveness(), _) -> [_]. liveout(Liveness, L) -> Succ = successors(L, Liveness), @@ -210,7 +209,7 @@ successors(L, Liveness) -> {_GK, _LiveIn, Successors} = liveness_lookup(L, Liveness), Successors. --spec livein(gb_trees:tree(), _) -> [_]. +-spec livein(liveness(), _) -> [_]. livein(Liveness, L) -> {_GK, LiveIn, _Successors} = liveness_lookup(L, Liveness), @@ -292,18 +291,15 @@ strip([{_,Y}|Xs]) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% +-compile({inline, [liveness_lookup/2, liveness_update/3]}). + liveness_init(List) -> - liveness_init(List, gb_trees:empty()). + maps:from_list(List). -liveness_init([{Lbl, Data}|Left], Acc) -> - liveness_init(Left, gb_trees:insert(Lbl, Data, Acc)); -liveness_init([], Acc) -> - Acc. - liveness_lookup(Label, Liveness) -> - gb_trees:get(Label, Liveness). + maps:get(Label, Liveness). liveness_update(Label, Val, Liveness) -> - gb_trees:update(Label, Val, Liveness). + maps:update(Label, Val, Liveness). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/lib/hipe/icode/hipe_beam_to_icode.erl b/lib/hipe/icode/hipe_beam_to_icode.erl index 3386523206..2abecf7f18 100644 --- a/lib/hipe/icode/hipe_beam_to_icode.erl +++ b/lib/hipe/icode/hipe_beam_to_icode.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% %%======================================================================= %% File : hipe_beam_to_icode.erl %% Author : Kostis Sagonas @@ -154,7 +148,8 @@ trans_mfa_code(M,F,A, FunBeamCode, ClosureInfo) -> {Code3,_Env3} = mk_debug_calltrace(MFA, Env1, Code2), {Code3,_Env3} = {Code2,Env1}), %% For stack optimization - Leafness = leafness(Code3), + IsClosure = get_closure_info(MFA, ClosureInfo) =/= not_a_closure, + Leafness = leafness(Code3, IsClosure), IsLeaf = is_leaf_code(Leafness), Code4 = [FunLbl | @@ -162,7 +157,6 @@ trans_mfa_code(M,F,A, FunBeamCode, ClosureInfo) -> false -> Code3; true -> [mk_redtest()|Code3] end], - IsClosure = get_closure_info(MFA, ClosureInfo) =/= not_a_closure, Code5 = hipe_icode:mk_icode(MFA, FunArgs, IsClosure, IsLeaf, remove_dead_code(Code4), hipe_gensym:var_range(icode), @@ -179,12 +173,12 @@ trans_mfa_code(M,F,A, FunBeamCode, ClosureInfo) -> mk_redtest() -> hipe_icode:mk_primop([], redtest, []). -leafness(Is) -> % -> true, selfrec, or false - leafness(Is, true). +leafness(Is, IsClosure) -> % -> true, selfrec, closure, or false + leafness(Is, IsClosure, true). -leafness([], Leafness) -> +leafness([], _IsClosure, Leafness) -> Leafness; -leafness([I|Is], Leafness) -> +leafness([I|Is], IsClosure, Leafness) -> case I of #icode_comment{} -> %% BEAM self-tailcalls become gotos, but they leave @@ -197,7 +191,7 @@ leafness([I|Is], Leafness) -> 'self_tail_recursive' -> selfrec; % call_only to selfrec _ -> Leafness end, - leafness(Is, NewLeafness); + leafness(Is, IsClosure, NewLeafness); #icode_call{} -> case hipe_icode:call_type(I) of 'primop' -> @@ -205,12 +199,12 @@ leafness([I|Is], Leafness) -> call_fun -> false; % Calls closure enter_fun -> false; % Calls closure #apply_N{} -> false; - _ -> leafness(Is, Leafness) % Other primop calls are ok + _ -> leafness(Is, IsClosure, Leafness) % Other primop calls are ok end; T when T =:= 'local' orelse T =:= 'remote' -> {M,F,A} = hipe_icode:call_fun(I), case erlang:is_builtin(M, F, A) of - true -> leafness(Is, Leafness); + true -> leafness(Is, IsClosure, Leafness); false -> false end end; @@ -229,11 +223,12 @@ leafness([I|Is], Leafness) -> T when T =:= 'local' orelse T =:= 'remote' -> {M,F,A} = hipe_icode:enter_fun(I), case erlang:is_builtin(M, F, A) of - true -> leafness(Is, Leafness); + true -> leafness(Is, IsClosure, Leafness); + _ when IsClosure -> leafness(Is, IsClosure, closure); _ -> false end end; - _ -> leafness(Is, Leafness) + _ -> leafness(Is, IsClosure, Leafness) end. %% XXX: this old stuff is passed around but essentially unused @@ -241,12 +236,20 @@ is_leaf_code(Leafness) -> case Leafness of true -> true; selfrec -> true; + closure -> false; false -> false end. needs_redtest(Leafness) -> case Leafness of true -> false; + %% A "leaf" closure may contain tailcalls to non-closures in addition to + %% what other leaves may contain. Omitting the redtest is useful to generate + %% shorter code for closures generated by (fun F/A), and is safe since + %% control flow cannot return to a "leaf" closure again without a reduction + %% being consumed. This is true since no function that can call a closure + %% will ever have its redtest omitted. + closure -> false; selfrec -> true; false -> true end. @@ -510,6 +513,19 @@ trans_fun([{test,test_arity,{f,Lbl},[Reg,N]}|Instructions], Env) -> I = hipe_icode:mk_type([trans_arg(Reg)],{tuple,N}, hipe_icode:label_name(True),map_label(Lbl)), [I,True | trans_fun(Instructions,Env)]; +%%--- test_is_tagged_tuple --- +trans_fun([{test,is_tagged_tuple,{f,Lbl},[Reg,N,Atom]}|Instructions], Env) -> + TrueArity = mk_label(new), + IArity = hipe_icode:mk_type([trans_arg(Reg)],{tuple,N}, + hipe_icode:label_name(TrueArity),map_label(Lbl)), + Var = hipe_icode:mk_new_var(), + IGet = hipe_icode:mk_primop([Var], + #unsafe_element{index=1}, + [trans_arg(Reg)]), + TrueAtom = mk_label(new), + IEQ = hipe_icode:mk_type([Var], Atom, hipe_icode:label_name(TrueAtom), + map_label(Lbl)), + [IArity,TrueArity,IGet,IEQ,TrueAtom | trans_fun(Instructions,Env)]; %%--- is_map --- trans_fun([{test,is_map,{f,Lbl},[Arg]}|Instructions], Env) -> {Code,Env1} = trans_type_test(map,Lbl,Arg,Env), diff --git a/lib/hipe/icode/hipe_icode.erl b/lib/hipe/icode/hipe_icode.erl index 13a0bf6141..24b7ac4783 100644 --- a/lib/hipe/icode/hipe_icode.erl +++ b/lib/hipe/icode/hipe_icode.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% HiPE Intermediate Code %% ==================================================================== @@ -30,9 +24,6 @@ %% 2003-03-15 ES ([email protected]): %% Started commenting in Edoc. %% Moved pretty printer to separate file. -%% -%% $Id$ -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%@doc @@ -438,6 +429,7 @@ if_true_label/1, if_false_label/1, if_args/1, + if_args_update/2, if_pred/1, %% is_if/1, @@ -594,6 +586,7 @@ uses/1, defines/1, is_safe/1, + reduce_unused/1, strip_comments/1, subst/2, subst_uses/2, @@ -713,6 +706,9 @@ if_op_update(IF, NewOp) -> IF#icode_if{op=NewOp}. -spec if_args(#icode_if{}) -> [icode_term_arg()]. if_args(#icode_if{args=Args}) -> Args. +-spec if_args_update(#icode_if{}, [icode_term_arg()]) -> #icode_if{}. +if_args_update(IF, Args) -> IF#icode_if{args=Args}. + -spec if_true_label(#icode_if{}) -> icode_lbl(). if_true_label(#icode_if{true_label=TrueLbl}) -> TrueLbl. @@ -1765,6 +1761,18 @@ is_safe(Instr) -> #icode_end_try{} -> false end. +%% @doc Produces a simplified instruction sequence that is equivalent to [Instr] +%% under the assumption that all results of Instr are unused, or 'false' if +%% there is no such sequence (other than [Instr] itself). + +-spec reduce_unused(icode_instr()) -> false | [icode_instr()]. + +reduce_unused(Instr) -> + case is_safe(Instr) of + true -> []; + false -> false + end. + %%----------------------------------------------------------------------- -spec highest_var(icode_instrs()) -> non_neg_integer(). diff --git a/lib/hipe/icode/hipe_icode.hrl b/lib/hipe/icode/hipe_icode.hrl index b2e0d86b28..380ddd8371 100644 --- a/lib/hipe/icode/hipe_icode.hrl +++ b/lib/hipe/icode/hipe_icode.hrl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%===================================================================== %% diff --git a/lib/hipe/icode/hipe_icode_bincomp.erl b/lib/hipe/icode/hipe_icode_bincomp.erl index 5a27519141..f88637e526 100644 --- a/lib/hipe/icode/hipe_icode_bincomp.erl +++ b/lib/hipe/icode/hipe_icode_bincomp.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2006-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%%------------------------------------------------------------------- %%% File : hipe_icode_bincomp.erl @@ -40,8 +34,8 @@ -spec cfg(cfg()) -> cfg(). cfg(Cfg1) -> - StartLbls = ordsets:from_list([hipe_icode_cfg:start_label(Cfg1)]), - find_bs_get_integer(StartLbls, Cfg1, StartLbls). + StartLbl = hipe_icode_cfg:start_label(Cfg1), + find_bs_get_integer([StartLbl], Cfg1, set_from_list([StartLbl])). find_bs_get_integer([Lbl|Rest], Cfg, Visited) -> BB = hipe_icode_cfg:bb(Cfg, Lbl), @@ -55,10 +49,10 @@ find_bs_get_integer([Lbl|Rest], Cfg, Visited) -> not_ok -> Cfg end, - Succs = ordsets:from_list(hipe_icode_cfg:succ(NewCfg, Lbl)), - NewSuccs = ordsets:subtract(Succs, Visited), - NewLbls = ordsets:union(NewSuccs, Rest), - NewVisited = ordsets:union(NewSuccs, Visited), + Succs = hipe_icode_cfg:succ(NewCfg, Lbl), + NewSuccs = not_visited(Succs, Visited), + NewLbls = NewSuccs ++ Rest, + NewVisited = set_union(set_from_list(NewSuccs), Visited), find_bs_get_integer(NewLbls, NewCfg, NewVisited); find_bs_get_integer([], Cfg, _) -> Cfg. @@ -177,3 +171,19 @@ make_butlast([{Res, Size}|Rest], Var) -> [Var, hipe_icode:mk_const((1 bsl Size)-1)]), hipe_icode:mk_primop([NewVar], 'bsr', [Var, hipe_icode:mk_const(Size)]) |make_butlast(Rest, NewVar)]. + +%%-------------------------------------------------------------------- +%% Sets + +set_from_list([]) -> #{}; +set_from_list(L) -> + maps:from_list([{E, []} || E <- L]). + +not_visited([], _) -> []; +not_visited([E|T], M) -> + case M of + #{E := _} -> not_visited(T, M); + _ -> [E|not_visited(T, M)] + end. + +set_union(A, B) -> maps:merge(A, B). diff --git a/lib/hipe/icode/hipe_icode_call_elim.erl b/lib/hipe/icode/hipe_icode_call_elim.erl index 71c709a7d1..367ce7cfe5 100644 --- a/lib/hipe/icode/hipe_icode_call_elim.erl +++ b/lib/hipe/icode/hipe_icode_call_elim.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% %%---------------------------------------------------------------------- %% File : hipe_icode_call_elim.erl %% Authors : Daniel S. McCain <[email protected]>, diff --git a/lib/hipe/icode/hipe_icode_callgraph.erl b/lib/hipe/icode/hipe_icode_callgraph.erl index 12c2cd2b44..365c65315e 100644 --- a/lib/hipe/icode/hipe_icode_callgraph.erl +++ b/lib/hipe/icode/hipe_icode_callgraph.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%----------------------------------------------------------------------- %% File : hipe_icode_callgraph.erl diff --git a/lib/hipe/icode/hipe_icode_cfg.erl b/lib/hipe/icode/hipe_icode_cfg.erl index 9a602c0283..c5f5592cc9 100644 --- a/lib/hipe/icode/hipe_icode_cfg.erl +++ b/lib/hipe/icode/hipe_icode_cfg.erl @@ -1,10 +1,5 @@ %% -*- erlang-indent-level: 2 -*- -%%====================================================================== %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_icode_cfg). diff --git a/lib/hipe/icode/hipe_icode_coordinator.erl b/lib/hipe/icode/hipe_icode_coordinator.erl index d2f8748535..4ef210eca4 100644 --- a/lib/hipe/icode/hipe_icode_coordinator.erl +++ b/lib/hipe/icode/hipe_icode_coordinator.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%-------------------------------------------------------------------- %% File : hipe_icode_coordinator.erl @@ -106,12 +100,29 @@ handle_no_change_done(MFA, {Queue, Busy}) -> {Queue, Busy -- [MFA]}. last_action(PM, ServerPid, Mod, All) -> - lists:foreach(fun (MFA) -> - gb_trees:get(MFA, PM) ! {done, final_funs(ServerPid, Mod)}, - receive - {done_rewrite, MFA} -> ok - end - end, All). + last_action(PM, ServerPid, Mod, All, []). + +last_action(_, _, _, [], []) -> ok; +last_action(PM, ServerPid, Mod, [], [MFA|Busy]) -> + receive + {done_rewrite, MFA} -> + last_action(PM, ServerPid, Mod, [], Busy) + end; +last_action(PM, ServerPid, Mod, All0, Busy) -> + receive + {done_rewrite, MFA} -> + last_action(PM, ServerPid, Mod, All0, Busy -- [MFA]) + after 0 -> + case ?MAX_CONCURRENT - length(Busy) of + X when is_integer(X), X > 0 -> + [MFA|All1] = All0, + gb_trees:get(MFA, PM) ! {done, final_funs(ServerPid, Mod)}, + last_action(PM, ServerPid, Mod, All1, [MFA|Busy]); + X when is_integer(X) -> + Busy1 = receive {done_rewrite, MFA} -> Busy -- [MFA] end, + last_action(PM, ServerPid, Mod, All0, Busy1) + end + end. restart_funs({Queue, Busy} = QB, PM, All, ServerPid) -> case ?MAX_CONCURRENT - length(Busy) of diff --git a/lib/hipe/icode/hipe_icode_ebb.erl b/lib/hipe/icode/hipe_icode_ebb.erl index 2aac9d2f42..2cc4321fb8 100644 --- a/lib/hipe/icode/hipe_icode_ebb.erl +++ b/lib/hipe/icode/hipe_icode_ebb.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,14 +9,12 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% Icode version of extended basic blocks. %% - + -module(hipe_icode_ebb). -define(CFG, hipe_icode_cfg). diff --git a/lib/hipe/icode/hipe_icode_exceptions.erl b/lib/hipe/icode/hipe_icode_exceptions.erl index f03ce2faaa..0039eb5091 100644 --- a/lib/hipe/icode/hipe_icode_exceptions.erl +++ b/lib/hipe/icode/hipe_icode_exceptions.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2015. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %% ==================================================================== %% Filename : hipe_icode_exceptions.erl @@ -71,9 +65,6 @@ %% exit value and jump directly to the catch handler. An %% alternative solution would be to have a new type of %% fail instruction that takes a fail-to label... -%% -%% CVS: -%% $Id$ %% ==================================================================== -module(hipe_icode_exceptions). diff --git a/lib/hipe/icode/hipe_icode_fp.erl b/lib/hipe/icode/hipe_icode_fp.erl index 4a5877074c..4933ee96b4 100644 --- a/lib/hipe/icode/hipe_icode_fp.erl +++ b/lib/hipe/icode/hipe_icode_fp.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%-------------------------------------------------------------------- %% File : hipe_icode_fp.erl diff --git a/lib/hipe/icode/hipe_icode_heap_test.erl b/lib/hipe/icode/hipe_icode_heap_test.erl index ec754d5ee9..1a4f28e1af 100644 --- a/lib/hipe/icode/hipe_icode_heap_test.erl +++ b/lib/hipe/icode/hipe_icode_heap_test.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Copyright (c) 2000 by Erik Johansson. All Rights Reserved @@ -27,9 +21,6 @@ %% Notes : %% History : * 2000-11-07 Erik Johansson ([email protected]): %% Created. -%% -%% $Id$ -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -module(hipe_icode_heap_test). diff --git a/lib/hipe/icode/hipe_icode_inline_bifs.erl b/lib/hipe/icode/hipe_icode_inline_bifs.erl index 79f67c2db6..7a6947f190 100644 --- a/lib/hipe/icode/hipe_icode_inline_bifs.erl +++ b/lib/hipe/icode/hipe_icode_inline_bifs.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%-------------------------------------------------------------------- %% File : hipe_icode_inline_bifs.erl @@ -24,7 +18,7 @@ %% Purpose : Inlines BIFs which can be expressed easily in ICode. %% This allows for optimizations in later ICode passes %% and makes the code faster. -%% +%% %% Created : 14 May 2007 by Per Gustafsson <[email protected]> %%-------------------------------------------------------------------- diff --git a/lib/hipe/icode/hipe_icode_instruction_counter.erl b/lib/hipe/icode/hipe_icode_instruction_counter.erl index afa70e495b..97a19753a1 100644 --- a/lib/hipe/icode/hipe_icode_instruction_counter.erl +++ b/lib/hipe/icode/hipe_icode_instruction_counter.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2006-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%------------------------------------------------------------------- %% File : icode_instruction_counter.erl diff --git a/lib/hipe/icode/hipe_icode_liveness.erl b/lib/hipe/icode/hipe_icode_liveness.erl index 317d2e54c2..51e2855108 100644 --- a/lib/hipe/icode/hipe_icode_liveness.erl +++ b/lib/hipe/icode/hipe_icode_liveness.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% diff --git a/lib/hipe/icode/hipe_icode_mulret.erl b/lib/hipe/icode/hipe_icode_mulret.erl index d927a46222..227cfadfda 100644 --- a/lib/hipe/icode/hipe_icode_mulret.erl +++ b/lib/hipe/icode/hipe_icode_mulret.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%---------------------------------------------------------------------- %% File : hipe_icode_mulret.erl diff --git a/lib/hipe/icode/hipe_icode_pp.erl b/lib/hipe/icode/hipe_icode_pp.erl index a736b54c38..5b017dca32 100644 --- a/lib/hipe/icode/hipe_icode_pp.erl +++ b/lib/hipe/icode/hipe_icode_pp.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Copyright (c) 2003 by Erik Stenman. @@ -26,10 +20,6 @@ %% Purpose : Pretty-printer for Icode. %% Notes : %% History : * 2003-04-16 ([email protected]): Created. -%% CVS : -%% $Author$ -%% $Date$ -%% $Revision$ %% ==================================================================== %% %% @doc diff --git a/lib/hipe/icode/hipe_icode_primops.erl b/lib/hipe/icode/hipe_icode_primops.erl index 2a141c514e..50ece05259 100644 --- a/lib/hipe/icode/hipe_icode_primops.erl +++ b/lib/hipe/icode/hipe_icode_primops.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Copyright (c) 2001 by Erik Johansson. All Rights Reserved %% ==================================================================== diff --git a/lib/hipe/icode/hipe_icode_primops.hrl b/lib/hipe/icode/hipe_icode_primops.hrl index a0aee165ba..6c6fbd3dad 100644 --- a/lib/hipe/icode/hipe_icode_primops.hrl +++ b/lib/hipe/icode/hipe_icode_primops.hrl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,16 +11,12 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%======================================================================= %% File : hipe_icode_primops.hrl %% Author : Kostis Sagonas %% Description : Contains definitions for HiPE's primitive operations. %%======================================================================= -%% $Id$ -%%======================================================================= -record(apply_N, {arity :: arity()}). diff --git a/lib/hipe/icode/hipe_icode_range.erl b/lib/hipe/icode/hipe_icode_range.erl index 12ed796690..287b1c80fe 100644 --- a/lib/hipe/icode/hipe_icode_range.erl +++ b/lib/hipe/icode/hipe_icode_range.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% %%%------------------------------------------------------------------- %%% File : hipe_icode_range.erl %%% Author : Per Gustafsson <[email protected]> @@ -73,8 +67,8 @@ -type final_fun() :: fun((mfa(), [range()]) -> 'ok'). -type data() :: {mfa(), args_fun(), call_fun(), final_fun()}. -type label() :: non_neg_integer(). --type info() :: gb_trees:tree(). --type work_list() :: {[label()], [label()], sets:set()}. +-type info() :: map(). +-type work_list() :: {[label()], [label()], set(label())}. -type variable() :: #icode_variable{}. -type annotated_variable() :: #icode_variable{}. -type argument() :: #icode_const{} | variable(). @@ -82,10 +76,9 @@ -type instr_split_info() :: {icode_instr(), [{label(),info()}]}. -type last_instr_return() :: {instr_split_info(), range()}. --record(state, {info_map = gb_trees:empty() :: info(), - counter = dict:new() :: dict:dict(), - cfg :: cfg(), - liveness = gb_trees:empty() :: gb_trees:tree(), +-record(state, {info_map = #{} :: info(), + cfg :: cfg(), + liveness :: hipe_icode_ssa:liveness(), ret_type :: range(), lookup_fun :: call_fun(), result_action :: final_fun()}). @@ -187,17 +180,16 @@ safe_analyse(CFG, Data={MFA,_,_,_}) -> rewrite_blocks(State) -> CFG = state__cfg(State), Start = hipe_icode_cfg:start_label(CFG), - rewrite_blocks([Start], State, [Start]). + rewrite_blocks([Start], State, set_from_list([Start])). --spec rewrite_blocks([label()], state(), [label()]) -> state(). +-spec rewrite_blocks([label()], state(), set(label())) -> state(). rewrite_blocks([Next|Rest], State, Visited) -> Info = state__info_in(State, Next), {NewState, NewLabels} = analyse_block(Next, Info, State, true), - NewLabelsSet = ordsets:from_list(NewLabels), - RealNew = ordsets:subtract(NewLabelsSet, Visited), - NewVisited = ordsets:union([RealNew, Visited, [Next]]), - NewWork = ordsets:union([RealNew, Rest]), + RealNew = not_visited(NewLabels, Visited), + NewVisited = set_union(set_from_list(RealNew), Visited), + NewWork = RealNew ++ Rest, rewrite_blocks(NewWork, NewState, NewVisited); rewrite_blocks([], State, _) -> State. @@ -400,14 +392,17 @@ widen(#range{range=Old}, #range{range=New}, T = #range{range=Wide}) -> -spec analyse_call(#icode_call{}, call_fun()) -> #icode_call{}. analyse_call(Call, LookupFun) -> + Args = hipe_icode:args(Call), + Fun = hipe_icode:call_fun(Call), + Type = hipe_icode:call_type(Call), + %% This call has side-effects (it might call LookupFun which sends messages to + %% hipe_icode_coordinator to update the argument ranges of Fun), and must thus + %% not be moved into the case statement. + DstRanges = analyse_call_or_enter_fun(Fun, Args, Type, LookupFun), case hipe_icode:call_dstlist(Call) of [] -> Call; Dsts -> - Args = hipe_icode:args(Call), - Fun = hipe_icode:call_fun(Call), - Type = hipe_icode:call_type(Call), - DstRanges = analyse_call_or_enter_fun(Fun, Args, Type, LookupFun), NewDefs = [update_info(Var, R) || {Var,R} <- lists:zip(Dsts, DstRanges)], hipe_icode:subst_defines(lists:zip(Dsts, NewDefs), Call) end. @@ -1314,16 +1309,15 @@ range_rem(Range1, Range2) -> Min1_geq_zero = inf_geq(Min1, 0), Max1_leq_zero = inf_geq(0, Max1), Max_range2 = inf_max([inf_abs(Min2), inf_abs(Max2)]), - Max_range2_leq_zero = inf_geq(0, Max_range2), New_min = if Min1_geq_zero -> 0; - Max_range2_leq_zero -> Max_range2; - true -> inf_inv(Max_range2) + Max_range2 =:= 0 -> 0; + true -> inf_add(inf_inv(Max_range2), 1) end, New_max = if Max1_leq_zero -> 0; - Max_range2_leq_zero -> inf_inv(Max_range2); - true -> Max_range2 + Max_range2 =:= 0 -> 0; + true -> inf_add(Max_range2, -1) end, range_init({New_min, New_max}, false). @@ -1661,8 +1655,8 @@ state__init(Cfg, {MFA, ArgsFun, CallFun, FinalFun}) -> false -> NewParams = lists:zipwith(fun update_info/2, Params, Ranges), NewCfg = hipe_icode_cfg:params_update(Cfg, NewParams), - Info = enter_defines(NewParams, gb_trees:empty()), - InfoMap = gb_trees:insert({Start, in}, Info, gb_trees:empty()), + Info = enter_defines(NewParams, #{}), + InfoMap = #{{Start, in} => Info}, #state{info_map=InfoMap, cfg=NewCfg, liveness=Liveness, ret_type=none_type(), lookup_fun=CallFun, result_action=FinalFun} @@ -1700,7 +1694,7 @@ state__info_in(S, Label) -> state__info(S, {Label, in}). state__info(#state{info_map=IM}, Key) -> - gb_trees:get(Key, IM). + maps:get(Key, IM). state__update_info(State, LabelInfo, Rewrite) -> update_info(LabelInfo, State, [], Rewrite). @@ -1721,60 +1715,58 @@ update_info([], State, LabelAcc, _Rewrite) -> state__info_in_update(S=#state{info_map=IM,liveness=Liveness}, Label, Info) -> LabelIn = {Label, in}, - case gb_trees:lookup(LabelIn, IM) of - none -> + case IM of + #{LabelIn := OldInfo} -> + OldVars = maps:keys(OldInfo), + case join_info_in(OldVars, OldInfo, Info) of + fixpoint -> + fixpoint; + NewInfo -> + S#state{info_map=IM#{LabelIn := NewInfo}} + end; + _ -> LiveIn = hipe_icode_ssa:ssa_liveness__livein(Liveness, Label), NamesLiveIn = [hipe_icode:var_name(Var) || Var <- LiveIn, hipe_icode:is_var(Var)], - OldInfo = gb_trees:empty(), + OldInfo = #{}, case join_info_in(NamesLiveIn, OldInfo, Info) of fixpoint -> - S#state{info_map=gb_trees:insert(LabelIn, OldInfo, IM)}; + S#state{info_map=IM#{LabelIn => OldInfo}}; NewInfo -> - S#state{info_map=gb_trees:enter(LabelIn, NewInfo, IM)} - end; - {value, OldInfo} -> - OldVars = gb_trees:keys(OldInfo), - case join_info_in(OldVars, OldInfo, Info) of - fixpoint -> - fixpoint; - NewInfo -> - S#state{info_map=gb_trees:update(LabelIn, NewInfo, IM)} + S#state{info_map=IM#{LabelIn => NewInfo}} end end. join_info_in(Vars, OldInfo, NewInfo) -> - case join_info_in(Vars, OldInfo, NewInfo, gb_trees:empty(), false) of + case join_info_in(Vars, OldInfo, NewInfo, #{}, false) of {Res, true} -> Res; {_, false} -> fixpoint end. join_info_in([Var|Left], Info1, Info2, Acc, Changed) -> - Type1 = gb_trees:lookup(Var, Info1), - Type2 = gb_trees:lookup(Var, Info2), - case {Type1, Type2} of - {none, none} -> - NewTree = gb_trees:insert(Var, none_type(), Acc), - join_info_in(Left, Info1, Info2, NewTree, true); - {none, {value, Val}} -> - NewTree = gb_trees:insert(Var, Val, Acc), - join_info_in(Left, Info1, Info2, NewTree, true); - {{value, Val}, none} -> - NewTree = gb_trees:insert(Var, Val, Acc), - join_info_in(Left, Info1, Info2, NewTree, Changed); - {{value, Val}, {value, Val}} -> - NewTree = gb_trees:insert(Var, Val, Acc), + case {Info1, Info2} of + {#{Var := Val}, #{Var := Val}} -> + NewTree = Acc#{Var => Val}, join_info_in(Left, Info1, Info2, NewTree, Changed); - {{value, Val1}, {value, Val2}} -> - {NewChanged, NewVal} = + {#{Var := Val1}, #{Var := Val2}} -> + {NewChanged, NewVal} = case sup(Val1, Val2) of Val1 -> {Changed, Val1}; Val -> {true, Val} end, - NewTree = gb_trees:insert(Var, NewVal, Acc), - join_info_in(Left, Info1, Info2, NewTree, NewChanged) + NewTree = Acc#{Var => NewVal}, + join_info_in(Left, Info1, Info2, NewTree, NewChanged); + {_, #{Var := Val}} -> + NewTree = Acc#{Var => Val}, + join_info_in(Left, Info1, Info2, NewTree, true); + {#{Var := Val}, _} -> + NewTree = Acc#{Var => Val}, + join_info_in(Left, Info1, Info2, NewTree, Changed); + {_, _} -> + NewTree = Acc#{Var => none_type()}, + join_info_in(Left, Info1, Info2, NewTree, true) end; join_info_in([], _Info1, _Info2, Acc, NewChanged) -> {Acc, NewChanged}. @@ -1786,7 +1778,7 @@ enter_defines([], Info) -> Info. enter_define({PossibleVar, Range = #range{}}, Info) -> case hipe_icode:is_var(PossibleVar) of true -> - gb_trees:enter(hipe_icode:var_name(PossibleVar), Range, Info); + Info#{hipe_icode:var_name(PossibleVar) => Range}; false -> Info end; @@ -1795,7 +1787,7 @@ enter_define(PossibleVar, Info) -> true -> case hipe_icode:variable_annotation(PossibleVar) of {range_anno, #ann{range=Range}, _} -> - gb_trees:enter(hipe_icode:var_name(PossibleVar), Range, Info); + Info#{hipe_icode:var_name(PossibleVar) => Range}; _ -> Info end; @@ -1810,11 +1802,10 @@ enter_vals(Ins, Info) -> lookup(PossibleVar, Info) -> case hipe_icode:is_var(PossibleVar) of true -> - case gb_trees:lookup(hipe_icode:var_name(PossibleVar), Info) of - none -> - none_type(); - {value, Val} -> - Val + PossibleVarName = hipe_icode:var_name(PossibleVar), + case Info of + #{PossibleVarName := Val} -> Val; + _ -> none_type() end; false -> none_type() @@ -1828,10 +1819,10 @@ lookup(PossibleVar, Info) -> init_work(State) -> %% Labels = hipe_icode_cfg:reverse_postorder(state__cfg(State)), Labels = [hipe_icode_cfg:start_label(state__cfg(State))], - {Labels, [], sets:from_list(Labels)}. + {Labels, [], set_from_list(Labels)}. get_work({[Label|Left], List, Set}) -> - NewWork = {Left, List, sets:del_element(Label, Set)}, + NewWork = {Left, List, maps:remove(Label, Set)}, {Label, NewWork}; get_work({[], [], _Set}) -> fixpoint; @@ -1839,12 +1830,12 @@ get_work({[], List, Set}) -> get_work({lists:reverse(List), [], Set}). add_work(Work = {List1, List2, Set}, [Label|Left]) -> - case sets:is_element(Label, Set) of - true -> + case Set of + #{Label := _} -> add_work(Work, Left); - false -> + _ -> %% io:format("Adding work: ~w\n", [Label]), - add_work({List1, [Label|List2], sets:add_element(Label, Set)}, Left) + add_work({List1, [Label|List2], Set#{Label => []}}, Left) end; add_work(Work, []) -> Work. @@ -1959,3 +1950,21 @@ next_down_limit(X) when is_integer(X), X > -16#8000000 -> -16#8000000; next_down_limit(X) when is_integer(X), X > -16#80000000 -> -16#80000000; next_down_limit(X) when is_integer(X), X > -16#800000000000000 -> -16#800000000000000; next_down_limit(_X) -> neg_inf. + +%%-------------------------------------------------------------------- +%% Sets + +-type set(E) :: #{E => []}. + +set_from_list([]) -> #{}; +set_from_list(L) -> + maps:from_list([{E, []} || E <- L]). + +not_visited([], _) -> []; +not_visited([E|T], M) -> + case M of + #{E := []} -> not_visited(T, M); + _ -> [E|not_visited(T, M)] + end. + +set_union(A, B) -> maps:merge(A, B). diff --git a/lib/hipe/icode/hipe_icode_split_arith.erl b/lib/hipe/icode/hipe_icode_split_arith.erl index e00a13f82e..44c1a9578d 100644 --- a/lib/hipe/icode/hipe_icode_split_arith.erl +++ b/lib/hipe/icode/hipe_icode_split_arith.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%------------------------------------------------------------------- %% File : hipe_icode_split_arith.erl diff --git a/lib/hipe/icode/hipe_icode_ssa.erl b/lib/hipe/icode/hipe_icode_ssa.erl index b222fbc7d2..88317e9629 100644 --- a/lib/hipe/icode/hipe_icode_ssa.erl +++ b/lib/hipe/icode/hipe_icode_ssa.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2002-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%---------------------------------------------------------------------- %% File : hipe_icode_ssa.erl @@ -34,13 +28,16 @@ -define(LIVENESS, hipe_icode_liveness). -define(LIVENESS_NEEDED, true). +-export_type([liveness/0]). + -include("hipe_icode.hrl"). -include("../ssa/hipe_ssa.inc"). %% Declarations for exported functions which are Icode-specific. --spec ssa_liveness__analyze(#cfg{}) -> gb_trees:tree(). --spec ssa_liveness__livein(_, icode_lbl()) -> [#icode_variable{}]. -%% -spec ssa_liveness__livein(_, icode_lbl(), _) -> [#icode_var{}]. +-opaque liveness() :: liveness(icode_lbl(), #icode_variable{}). +-spec ssa_liveness__analyze(#cfg{}) -> liveness(). +-spec ssa_liveness__livein(liveness(), icode_lbl()) -> [#icode_variable{}]. +%% -spec ssa_liveness__livein(liveness(), icode_lbl(), _) -> [#icode_var{}]. %%---------------------------------------------------------------------- %% Auxiliary operations which seriously differ between Icode and RTL. diff --git a/lib/hipe/icode/hipe_icode_ssa_const_prop.erl b/lib/hipe/icode/hipe_icode_ssa_const_prop.erl index 4ab4d7e95d..e2cd013b4c 100644 --- a/lib/hipe/icode/hipe_icode_ssa_const_prop.erl +++ b/lib/hipe/icode/hipe_icode_ssa_const_prop.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -97,11 +91,13 @@ visit_expression(Instruction, Environment) -> visit_begin_handler (Instruction, EvaluatedArguments, Environment); #icode_begin_try{} -> visit_begin_try (Instruction, EvaluatedArguments, Environment); - #icode_fail{} -> + #icode_fail{} -> visit_fail (Instruction, EvaluatedArguments, Environment); - _ -> - %% label, end_try, comment, return, - {[], [], Environment} + #icode_comment{} -> {[], [], Environment}; + #icode_end_try{} -> {[], [], Environment}; + #icode_enter{} -> {[], [], Environment}; + #icode_label{} -> {[], [], Environment}; + #icode_return{} -> {[], [], Environment} end. %%----------------------------------------------------------------------------- @@ -463,11 +459,15 @@ update_instruction(Instruction, Environment) -> update_type(Instruction, Environment); #icode_switch_tuple_arity{} -> update_switch_tuple_arity(Instruction, Environment); - _ -> - %% goto, comment, label, return, begin_handler, end_try, - %% begin_try, fail - %% We could but don't handle: catch?, fail? - [Instruction] + %% We could but don't handle: catch?, fail? + #icode_begin_handler{} -> [Instruction]; + #icode_begin_try{} -> [Instruction]; + #icode_comment{} -> [Instruction]; + #icode_end_try{} -> [Instruction]; + #icode_fail{} -> [Instruction]; + #icode_goto{} -> [Instruction]; + #icode_label{} -> [Instruction]; + #icode_return{} -> [Instruction] end. %%----------------------------------------------------------------------------- @@ -502,14 +502,12 @@ update_call(Instruction, Environment) -> [Instruction, NewInstructions]), NewInstructions end; -%% %% [] -> %% No destination; we don't touch this -%% [] -> -%% NewArguments = update_arguments(hipe_icode:call_args(Instruction), -%% Environment), -%% [hipe_icode:call_args_update(Instruction, NewArguments)]; + %% [] -> %% No destination; we don't touch this %% List-> %% Means register allocation; not implemented at this point _ -> - [Instruction] + NewArguments = update_arguments(hipe_icode:call_args(Instruction), + Environment), + [hipe_icode:call_args_update(Instruction, NewArguments)] end. %%----------------------------------------------------------------------------- @@ -574,7 +572,9 @@ update_if(Instruction, Environment) -> %% Convert the if-test to a type test if possible. Op = hipe_icode:if_op(Instruction), case Op =:= '=:=' orelse Op =:= '=/=' of - false -> [Instruction]; + false -> + [hipe_icode:if_args_update( + Instruction, update_arguments(Args, Environment))]; true -> [Arg1, Arg2] = Args, case EvaluatedArguments of @@ -604,8 +604,9 @@ conv_if_to_type(I, Const, Arg) when is_atom(Const); NewI = hipe_icode:mk_type([Arg], Test, T, F), ?CONST_PROP_MSG("if: ~w ---> type ~w\n", [I, NewI]), [NewI]; -conv_if_to_type(I, _, _) -> - [I]. +conv_if_to_type(I, Const, Arg) -> + %% Note: we are potentially commuting the (equality) comparison here + [hipe_icode:if_args_update(I, [Arg, hipe_icode:mk_const(Const)])]. %%----------------------------------------------------------------------------- diff --git a/lib/hipe/icode/hipe_icode_ssa_copy_prop.erl b/lib/hipe/icode/hipe_icode_ssa_copy_prop.erl index 5e5bd2a178..b92b7cfa7a 100644 --- a/lib/hipe/icode/hipe_icode_ssa_copy_prop.erl +++ b/lib/hipe/icode/hipe_icode_ssa_copy_prop.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%------------------------------------------------------------------- %% File : hipe_icode_ssa_copy_prop.erl diff --git a/lib/hipe/icode/hipe_icode_ssa_struct_reuse.erl b/lib/hipe/icode/hipe_icode_ssa_struct_reuse.erl index 7613024787..ec4840980d 100644 --- a/lib/hipe/icode/hipe_icode_ssa_struct_reuse.erl +++ b/lib/hipe/icode/hipe_icode_ssa_struct_reuse.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2007-2015. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%======================================================================= %% File : hipe_icode_ssa_struct_reuse.erl diff --git a/lib/hipe/icode/hipe_icode_type.erl b/lib/hipe/icode/hipe_icode_type.erl index 794c27ebcc..aafaeb5a0a 100644 --- a/lib/hipe/icode/hipe_icode_type.erl +++ b/lib/hipe/icode/hipe_icode_type.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% %%%-------------------------------------------------------------------- %%% File : hipe_icode_type.erl %%% Author : Tobias Lindahl <[email protected]> @@ -100,7 +94,7 @@ -record(state, {info_map = gb_trees:empty() :: gb_trees:tree(), cfg :: cfg(), - liveness = gb_trees:empty() :: gb_trees:tree(), + liveness :: hipe_icode_ssa:liveness(), arg_types :: [erl_types:erl_type()], ret_type = [t_none()] :: [erl_types:erl_type()], lookupfun :: call_fun(), @@ -1416,9 +1410,10 @@ transform_element2(I) -> NewIndex = case test_type(integer, IndexType) of true -> - case t_number_vals(IndexType) of - unknown -> unknown; - [_|_] = Vals -> {number, Vals} + case {number_min(IndexType), number_max(IndexType)} of + {Lb0, Ub0} when is_integer(Lb0), is_integer(Ub0) -> + {number, Lb0, Ub0}; + {_, _} -> unknown end; _ -> unknown end, @@ -1433,19 +1428,19 @@ transform_element2(I) -> _ -> unknown end, case {NewIndex, MinSize} of - {{number, [_|_] = Ns}, {tuple, A}} when is_integer(A) -> - case lists:all(fun(X) -> 0 < X andalso X =< A end, Ns) of + {{number, Lb, Ub}, {tuple, A}} when is_integer(A) -> + case 0 < Lb andalso Ub =< A of true -> - case Ns of - [Idx] -> + case {Lb, Ub} of + {Idx, Idx} -> [_, Tuple] = hipe_icode:args(I), update_call_or_enter(I, #unsafe_element{index = Idx}, [Tuple]); - [_|_] -> + {_, _} -> NewFun = {element, [MinSize, valid]}, update_call_or_enter(I, NewFun) end; false -> - case lists:all(fun(X) -> hipe_tagscheme:is_fixnum(X) end, Ns) of + case lists:all(fun(X) -> hipe_tagscheme:is_fixnum(X) end, [Lb, Ub]) of true -> NewFun = {element, [MinSize, fixnums]}, update_call_or_enter(I, NewFun); @@ -1460,7 +1455,7 @@ transform_element2(I) -> NewFun = {element, [MinSize, fixnums]}, update_call_or_enter(I, NewFun); false -> - NewFun = {element, [MinSize, NewIndex]}, + NewFun = {element, [MinSize, NewIndex]}, update_call_or_enter(I, NewFun) end end. diff --git a/lib/hipe/icode/hipe_icode_type.hrl b/lib/hipe/icode/hipe_icode_type.hrl index 466e157646..b7c200eef1 100644 --- a/lib/hipe/icode/hipe_icode_type.hrl +++ b/lib/hipe/icode/hipe_icode_type.hrl @@ -1,8 +1,3 @@ -%%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%%------------------------------------------------------------------- %%% File : hipe_icode_type.hrl diff --git a/lib/hipe/llvm/Makefile b/lib/hipe/llvm/Makefile index 88016a7d8b..817ff67dcd 100644 --- a/lib/hipe/llvm/Makefile +++ b/lib/hipe/llvm/Makefile @@ -1,7 +1,7 @@ # # %CopyrightBegin% # -# Copyright Ericsson AB 2001-2016. All Rights Reserved. +# Copyright Ericsson AB 2001-2017. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -73,8 +73,7 @@ include ../native.mk ERL_COMPILE_FLAGS += -Werror +inline +warn_export_vars #+warn_missing_spec # if in 32 bit backend define BIT32 symbol -ARCH = $(shell echo $(TARGET) | sed 's/^\(x86_64\)-.*/64bit/') -ifneq ($(ARCH), 64bit) +ifneq ($(BITS64),yes) ERL_COMPILE_FLAGS += -DBIT32 endif diff --git a/lib/hipe/llvm/hipe_llvm.erl b/lib/hipe/llvm/hipe_llvm.erl index b22f8fb320..641d3fda0a 100644 --- a/lib/hipe/llvm/hipe_llvm.erl +++ b/lib/hipe/llvm/hipe_llvm.erl @@ -862,7 +862,7 @@ pp_ins(Dev, Ver, I) -> true -> write(Dev, "volatile "); false -> ok end, - pp_dereference_type(Dev, Ver, load_p_type(I)), + pp_dereference_type(Dev, load_p_type(I)), write(Dev, [" ", load_pointer(I), " "]), case load_alignment(I) of [] -> ok; @@ -898,7 +898,7 @@ pp_ins(Dev, Ver, I) -> true -> write(Dev, "inbounds "); false -> ok end, - pp_dereference_type(Dev, Ver, getelementptr_p_type(I)), + pp_dereference_type(Dev, getelementptr_p_type(I)), write(Dev, [" ", getelementptr_value(I)]), pp_typed_idxs(Dev, getelementptr_typed_idxs(I)), write(Dev, "\n"); @@ -959,10 +959,8 @@ pp_ins(Dev, Ver, I) -> pp_args(Dev, fun_def_arglist(I)), write(Dev, ") "), pp_options(Dev, fun_def_fn_attrs(I)), - case Ver >= {3,7} of false -> ok; true -> - write(Dev, "personality i32 (i32, i64, i8*,i8*)* " - "@__gcc_personality_v0 ") - end, + write(Dev, "personality i32 (i32, i64, i8*,i8*)* " + "@__gcc_personality_v0 "), case fun_def_align(I) of [] -> ok; N -> write(Dev, ["align ", N]) @@ -997,12 +995,7 @@ pp_ins(Dev, Ver, I) -> pp_type(Dev, const_decl_type(I)), write(Dev, [" ", const_decl_value(I), "\n"]); #llvm_landingpad{} -> - write(Dev, "landingpad { i8*, i32 } "), - case Ver < {3,7} of false -> ok; true -> - write(Dev, "personality i32 (i32, i64, i8*,i8*)* " - "@__gcc_personality_v0 ") - end, - write(Dev, "cleanup\n"); + write(Dev, "landingpad { i8*, i32 } cleanup\n"); #llvm_asm{} -> write(Dev, [asm_instruction(I), "\n"]); #llvm_adj_stack{} -> @@ -1011,15 +1004,7 @@ pp_ins(Dev, Ver, I) -> pp_type(Dev, adj_stack_type(I)), write(Dev, [" ", adj_stack_offset(I),")\n"]); #llvm_meta{} -> - write(Dev, ["!", meta_id(I), " = "]), - Named = case string:to_integer(meta_id(I)) of - {_, ""} -> false; - _ -> true - end, - case Ver < {3,6} andalso not Named of - true -> write(Dev, "metadata !{metadata "); - false -> write(Dev, "!{ ") - end, + write(Dev, ["!", meta_id(I), " = !{ "]), write(Dev, string:join([if is_list(Op) -> ["!\"", Op, "\""]; is_integer(Op) -> ["i32 ", integer_to_list(Op)]; is_record(Op, llvm_meta) -> @@ -1030,15 +1015,10 @@ pp_ins(Dev, Ver, I) -> exit({?MODULE, pp_ins, {"Unknown LLVM instruction", Other}}) end. -%% @doc Print the type of a dereference in an LLVM instruction using syntax -%% parsable by the specified LLVM version. -pp_dereference_type(Dev, Ver, Type) -> - case Ver >= {3,7} of - false -> ok; - true -> - pp_type(Dev, pointer_type(Type)), - write(Dev, ", ") - end, +%% @doc Print the type of a dereference in an LLVM instruction. +pp_dereference_type(Dev, Type) -> + pp_type(Dev, pointer_type(Type)), + write(Dev, ", "), pp_type(Dev, Type). %% @doc Pretty-print a list of types diff --git a/lib/hipe/llvm/hipe_llvm_main.erl b/lib/hipe/llvm/hipe_llvm_main.erl index 7f70826046..4eec0c752b 100644 --- a/lib/hipe/llvm/hipe_llvm_main.erl +++ b/lib/hipe/llvm/hipe_llvm_main.erl @@ -108,8 +108,10 @@ llvm_llc(Dir, Filename, Ver, Options) -> OptLevel = trans_optlev_flag(llc, Options), VerFlags = llc_ver_flags(Ver), Align = find_stack_alignment(), + Target = llc_target_opt(), LlcFlags = [OptLevel, "-code-model=medium", "-stack-alignment=" ++ Align , "-tailcallopt", "-filetype=asm" %FIXME + , Target | VerFlags], Command = "llc " ++ fix_opts(LlcFlags) ++ " " ++ Source, %% io:format("LLC: ~s~n", [Command]), @@ -123,7 +125,8 @@ llvm_llc(Dir, Filename, Ver, Options) -> compile(Dir, Fun_Name, Compiler) -> Source = Dir ++ Fun_Name ++ ".s", Dest = Dir ++ Fun_Name ++ ".o", - Command = Compiler ++ " -c " ++ Source ++ " -o " ++ Dest, + Target = compiler_target_opt(), + Command = Compiler ++ " " ++ Target ++ " -c " ++ Source ++ " -o " ++ Dest, %% io:format("~s: ~s~n", [Compiler, Command]), case os:cmd(Command) of "" -> ok; @@ -137,6 +140,18 @@ find_stack_alignment() -> _ -> exit({?MODULE, find_stack_alignment, "Unimplemented architecture"}) end. +llc_target_opt() -> + case get(hipe_target_arch) of + x86 -> "-march=x86"; + amd64 -> "-march=x86-64" + end. + +compiler_target_opt() -> + case get(hipe_target_arch) of + x86 -> "-m32"; + amd64 -> "-m64" + end. + %% @doc Join options. fix_opts(Opts) -> string:join(Opts, " "). @@ -265,15 +280,11 @@ fix_relocations(Relocs, RelocsDict, MFA) -> fix_reloc(#elf_rel{symbol=#elf_sym{name=Name, section=undefined, type=notype}, offset=Offset, type=?PCREL_T, addend=?PCREL_A}, - RelocsDict, {ModName,_,_}) when Name =/= "" -> + RelocsDict, {_,_,_}) when Name =/= "" -> case dict:fetch(Name, RelocsDict) of - {call, {bif, BifName, _}} -> {?CALL_LOCAL, Offset, BifName}; - %% MFA calls to functions in the same module are of type 3, while all - %% other MFA calls are of type 2. - %% XXX: Does this code break hot code loading (by transforming external - %% calls into local calls?) - {call, {ModName,_F,_A}=CallMFA} -> {?CALL_LOCAL, Offset, CallMFA}; - {call, CallMFA} -> {?CALL_REMOTE, Offset, CallMFA} + {call, _, {bif, BifName, _}} -> {?CALL_LOCAL, Offset, BifName}; + {call, not_remote, CallMFA} -> {?CALL_LOCAL, Offset, CallMFA}; + {call, remote, CallMFA} -> {?CALL_REMOTE, Offset, CallMFA} end; fix_reloc(#elf_rel{symbol=#elf_sym{name=Name, section=undefined, type=notype}, offset=Offset, type=?ABS_T, addend=?ABS_A}, @@ -288,7 +299,7 @@ fix_reloc(#elf_rel{symbol=#elf_sym{name=Name, section=#elf_shdr{name=?TEXT}, offset=Offset, type=?PCREL_T, addend=?PCREL_A}, RelocsDict, MFA) when Name =/= "" -> case dict:fetch(Name, RelocsDict) of - {call, MFA} -> {?CALL_LOCAL, Offset, MFA} + {call, not_remote, MFA} -> {?CALL_LOCAL, Offset, MFA} end; fix_reloc(#elf_rel{symbol=#elf_sym{name=Name, section=#elf_shdr{name=?RODATA}, type=object}, @@ -416,7 +427,7 @@ calls_with_stack_args(Dict) -> calls_with_stack_args(dict:to_list(Dict), []). calls_with_stack_args([], Calls) -> Calls; -calls_with_stack_args([ {_Name, {call, {M, F, A}}} | Rest], Calls) +calls_with_stack_args([ {_Name, {call, _, {M, F, A}}} | Rest], Calls) when A > ?NR_ARG_REGS -> Call = case M of diff --git a/lib/hipe/llvm/hipe_llvm_merge.erl b/lib/hipe/llvm/hipe_llvm_merge.erl index 6e891ac3b0..58d862fbb2 100644 --- a/lib/hipe/llvm/hipe_llvm_merge.erl +++ b/lib/hipe/llvm/hipe_llvm_merge.erl @@ -13,7 +13,7 @@ finalize(CompiledCode, Closures, Exports) -> Code = [{MFA, [], ConstTab} || {MFA, _, _ , ConstTab, _, _} <- CompiledCode1], {ConstAlign, ConstSize, ConstMap, RefsFromConsts} = - hipe_pack_constants:pack_constants(Code, ?ARCH_REGISTERS:alignment()), + hipe_pack_constants:pack_constants(Code), %% Compute total code size separately as a sanity check for alignment CodeSize = compute_code_size(CompiledCode1, 0), %% io:format("Code Size (pre-computed): ~w~n", [CodeSize]), diff --git a/lib/hipe/llvm/hipe_rtl_to_llvm.erl b/lib/hipe/llvm/hipe_rtl_to_llvm.erl index 2179f7f765..79e1bfd381 100644 --- a/lib/hipe/llvm/hipe_rtl_to_llvm.erl +++ b/lib/hipe/llvm/hipe_rtl_to_llvm.erl @@ -156,9 +156,6 @@ translate_instr(I, Relocs, Data) -> #alub{} -> {I2, Relocs2} = trans_alub(I, Relocs), {I2, Relocs2, Data}; - #branch{} -> - {I2, Relocs2} = trans_branch(I, Relocs), - {I2, Relocs2, Data}; #call{} -> {I2, Relocs2} = case hipe_rtl:call_fun(I) of @@ -255,10 +252,9 @@ trans_alub(I, Relocs) -> trans_alub_overflow(I, Sign, Relocs) -> {Src1, I1} = trans_src(hipe_rtl:alub_src1(I)), {Src2, I2} = trans_src(hipe_rtl:alub_src2(I)), - RtlDst = hipe_rtl:alub_dst(I), TmpDst = mk_temp(), Name = trans_alub_op(I, Sign), - NewRelocs = relocs_store(Name, {call, {llvm, Name, 2}}, Relocs), + NewRelocs = relocs_store(Name, {call, remote, {llvm, Name, 2}}, Relocs), WordTy = hipe_llvm:mk_int(?BITS_IN_WORD), ReturnType = hipe_llvm:mk_struct([WordTy, hipe_llvm:mk_int(1)]), T1 = mk_temp(), @@ -266,7 +262,10 @@ trans_alub_overflow(I, Sign, Relocs) -> [{WordTy, Src1}, {WordTy, Src2}], []), %% T1{0}: result of the operation I4 = hipe_llvm:mk_extractvalue(TmpDst, ReturnType, T1 , "0", []), - I5 = store_stack_dst(TmpDst, RtlDst), + I5 = case hipe_rtl:alub_has_dst(I) of + false -> []; + true -> store_stack_dst(TmpDst, hipe_rtl:alub_dst(I)) + end, T2 = mk_temp(), %% T1{1}: Boolean variable indicating overflow I6 = hipe_llvm:mk_extractvalue(T2, ReturnType, T1, "1", []), @@ -311,42 +310,35 @@ trans_alub_op(I, Sign) -> Name ++ Type. trans_alub_no_overflow(I, Relocs) -> + {Src1, I1} = trans_src(hipe_rtl:alub_src1(I)), + {Src2, I2} = trans_src(hipe_rtl:alub_src2(I)), + WordTy = hipe_llvm:mk_int(?BITS_IN_WORD), %% alu - T = hipe_rtl:mk_alu(hipe_rtl:alub_dst(I), hipe_rtl:alub_src1(I), - hipe_rtl:alub_op(I), hipe_rtl:alub_src2(I)), - %% A trans_alu instruction cannot change relocations - {I1, _} = trans_alu(T, Relocs), + {CmpLhs, CmpRhs, I5, Cond} = + case {hipe_rtl:alub_has_dst(I), hipe_rtl:alub_op(I)} of + {false, 'sub'} -> + Cond0 = trans_branch_rel_op(hipe_rtl:alub_cond(I)), + {Src1, Src2, [], Cond0}; + {HasDst, AlubOp} -> + TmpDst = mk_temp(), + Op = trans_op(AlubOp), + I3 = hipe_llvm:mk_operation(TmpDst, Op, WordTy, Src1, Src2, []), + I4 = case HasDst of + false -> []; + true -> store_stack_dst(TmpDst, hipe_rtl:alub_dst(I)) + end, + Cond0 = trans_alub_rel_op(hipe_rtl:alub_cond(I)), + {TmpDst, "0", [I4, I3], Cond0} + end, %% icmp - %% Translate destination as src, to match with the semantics of instruction - {Dst, I2} = trans_src(hipe_rtl:alub_dst(I)), - Cond = trans_rel_op(hipe_rtl:alub_cond(I)), T3 = mk_temp(), - WordTy = hipe_llvm:mk_int(?BITS_IN_WORD), - I5 = hipe_llvm:mk_icmp(T3, Cond, WordTy, Dst, "0"), + I6 = hipe_llvm:mk_icmp(T3, Cond, WordTy, CmpLhs, CmpRhs), %% br Metadata = branch_metadata(hipe_rtl:alub_pred(I)), True_label = mk_jump_label(hipe_rtl:alub_true_label(I)), False_label = mk_jump_label(hipe_rtl:alub_false_label(I)), - I6 = hipe_llvm:mk_br_cond(T3, True_label, False_label, Metadata), - {[I6, I5, I2, I1], Relocs}. - -%% -%% branch -%% -trans_branch(I, Relocs) -> - {Src1, I1} = trans_src(hipe_rtl:branch_src1(I)), - {Src2, I2} = trans_src(hipe_rtl:branch_src2(I)), - Cond = trans_rel_op(hipe_rtl:branch_cond(I)), - %% icmp - T1 = mk_temp(), - WordTy = hipe_llvm:mk_int(?BITS_IN_WORD), - I3 = hipe_llvm:mk_icmp(T1, Cond, WordTy, Src1, Src2), - %% br - True_label = mk_jump_label(hipe_rtl:branch_true_label(I)), - False_label = mk_jump_label(hipe_rtl:branch_false_label(I)), - Metadata = branch_metadata(hipe_rtl:branch_pred(I)), - I4 = hipe_llvm:mk_br_cond(T1, True_label, False_label, Metadata), - {[I4, I3, I2, I1], Relocs}. + I7 = hipe_llvm:mk_br_cond(T3, True_label, False_label, Metadata), + {[I7, I6, I5, I2, I1], Relocs}. branch_metadata(X) when X =:= 0.5 -> []; branch_metadata(X) when X > 0.5 -> ?BRANCH_META_TAKEN; @@ -365,7 +357,7 @@ trans_call(I, Relocs) -> {LoadedFixedRegs, I2} = load_fixed_regs(FixedRegs), FinalArgs = fix_reg_args(LoadedFixedRegs) ++ CallArgs, {Name, I3, Relocs2} = - trans_call_name(RtlCallName, Relocs1, CallArgs, FinalArgs), + trans_call_name(RtlCallName, hipe_rtl:call_type(I), Relocs1, CallArgs, FinalArgs), T1 = mk_temp(), WordTy = hipe_llvm:mk_int(?BITS_IN_WORD), FunRetTy = hipe_llvm:mk_struct(lists:duplicate(?NR_PINNED_REGS + 1, WordTy)), @@ -431,17 +423,17 @@ expose_closure(CallName, CallArgs, Relocs) -> {[], Relocs} end. -trans_call_name(RtlCallName, Relocs, CallArgs, FinalArgs) -> +trans_call_name(RtlCallName, RtlCallType, Relocs, CallArgs, FinalArgs) -> case RtlCallName of PrimOp when is_atom(PrimOp) -> LlvmName = trans_prim_op(PrimOp), Relocs1 = - relocs_store(LlvmName, {call, {bif, PrimOp, length(CallArgs)}}, Relocs), + relocs_store(LlvmName, {call, not_remote, {bif, PrimOp, length(CallArgs)}}, Relocs), {"@" ++ LlvmName, [], Relocs1}; {M, F, A} when is_atom(M), is_atom(F), is_integer(A) -> - LlvmName = trans_mfa_name({M, F, A}), + LlvmName = trans_mfa_name({M, F, A}, RtlCallType), Relocs1 = - relocs_store(LlvmName, {call, {M, F, length(CallArgs)}}, Relocs), + relocs_store(LlvmName, {call, RtlCallType, {M, F, length(CallArgs)}}, Relocs), {"@" ++ LlvmName, [], Relocs1}; Reg -> case hipe_rtl:is_reg(Reg) of @@ -502,7 +494,7 @@ trans_enter(I, Relocs) -> {LoadedFixedRegs, I1} = load_fixed_regs(FixedRegs), FinalArgs = fix_reg_args(LoadedFixedRegs) ++ CallArgs, {Name, I2, NewRelocs} = - trans_call_name(hipe_rtl:enter_fun(I), Relocs, CallArgs, FinalArgs), + trans_call_name(hipe_rtl:enter_fun(I), hipe_rtl:enter_type(I), Relocs, CallArgs, FinalArgs), T1 = mk_temp(), WordTy = hipe_llvm:mk_int(?BITS_IN_WORD), FunRetTy = hipe_llvm:mk_struct(lists:duplicate(?NR_PINNED_REGS + 1, WordTy)), @@ -1036,8 +1028,12 @@ llvm_id(C) -> io_lib:format("_~2.16.0B_",[C]). %% @doc Create an acceptable LLVM identifier for an MFA. -trans_mfa_name({M,F,A}) -> - N = atom_to_list(M) ++ "." ++ atom_to_list(F) ++ "." ++ integer_to_list(A), +trans_mfa_name({M,F,A}, Linkage) -> + N0 = atom_to_list(M) ++ "." ++ atom_to_list(F) ++ "." ++ integer_to_list(A), + N = case Linkage of + not_remote -> N0; + remote -> "rem." ++ N0 + end, make_llvm_id(N). %%------------------------------------------------------------------------------ @@ -1158,7 +1154,7 @@ trans_dst(A) -> true -> "%DL" ++ integer_to_list(hipe_rtl:const_label_label(A)) ++ "_var"; false -> - exit({?MODULE, trans_dst, {"Bad RTL argument",A}}) + error(badarg, [A]) end end end, @@ -1256,14 +1252,19 @@ trans_op(Op) -> Other -> exit({?MODULE, trans_op, {"Unknown RTL operator", Other}}) end. -trans_rel_op(Op) -> +trans_branch_rel_op(Op) -> case Op of - eq -> eq; - ne -> ne; gtu -> ugt; geu -> uge; ltu -> ult; leu -> ule; + _ -> trans_alub_rel_op(Op) + end. + +trans_alub_rel_op(Op) -> + case Op of + eq -> eq; + ne -> ne; gt -> sgt; ge -> sge; lt -> slt; @@ -1296,7 +1297,10 @@ insn_dst(I) -> #alu{} -> [hipe_rtl:alu_dst(I)]; #alub{} -> - [hipe_rtl:alub_dst(I)]; + case hipe_rtl:alub_has_dst(I) of + true -> [hipe_rtl:alub_dst(I)]; + false -> [] + end; #call{} -> case hipe_rtl:call_dstlist(I) of [] -> []; @@ -1339,7 +1343,7 @@ llvm_type_from_size(Size) -> %% precoloured registers that are passed as arguments must be stored to %% the corresonding stack slots. create_function_definition(Fun, Params, Code, LocalVars) -> - FunctionName = trans_mfa_name(Fun), + FunctionName = trans_mfa_name(Fun, not_remote), FixedRegs = fixed_registers(), %% Reverse parameters to match with the Erlang calling convention ReversedParams = @@ -1360,7 +1364,7 @@ create_function_definition(Fun, Params, Code, LocalVars) -> EntryBlock = lists:flatten([EntryLabel, ExceptionSync, I2, LocalVars, StoredParams, I3]), Final_Code = EntryBlock ++ Code, - FunctionOptions = [nounwind, noredzone, list_to_atom("gc \"erlang\"")], + FunctionOptions = [nounwind, noredzone, 'gc "erlang"'], WordTy = hipe_llvm:mk_int(?BITS_IN_WORD), FunRetTy = hipe_llvm:mk_struct(lists:duplicate(?NR_PINNED_REGS + 1, WordTy)), hipe_llvm:mk_fun_def([], [], "cc 11", [], FunRetTy, FunctionName, Args, @@ -1427,7 +1431,7 @@ relocs_to_list(Relocs) -> %% constants/labels. handle_relocations(Relocs, Data, Fun) -> RelocsList = relocs_to_list(Relocs), - %% Seperate Relocations according to their type + %% Separate Relocations according to their type {CallList, AtomList, ClosureList, ClosureLabels, SwitchList} = seperate_relocs(RelocsList), %% Create code to declare atoms @@ -1458,9 +1462,9 @@ handle_relocations(Relocs, Data, Fun) -> Relocs2 = lists:foldl(fun const_to_dict/2, Relocs1, ConstLabels), %% Temporary Store inc_stack and llvm_fix_pinned_regs to Dictionary %% TODO: Remove this - Relocs3 = dict:store("inc_stack_0", {call, {bif, inc_stack_0, 0}}, Relocs2), + Relocs3 = dict:store("inc_stack_0", {call, not_remote, {bif, inc_stack_0, 0}}, Relocs2), Relocs4 = dict:store("hipe_bifs.llvm_fix_pinned_regs.0", - {call, {hipe_bifs, llvm_fix_pinned_regs, 0}}, Relocs3), + {call, remote, {hipe_bifs, llvm_fix_pinned_regs, 0}}, Relocs3), BranchMetaData = [ hipe_llvm:mk_meta(?BRANCH_META_TAKEN, ["branch_weights", 99, 1]) , hipe_llvm:mk_meta(?BRANCH_META_NOT_TAKEN, ["branch_weights", 1, 99]) @@ -1470,7 +1474,7 @@ handle_relocations(Relocs, Data, Fun) -> LocalVariables = AtomLoad ++ ClosureLoad ++ ConstLoad, {Relocs4, ExternalDeclarations, LocalVariables}. -%% @doc Seperate relocations according to their type. +%% @doc Separate relocations according to their type. seperate_relocs(Relocs) -> seperate_relocs(Relocs, [], [], [], [], []). @@ -1478,9 +1482,10 @@ seperate_relocs([], CallAcc, AtomAcc, ClosureAcc, LabelAcc, JmpTableAcc) -> {CallAcc, AtomAcc, ClosureAcc, LabelAcc, JmpTableAcc}; seperate_relocs([R|Rs], CallAcc, AtomAcc, ClosureAcc, LabelAcc, JmpTableAcc) -> case R of - {_, {call, _}} -> + {_, {call, _, _}} -> seperate_relocs(Rs, [R | CallAcc], AtomAcc, ClosureAcc, LabelAcc, JmpTableAcc); + {_, {atom, _}} -> seperate_relocs(Rs, CallAcc, [R | AtomAcc], ClosureAcc, LabelAcc, JmpTableAcc); @@ -1525,7 +1530,7 @@ load_closure({ClosureName, _})-> %% @doc Declaration of a local variable for a switch jump table. declare_switches(JumpTableList, Fun) -> - FunName = trans_mfa_name(Fun), + FunName = trans_mfa_name(Fun, not_remote), [declare_switch_table(X, FunName) || X <- JumpTableList]. declare_switch_table({Name, {switch, {TableType, Labels, _, _}, _}}, FunName) -> @@ -1541,7 +1546,7 @@ declare_switch_table({Name, {switch, {TableType, Labels, _, _}, _}}, FunName) -> declare_closure_labels([], Relocs, _Fun) -> {[], Relocs}; declare_closure_labels(ClosureLabels, Relocs, Fun) -> - FunName = trans_mfa_name(Fun), + FunName = trans_mfa_name(Fun, not_remote), {LabelList, ArityList} = lists:unzip([{mk_jump_label(Label), A} || {_, {closure_label, Label, A}} <- ClosureLabels]), @@ -1557,13 +1562,13 @@ declare_closure_labels(ClosureLabels, Relocs, Fun) -> hipe_llvm:mk_const_decl("@table_closures", "constant", TableType, List4), {[ConstDecl], Relocs1}. -%% @doc A call is treated as non external only in a case of a recursive +%% @doc A call is treated as non external only in a case of a local recursive %% function. -is_external_call({_, {call, Fun}}, Fun) -> false; +is_external_call({_, {call, not_remote, MFA}}, MFA) -> false; is_external_call(_, _) -> true. %% @doc External declaration of a function. -call_to_decl({Name, {call, MFA}}) -> +call_to_decl({Name, {call, _, MFA}}) -> {M, _F, A} = MFA, CConv = "cc 11", WordTy = hipe_llvm:mk_int(?BITS_IN_WORD), diff --git a/lib/hipe/main/hipe.app.src b/lib/hipe/main/hipe.app.src index f8487151d7..3c3a1004f1 100644 --- a/lib/hipe/main/hipe.app.src +++ b/lib/hipe/main/hipe.app.src @@ -2,7 +2,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2002-2015. All Rights Reserved. +%% Copyright Ericsson AB 2002-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -49,12 +49,13 @@ hipe_amd64_ra_naive, hipe_amd64_ra_postconditions, hipe_amd64_ra_sse2_postconditions, - hipe_amd64_ra_x87_ls, hipe_amd64_registers, hipe_amd64_specific, hipe_amd64_specific_sse2, hipe_amd64_specific_x87, hipe_amd64_spill_restore, + hipe_amd64_sse2, + hipe_amd64_subst, hipe_amd64_x87, hipe_arm, hipe_arm_assemble, @@ -73,7 +74,9 @@ hipe_arm_ra_postconditions, hipe_arm_registers, hipe_arm_specific, + hipe_arm_subst, hipe_bb, + hipe_bb_weights, hipe_beam_to_icode, hipe_coalescing_regalloc, hipe_consttab, @@ -81,6 +84,7 @@ hipe_digraph, hipe_dominators, hipe_dot, + hipe_dsets, hipe_gen_cfg, hipe_gensym, hipe_graph_coloring_regalloc, @@ -142,9 +146,13 @@ hipe_ppc_registers, hipe_ppc_specific, hipe_ppc_specific_fp, + hipe_ppc_subst, hipe_profile, + hipe_range_split, hipe_reg_worklists, hipe_regalloc_loop, + hipe_regalloc_prepass, + hipe_restore_reuse, hipe_rtl, hipe_rtl_arch, hipe_rtl_arith_32, @@ -171,6 +179,7 @@ hipe_rtl_to_sparc, hipe_rtl_to_x86, hipe_rtl_varmap, + hipe_segment_trees, hipe_sdi, hipe_sparc, hipe_sparc_assemble, @@ -193,6 +202,7 @@ hipe_sparc_registers, hipe_sparc_specific, hipe_sparc_specific_fp, + hipe_sparc_subst, hipe_spillcost, hipe_spillmin, hipe_spillmin_color, @@ -216,14 +226,14 @@ hipe_x86_ra_ls, hipe_x86_ra_naive, hipe_x86_ra_postconditions, - hipe_x86_ra_x87_ls, hipe_x86_registers, hipe_x86_specific, hipe_x86_specific_x87, hipe_x86_spill_restore, + hipe_x86_subst, hipe_x86_x87]}, {registered,[]}, {applications, [kernel,stdlib]}, {env, []}, {runtime_dependencies, ["syntax_tools-1.6.14","stdlib-2.5","kernel-3.0", - "erts-7.1","compiler-5.0"]}]}. + "erts-9.0","compiler-5.0"]}]}. diff --git a/lib/hipe/main/hipe.erl b/lib/hipe/main/hipe.erl index cc0148a80e..19b4e8bfe2 100644 --- a/lib/hipe/main/hipe.erl +++ b/lib/hipe/main/hipe.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% %% ==================================================================== %% Copyright (c) 1998 by Erik Johansson. All Rights Reserved %% ==================================================================== @@ -447,7 +441,7 @@ compile(Name, File, Opts0) when is_atom(Name) -> ?error_msg("Cannot get Core Erlang code from BEAM binary.",[]), ?EXIT({cant_compile_core_from_binary}); true -> - case filename:find_src(filename:rootname(File, ".beam")) of + case filelib:find_source(filename:rootname(File,".beam") ++ ".beam") of {error, _} -> ?error_msg("Cannot find source code for ~p.", [File]), ?EXIT({cant_find_source_code}); @@ -661,7 +655,7 @@ run_compiler_1(Name, DisasmFun, IcodeFun, Options) -> case proplists:get_bool(to_llvm, Opts0) andalso not llvm_support_available() of true -> - ?error_msg("No LLVM version 3.4 or greater " + ?error_msg("No LLVM version 3.9 or greater " "found in $PATH; aborting " "native code compilation.\n", []), ?EXIT(cant_find_required_llvm_version); @@ -1139,9 +1133,10 @@ help_hiper() -> help_options() -> HostArch = erlang:system_info(hipe_architecture), - O1 = expand_options([o1], HostArch), - O2 = expand_options([o2], HostArch), - O3 = expand_options([o3], HostArch), + O0 = expand_options([o0] ++ ?COMPILE_DEFAULTS, HostArch), + O1 = expand_options([o1] ++ ?COMPILE_DEFAULTS, HostArch), + O2 = expand_options([o2] ++ ?COMPILE_DEFAULTS, HostArch), + O3 = expand_options([o3] ++ ?COMPILE_DEFAULTS, HostArch), io:format("HiPE Compiler Options\n" ++ " Boolean-valued options generally have corresponding " ++ "aliases `no_...',\n" ++ @@ -1160,15 +1155,16 @@ help_options() -> " pp_x86 = pp_native,\n" ++ " pp_amd64 = pp_native,\n" ++ " pp_ppc = pp_native,\n" ++ - " o0,\n" ++ - " o1 = ~p,\n" ++ + " o0 = ~p,\n" ++ + " o1 = ~p ++ o0,\n" ++ " o2 = ~p ++ o1,\n" ++ " o3 = ~p ++ o2.\n", [ordsets:from_list([verbose, debug, time, load, pp_beam, pp_icode, pp_rtl, pp_native, pp_asm, timeout]), expand_options([pp_all], HostArch), - O1 -- [o1], + O0 -- [o0], + (O1 -- O0) -- [o1], (O2 -- O1) -- [o2], (O3 -- O2) -- [o3]]), ok. @@ -1234,6 +1230,18 @@ option_text(regalloc) -> " optimistic - another variant of a coalescing allocator"; option_text(remove_comments) -> "Strip comments from intermediate code"; +option_text(ra_range_split) -> + "Split live ranges of temporaries live over call instructions\n" + "before performing register allocation.\n" + "Heuristically tries to move stack accesses to the cold path of function.\n" + "This range splitter is more sophisticated than 'ra_restore_reuse', but has\n" + "a significantly larger impact on compile time.\n" + "Should only be used with move coalescing register allocators."; +option_text(ra_restore_reuse) -> + "Split live ranges of temporaries such that straight-line\n" + "code will not need to contain multiple restores from the same stack\n" + "location.\n" + "Should only be used with move coalescing register allocators."; option_text(rtl_ssa) -> "Perform SSA conversion on the RTL level -- default starting at O2"; option_text(rtl_ssa_const_prop) -> @@ -1373,6 +1381,14 @@ opt_keys() -> pp_rtl_lcm, pp_rtl_ssapre, pp_rtl_linear, + ra_partitioned, + ra_prespill, + ra_range_split, + ra_restore_reuse, + range_split_min_gain, + range_split_mode1_fudge, + range_split_weight_power, + range_split_weights, regalloc, remove_comments, rtl_ssa, @@ -1403,8 +1419,16 @@ opt_keys() -> %% Definitions: +o0_opts(_TargetArch) -> + [concurrent_comp, {regalloc,linear_scan}]. + o1_opts(TargetArch) -> - Common = [inline_fp, pmatch, peephole], + Common = [inline_fp, pmatch, peephole, ra_prespill, ra_partitioned, + icode_ssa_const_prop, icode_ssa_copy_prop, icode_inline_bifs, + rtl_ssa, rtl_ssa_const_prop, rtl_ssapre, + spillmin_color, use_indexing, remove_comments, + binary_opt, {regalloc,coalescing}, ra_restore_reuse + | o0_opts(TargetArch)], case TargetArch of ultrasparc -> Common; @@ -1423,11 +1447,9 @@ o1_opts(TargetArch) -> end. o2_opts(TargetArch) -> - Common = [icode_ssa_const_prop, icode_ssa_copy_prop, % icode_ssa_struct_reuse, - icode_type, icode_inline_bifs, icode_call_elim, rtl_lcm, - rtl_ssa, rtl_ssa_const_prop, - spillmin_color, use_indexing, remove_comments, - concurrent_comp, binary_opt | o1_opts(TargetArch)], + Common = [icode_type, icode_call_elim, % icode_ssa_struct_reuse, + ra_range_split, range_split_weights, % XXX: Having defaults here is ugly + rtl_lcm | (o1_opts(TargetArch) -- [rtl_ssapre, ra_restore_reuse])], case TargetArch of T when T =:= amd64 orelse T =:= ppc64 -> % 64-bit targets [icode_range | Common]; @@ -1437,7 +1459,7 @@ o2_opts(TargetArch) -> o3_opts(TargetArch) -> %% no point checking for target architecture since this is checked in 'o1' - [icode_range, {regalloc,coalescing} | o2_opts(TargetArch)]. + [icode_range | o2_opts(TargetArch)]. %% Note that in general, the normal form for options should be positive. %% This is a good programming convention, so that tests in the code say @@ -1473,6 +1495,11 @@ opt_negations() -> {no_pp_native, pp_native}, {no_pp_rtl_lcm, pp_rtl_lcm}, {no_pp_rtl_ssapre, pp_rtl_ssapre}, + {no_ra_partitioned, ra_partitioned}, + {no_ra_prespill, ra_prespill}, + {no_ra_range_split, ra_range_split}, + {no_ra_restore_reuse, ra_restore_reuse}, + {no_range_split_weights, range_split_weights}, {no_remove_comments, remove_comments}, {no_rtl_ssa, rtl_ssa}, {no_rtl_ssa_const_prop, rtl_ssa_const_prop}, @@ -1502,7 +1529,8 @@ opt_basic_expansions() -> [{pp_all, [pp_beam, pp_icode, pp_rtl, pp_native]}]. opt_expansions(TargetArch) -> - [{o1, o1_opts(TargetArch)}, + [{o0, o0_opts(TargetArch)}, + {o1, o1_opts(TargetArch)}, {o2, o2_opts(TargetArch)}, {o3, o3_opts(TargetArch)}, {to_llvm, llvm_opts(o3, TargetArch)}, @@ -1549,13 +1577,21 @@ expand_kt2(Opts) -> -spec expand_options(comp_options(), hipe_architecture()) -> comp_options(). -expand_options(Opts, TargetArch) -> +expand_options(Opts0, TargetArch) -> + Opts1 = proplists:normalize(Opts0, [{aliases, opt_aliases()}]), + Opts = normalise_opt_options(Opts1), proplists:normalize(Opts, [{negations, opt_negations()}, - {aliases, opt_aliases()}, {expand, opt_basic_expansions()}, {expand, opt_expansions(TargetArch)}, {negations, opt_negations()}]). +normalise_opt_options([o0|Opts]) -> [o0] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([o1|Opts]) -> [o1] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([o2|Opts]) -> [o2] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([o3|Opts]) -> [o3] ++ (Opts -- [o0, o1, o2, o3]); +normalise_opt_options([O|Opts]) -> [O|normalise_opt_options(Opts)]; +normalise_opt_options([]) -> []. + -spec check_options(comp_options()) -> 'ok'. check_options(Opts) -> @@ -1572,7 +1608,7 @@ check_options(Opts) -> -spec llvm_support_available() -> boolean(). llvm_support_available() -> - get_llvm_version() >= {3,4}. + get_llvm_version() >= {3,9}. -type llvm_version() :: {Major :: integer(), Minor :: integer()}. diff --git a/lib/hipe/main/hipe.hrl.src b/lib/hipe/main/hipe.hrl.src index b001743038..b9accf0054 100644 --- a/lib/hipe/main/hipe.hrl.src +++ b/lib/hipe/main/hipe.hrl.src @@ -1,9 +1,5 @@ %% -*- mode: erlang; erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Filename : hipe.hrl (automatically generated by hipe.hrl.src) diff --git a/lib/hipe/main/hipe_main.erl b/lib/hipe/main/hipe_main.erl index 8737560ad5..dca6fddec3 100644 --- a/lib/hipe/main/hipe_main.erl +++ b/lib/hipe/main/hipe_main.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% %% @doc This is the HiPE compiler's main "loop". %% %% <h3>Purpose</h3> diff --git a/lib/hipe/misc/Makefile b/lib/hipe/misc/Makefile index 72cfff21a8..e5033e444b 100644 --- a/lib/hipe/misc/Makefile +++ b/lib/hipe/misc/Makefile @@ -44,7 +44,7 @@ RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) # Target Specs # ---------------------------------------------------- ifdef HIPE_ENABLED -HIPE_MODULES = hipe_data_pp hipe_pack_constants hipe_sdi +HIPE_MODULES = hipe_data_pp hipe_pack_constants hipe_sdi hipe_segment_trees else HIPE_MODULES = endif diff --git a/lib/hipe/misc/hipe_consttab.erl b/lib/hipe/misc/hipe_consttab.erl index 226b20fa46..741bdb2094 100644 --- a/lib/hipe/misc/hipe_consttab.erl +++ b/lib/hipe/misc/hipe_consttab.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @doc @@ -69,9 +63,7 @@ %% A hipe_consttab is a tuple {Data, ReferedLabels, NextConstLabel} %% @type hipe_constlbl(). %% An abstract datatype for referring to data. -%% @type element_type() = byte | word | ctab_array() -%% @type ctab_array() = {ctab_array, Type::element_type(), -%% NoElements::pos_integer()} +%% @type element_type() = byte | word %% @type block() = [integer() | label_ref()] %% @type label_ref() = {label, Label::code_label()} %% @type code_label() = hipe_sparc:label_name() | hipe_x86:label_name() @@ -116,8 +108,7 @@ -type label_ref() :: {'label', code_label()}. -type block() :: [hipe_constlbl() | label_ref()]. --type ctab_array() :: {'ctab_array', 'byte' | 'word', pos_integer()}. --type element_type() :: 'byte' | 'word' | ctab_array(). +-type element_type() :: 'byte' | 'word'. -type sort_order() :: term(). % XXX: FIXME @@ -193,7 +184,7 @@ insert_block({ConstTab, RefToLabels, NextLabel}, ElementType, InitList) -> ReferredLabels = get_labels(InitList, []), NewRefTo = ReferredLabels ++ RefToLabels, {NewTa, Id} = insert_const({ConstTab, NewRefTo, NextLabel}, - block, word_size(), false, + block, size_of(ElementType), false, {ElementType,InitList}), {insert_backrefs(NewTa, Id, ReferredLabels), Id}. @@ -262,13 +253,9 @@ get_labels([], Acc) -> %% @spec size_of(element_type()) -> pos_integer() %% @doc Returns the size in bytes of an element_type. -%% The is_atom/1 guard in the clause handling arrays -%% constraints the argument to 'byte' | 'word' -spec size_of(element_type()) -> pos_integer(). size_of(byte) -> 1; -size_of(word) -> word_size(); -size_of({ctab_array,S,N}) when is_atom(S), is_integer(N), N > 0 -> - N * size_of(S). +size_of(word) -> word_size(). %% @spec decompose({element_type(), block()}) -> [byte()] %% @doc Turns a block into a list of bytes. diff --git a/lib/hipe/misc/hipe_consttab.hrl b/lib/hipe/misc/hipe_consttab.hrl index 550da0455c..4d2d357a0b 100644 --- a/lib/hipe/misc/hipe_consttab.hrl +++ b/lib/hipe/misc/hipe_consttab.hrl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%----------------------------------------------------------------------------- diff --git a/lib/hipe/misc/hipe_data_pp.erl b/lib/hipe/misc/hipe_data_pp.erl index 6cdc6c5ad2..2c737b6d78 100644 --- a/lib/hipe/misc/hipe_data_pp.erl +++ b/lib/hipe/misc/hipe_data_pp.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,12 +11,9 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Copyright (c) 2001 by Erik Johansson. All Rights Reserved -%% Time-stamp: <2008-04-20 14:57:08 richard> %% ==================================================================== %% Module : hipe_data_pp %% Purpose : diff --git a/lib/hipe/misc/hipe_gensym.erl b/lib/hipe/misc/hipe_gensym.erl index da7c4f9a5d..548071fd8f 100644 --- a/lib/hipe/misc/hipe_gensym.erl +++ b/lib/hipe/misc/hipe_gensym.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,16 +11,12 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%======================================================================= %% File : hipe_gensym.erl %% Author : Eric Johansson and Kostis Sagonas %% Description : Generates unique symbols and fresh integer counts. %%======================================================================= -%% $Id$ -%%======================================================================= %% Notes: Written while we were in Montreal, Canada for PPDP-2000 as an %% exercise in Principles and Practice of Declarative Programming! %%======================================================================= diff --git a/lib/hipe/misc/hipe_pack_constants.erl b/lib/hipe/misc/hipe_pack_constants.erl index b54830dd57..6736d1f503 100644 --- a/lib/hipe/misc/hipe_pack_constants.erl +++ b/lib/hipe/misc/hipe_pack_constants.erl @@ -1,10 +1,5 @@ %% -*- erlang-indent-level: 2 -*- -%%============================================================================= %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,12 +11,9 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_pack_constants). --export([pack_constants/2, slim_refs/1, slim_constmap/1, +-export([pack_constants/1, slim_refs/1, slim_constmap/1, find_const/2, mk_data_relocs/2, slim_sorted_exportmap/3]). -include("hipe_consttab.hrl"). @@ -45,8 +37,8 @@ -record(pcm_entry, {mfa :: mfa(), label :: hipe_constlbl(), - const_num :: const_num(), - start :: addr(), + const_num :: const_num(), + start :: addr(), type :: 0 | 1 | 2, raw_data :: raw_data()}). -type pcm_entry() :: #pcm_entry{}. @@ -61,11 +53,11 @@ %%----------------------------------------------------------------------------- --spec pack_constants([{mfa(),[_],hipe_consttab()}], ct_alignment()) -> +-spec pack_constants([{mfa(),[_],hipe_consttab()}]) -> {ct_alignment(), non_neg_integer(), packed_const_map(), mfa_refs_map()}. -pack_constants(Data, Align) -> - pack_constants(Data, 0, Align, 0, [], []). +pack_constants(Data) -> + pack_constants(Data, 0, 1, 0, [], []). % 1 = byte alignment pack_constants([{MFA,_,ConstTab}|Rest], Size, Align, ConstNo, Acc, Refs) -> Labels = hipe_consttab:labels(ConstTab), diff --git a/lib/hipe/misc/hipe_sdi.erl b/lib/hipe/misc/hipe_sdi.erl index fbb4b105f6..9a60382686 100644 --- a/lib/hipe/misc/hipe_sdi.erl +++ b/lib/hipe/misc/hipe_sdi.erl @@ -1,10 +1,6 @@ %%% -*- erlang-indent-level: 2 -*- %%%====================================================================== %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% An implementation of the algorithm described in: %%% "Assembling Code for Machines with Span-Dependent Instructions", @@ -36,10 +30,13 @@ %%------------------------------------------------------------------------ -type hipe_array() :: integer(). % declare this in hipe.hrl or builtin? +-type hipe_vector(E) :: {} | {E} | {E, E} | {E, E, E} | tuple(). -type label() :: non_neg_integer(). -type address() :: non_neg_integer(). +-type parents() :: {hipe_vector(_ :: integer()), hipe_segment_trees:tree()}. + %%------------------------------------------------------------------------ -record(label_data, {address :: address(), @@ -168,9 +165,11 @@ mk_long(N) -> %%% - Since the graph is traversed from child to parent nodes in %%% Step 3, the edges are represented by a vector PARENTS[0..n-1] %%% such that PARENTS[j] = { i | i is a parent of j }. -%%% - An explicit PARENTS graph would have size O(n^2). Instead we -%%% compute PARENTS[j] from the SDI vector when needed. This -%%% reduces memory overheads, and may reduce time overheads too. +%%% - An explicit PARENTS graph would have size O(n^2). Instead, we +%%% observe that (i is a parent of j) iff (j \in range(i)), where +%%% range(i) is a constant function. We can thus precompute all the +%%% ranges i and insert them into a data structure built for such +%%% queries. In this case, we use a segment tree. -spec mk_span(non_neg_integer(), tuple()) -> hipe_array(). mk_span(N, SDIS) -> @@ -188,7 +187,29 @@ initSPAN(SdiNr, N, SDIS, SPAN) -> initSPAN(SdiNr+1, N, SDIS, SPAN) end. -mk_parents(N, SDIS) -> {N,SDIS}. +-spec mk_parents(non_neg_integer(), tuple()) -> parents(). +mk_parents(N, SDIS) -> + PrevSDIS = vector_from_list(select_prev_sdis(N-1, SDIS, [])), + Ranges = parents_generate_ranges(N-1, PrevSDIS, []), + {PrevSDIS, hipe_segment_trees:build(Ranges)}. + +select_prev_sdis(-1, _SDIS, Acc) -> Acc; +select_prev_sdis(SdiNr, SDIS, Acc) -> + #sdi_data{prevSdi=PrevSdi} = vector_sub(SDIS, SdiNr), + select_prev_sdis(SdiNr-1, SDIS, [PrevSdi|Acc]). + +parents_generate_ranges(-1, _PrevSDIS, Acc) -> Acc; +parents_generate_ranges(SdiNr, PrevSDIS, Acc) -> + %% inclusive + {LO,HI} = parents_generate_range(SdiNr, PrevSDIS), + parents_generate_ranges(SdiNr-1, PrevSDIS, [{LO,HI}|Acc]). + +-compile({inline, parents_generate_range/2}). +parents_generate_range(SdiNr, PrevSDIS) -> + PrevSdi = vector_sub(PrevSDIS, SdiNr), + if SdiNr =< PrevSdi -> {SdiNr+1, PrevSdi}; % forwards + true -> {PrevSdi+1, SdiNr-1} % backwards + end. %%% "After the structure is built we process it as follows. %%% For any node i whose listed span exceeds the architectural @@ -209,7 +230,7 @@ mk_parents(N, SDIS) -> {N,SDIS}. %%% and PARENTS are no longer useful. -spec update_long(non_neg_integer(), tuple(), hipe_array(), - {non_neg_integer(),tuple()},hipe_array()) -> 'ok'. + parents(),hipe_array()) -> 'ok'. update_long(N, SDIS, SPAN, PARENTS, LONG) -> WKL = initWKL(N-1, SDIS, SPAN, []), processWKL(WKL, SDIS, SPAN, PARENTS, LONG). @@ -225,46 +246,32 @@ initWKL(SdiNr, SDIS, SPAN, WKL) -> end. -spec processWKL([non_neg_integer()], tuple(), hipe_array(), - {non_neg_integer(), tuple()}, hipe_array()) -> 'ok'. + parents(), hipe_array()) -> 'ok'. processWKL([], _SDIS, _SPAN, _PARENTS, _LONG) -> ok; -processWKL([Child|WKL], SDIS, SPAN, PARENTS, LONG) -> - WKL2 = updateChild(Child, WKL, SDIS, SPAN, PARENTS, LONG), +processWKL([Child|WKL], SDIS, SPAN, PARENTS0, LONG) -> + {WKL2, PARENTS} = + case array_sub(SPAN, Child) of + 0 -> {WKL, PARENTS0}; % removed + _ -> + SdiData = vector_sub(SDIS, Child), + Incr = sdiLongIncr(SdiData), + array_update(LONG, Child, Incr), + array_update(SPAN, Child, 0), % remove child + PARENTS1 = deleteParent(PARENTS0, Child), + PS = parentsOfChild(PARENTS1, Child), + {updateParents(PS, Child, Incr, SDIS, SPAN, WKL), PARENTS1} + end, processWKL(WKL2, SDIS, SPAN, PARENTS, LONG). --spec updateChild(non_neg_integer(), [non_neg_integer()], tuple(), hipe_array(), - {non_neg_integer(),tuple()}, hipe_array()) -> [non_neg_integer()]. -updateChild(Child, WKL, SDIS, SPAN, PARENTS, LONG) -> - case array_sub(SPAN, Child) of - 0 -> WKL; % removed - _ -> - SdiData = vector_sub(SDIS, Child), - Incr = sdiLongIncr(SdiData), - array_update(LONG, Child, Incr), - array_update(SPAN, Child, 0), % remove child - PS = parentsOfChild(PARENTS, Child), - updateParents(PS, Child, Incr, SDIS, SPAN, WKL) - end. +-spec parentsOfChild(parents(), non_neg_integer()) -> [non_neg_integer()]. +parentsOfChild({_PrevSDIS, SegTree}, Child) -> + hipe_segment_trees:intersect(Child, SegTree). --spec parentsOfChild({non_neg_integer(),tuple()}, - non_neg_integer()) -> [non_neg_integer()]. -parentsOfChild({N,SDIS}, Child) -> - parentsOfChild(N-1, SDIS, Child, []). - --spec parentsOfChild(integer(), tuple(), non_neg_integer(), - [non_neg_integer()]) -> [non_neg_integer()]. -parentsOfChild(-1, _SDIS, _Child, PS) -> PS; -parentsOfChild(SdiNr, SDIS, Child, PS) -> - SdiData = vector_sub(SDIS, SdiNr), - #sdi_data{prevSdi=PrevSdi} = SdiData, - {LO,HI} = % inclusive - if SdiNr =< PrevSdi -> {SdiNr+1, PrevSdi}; % forwards - true -> {PrevSdi+1, SdiNr-1} % backwards - end, - NewPS = - if LO =< Child, Child =< HI -> [SdiNr | PS]; - true -> PS - end, - parentsOfChild(SdiNr-1, SDIS, Child, NewPS). +-spec deleteParent(parents(), non_neg_integer()) -> parents(). +deleteParent({PrevSDIS, SegTree0}, Parent) -> + {LO,HI} = parents_generate_range(Parent, PrevSDIS), + SegTree = hipe_segment_trees:delete(Parent, LO, HI, SegTree0), + {PrevSDIS, SegTree}. -spec updateParents([non_neg_integer()], non_neg_integer(), byte(), tuple(), hipe_array(), @@ -297,10 +304,12 @@ updateWKL(SdiNr, SDIS, SdiSpan, WKL) -> false -> [SdiNr|WKL] end. +-compile({inline, sdiSpanIsShort/2}). %% Only called once -spec sdiSpanIsShort(#sdi_data{}, integer()) -> boolean(). sdiSpanIsShort(#sdi_data{si = #sdi_info{lb = LB, ub = UB}}, SdiSpan) -> SdiSpan >= LB andalso SdiSpan =< UB. +-compile({inline, sdiLongIncr/1}). %% Only called once -spec sdiLongIncr(#sdi_data{}) -> byte(). sdiLongIncr(#sdi_data{si = #sdi_info{incr = Incr}}) -> Incr. @@ -361,9 +370,11 @@ applyIncr([{Label,LabelData}|List], INCREMENT, LabelMap) -> %%% Currently implemented as tuples. %%% Used for the 'SDIS' and 'PARENTS' vectors. --spec vector_from_list([#sdi_data{}]) -> tuple(). +-spec vector_from_list([E]) -> hipe_vector(E). vector_from_list(Values) -> list_to_tuple(Values). +-compile({inline, vector_sub/2}). +-spec vector_sub(hipe_vector(E), non_neg_integer()) -> V when V :: E. vector_sub(Vec, I) -> element(I+1, Vec). %%% ADT for mutable integer arrays, indexed from 0 to N-1. @@ -373,8 +384,10 @@ vector_sub(Vec, I) -> element(I+1, Vec). -spec mk_array_of_zeros(non_neg_integer()) -> hipe_array(). mk_array_of_zeros(N) -> hipe_bifs:array(N, 0). +-compile({inline, array_update/3}). -spec array_update(hipe_array(), non_neg_integer(), integer()) -> hipe_array(). array_update(A, I, V) -> hipe_bifs:array_update(A, I, V). +-compile({inline, array_sub/2}). -spec array_sub(hipe_array(), non_neg_integer()) -> integer(). array_sub(A, I) -> hipe_bifs:array_sub(A, I). diff --git a/lib/hipe/misc/hipe_sdi.hrl b/lib/hipe/misc/hipe_sdi.hrl index a1e12f9df2..def697549c 100644 --- a/lib/hipe/misc/hipe_sdi.hrl +++ b/lib/hipe/misc/hipe_sdi.hrl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,10 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% - -record(sdi_info, {lb :: integer(), % span lower bound for short form diff --git a/lib/hipe/misc/hipe_segment_trees.erl b/lib/hipe/misc/hipe_segment_trees.erl new file mode 100644 index 0000000000..3d6a7487ec --- /dev/null +++ b/lib/hipe/misc/hipe_segment_trees.erl @@ -0,0 +1,174 @@ +%%% Licensed under the Apache License, Version 2.0 (the "License"); +%%% you may not use this file except in compliance with the License. +%%% You may obtain a copy of the License at +%%% +%%% http://www.apache.org/licenses/LICENSE-2.0 +%%% +%%% Unless required by applicable law or agreed to in writing, software +%%% distributed under the License is distributed on an "AS IS" BASIS, +%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%%% See the License for the specific language governing permissions and +%%% limitations under the License. +%%% +%%% Segment trees, with a delete operation. +%%% +%%% Keys are the (0-based) indices into the list passed to build/1. +%%% +%%% Range bounds are inclusive. +%%% + +-module(hipe_segment_trees). + +-export([build/1, intersect/2, delete/4]). + +-record(segment_tree, { + lo :: integer(), + hi :: integer(), + root :: tnode() + }). + +%% X =< Mid belongs in Left +-define(NODE(Left, Right, Mid, Segments), {Left, Right, Mid, Segments}). + +-define(POINT_LEAF(Val), Val). +-define(RANGE_LEAF(Lo, Hi), {Lo, Hi}). + +-type segments() :: [non_neg_integer()]. +-type leaf() :: segments(). +-type tnode() :: ?NODE(tnode(), tnode(), integer(), segments()) | leaf(). + +-opaque tree() :: #segment_tree{} | nil. +-export_type([tree/0]). + +%% @doc Builds a segment tree of the given intervals. +-spec build([{integer(), integer()}]) -> tree(). +build(ListOfIntervals) -> + case + lists:usort( + lists:append( + [[Lo, Hi] || {Lo, Hi} <- ListOfIntervals, Lo =< Hi])) + of + [] -> nil; + Endpoints -> + Tree0 = empty_tree_from_endpoints(Endpoints), + [Lo|_] = Endpoints, + Hi = lists:last(Endpoints), + Tree1 = insert_intervals(0, ListOfIntervals, Lo, Hi, Tree0), + Tree = squash_empty_subtrees(Tree1), + #segment_tree{lo=Lo, hi=Hi, root=Tree} + end. + +empty_tree_from_endpoints(Endpoints) -> + Leaves = leaves(Endpoints), + {T, [], _, _} = balanced_bst(Leaves, length(Leaves)), + T. + +leaves([Endpoint]) -> [?POINT_LEAF(Endpoint)]; +leaves([A | [B|_] = Tail]) -> + %% We omit the range leaf if it's empty + case A<B-1 of + true -> [?POINT_LEAF(A),?RANGE_LEAF(A+1,B-1) | leaves(Tail)]; + false -> [?POINT_LEAF(A) | leaves(Tail)] + end. + +balanced_bst(L, S) when S > 1 -> + Sm = S, %% - 1 + S2 = Sm div 2, + S1 = Sm - S2, + {Left, L1, LeftLo, LeftHi} = balanced_bst(L, S1), + {Right, L2, _, RightHi} = balanced_bst(L1, S2), + T = ?NODE(Left, Right, LeftHi, []), + {T, L2, LeftLo, RightHi}; +balanced_bst([?RANGE_LEAF(Lo, Hi) | L], 1) -> + {[], L, Lo, Hi}; +balanced_bst([?POINT_LEAF(Val) | L], 1) -> + {[], L, Val, Val}. + +insert_intervals(_Ix, [], _Lo, _Hi, Tree) -> Tree; +insert_intervals(Ix, [Int|Ints], Lo, Hi, Tree) -> + insert_intervals(Ix + 1, Ints, Lo, Hi, + insert_interval(Ix, Int, Lo, Hi, Tree)). + +insert_interval(_, {Lo, Hi}, _, _, Node) when Lo > Hi -> Node; +insert_interval(I, Int={Lo,Hi}, NLo, NHi, + ?NODE(Left0, Right0, Mid, Segments)) -> + if Lo =< NLo, NHi =< Hi -> + ?NODE(Left0, Right0, Mid, [I|Segments]); + true -> + Left = case intervals_intersect(Lo, Hi, NLo, Mid) of + true -> insert_interval(I, Int, NLo, Mid, Left0); + false -> Left0 + end, + Right = case intervals_intersect(Lo, Hi, Mid+1, NHi) of + true -> insert_interval(I, Int, Mid+1, NHi, Right0); + false -> Right0 + end, + ?NODE(Left, Right, Mid, Segments) + end; +insert_interval(I, {_Lo,_Hi}, _NLo, _NHi, Leaf) -> [I|Leaf]. + +intervals_intersect(ALo, AHi, BLo, BHi) -> + (ALo =< AHi) andalso (BLo =< BHi) %% both nonempty + andalso nonempty_intervals_intersect(ALo, AHi, BLo, BHi). + +%% Purely optional optimisation +squash_empty_subtrees(?NODE(Left0, Right0, Mid, Segs)) -> + build_squash_node(squash_empty_subtrees(Left0), + squash_empty_subtrees(Right0), + Mid, Segs); +squash_empty_subtrees(Leaf) -> Leaf. + +build_squash_node([], [], _, Segs) -> Segs; +build_squash_node(Left, Right, Mid, Segs) -> + ?NODE(Left, Right, Mid, Segs). + +%% @doc Returns the indices of the intervals in the tree that contains Point. +-spec intersect(integer(), tree()) -> [non_neg_integer()]. +intersect(Point, nil) when is_integer(Point) -> []; +intersect(Point, #segment_tree{lo=Lo, hi=Hi, root=Root}) + when is_integer(Point) -> + case Lo =< Point andalso Point =< Hi of + false -> []; + true -> intersect_1(Point, Root, []) + end. + +intersect_1(Point, ?NODE(Left, Right, Mid, Segs), Acc0) -> + Child = if Point =< Mid -> Left; true -> Right end, + intersect_1(Point, Child, Segs ++ Acc0); +intersect_1(_, LeafSegs, Acc) -> LeafSegs ++ Acc. + +%% @doc Deletes the interval {Lo, Hi}, which had index Index in the list passed +%% to build/1. +-spec delete(non_neg_integer(), integer(), integer(), tree()) -> tree(). +delete(_, _, _, nil) -> nil; +delete(_, Lo, Hi, Tree) when Lo > Hi -> Tree; +delete(_, Lo, Hi, Tree = #segment_tree{lo=TLo, hi=THi}) + when Hi < TLo; Lo > THi -> Tree; +delete(Index, Lo, Hi, Tree = #segment_tree{lo=TLo, hi=THi, root=Root0}) + when is_integer(Lo), is_integer(Hi) -> + Root = delete_1(Index, Lo, Hi, TLo, THi, Root0), + Tree#segment_tree{root=Root}. + +delete_1(I, Lo, Hi, NLo, NHi, ?NODE(Left0, Right0, Mid, Segments)) -> + if Lo =< NLo, NHi =< Hi -> + ?NODE(Left0, Right0, Mid, delete_2(Segments, I)); + true -> + Left = case nonempty_intervals_intersect(Lo, Hi, NLo, Mid) of + true -> delete_1(I, Lo, Hi, NLo, Mid, Left0); + false -> Left0 + end, + Right = case nonempty_intervals_intersect(Lo, Hi, Mid+1, NHi) of + true -> delete_1(I, Lo, Hi, Mid+1, NHi, Right0); + false -> Right0 + end, + %% We could do build_squash_node here, is it worth it? + ?NODE(Left, Right, Mid, Segments) + end; +delete_1(I, _Lo, _Hi, _NLo, _NHi, Leaf) -> delete_2(Leaf, I). + +delete_2([I|Segs], I) -> Segs; +delete_2([S|Segs], I) -> [S|delete_2(Segs,I)]. + +-compile({inline,nonempty_intervals_intersect/4}). +nonempty_intervals_intersect(ALo, AHi, BLo, BHi) -> + (BLo =< AHi) andalso (ALo =< BHi). diff --git a/lib/hipe/opt/Makefile b/lib/hipe/opt/Makefile index 684d6f45b4..5a729d04ae 100644 --- a/lib/hipe/opt/Makefile +++ b/lib/hipe/opt/Makefile @@ -43,7 +43,8 @@ RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) # ---------------------------------------------------- # Target Specs # ---------------------------------------------------- -MODULES = hipe_spillmin hipe_spillmin_color hipe_spillmin_scan +MODULES = hipe_spillmin hipe_spillmin_color hipe_spillmin_scan \ + hipe_bb_weights HRL_FILES= ERL_FILES= $(MODULES:%=%.erl) diff --git a/lib/hipe/opt/hipe_bb_weights.erl b/lib/hipe/opt/hipe_bb_weights.erl new file mode 100644 index 0000000000..8ef113b94c --- /dev/null +++ b/lib/hipe/opt/hipe_bb_weights.erl @@ -0,0 +1,449 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% BASIC BLOCK WEIGHTING +%% +%% Computes basic block weights by using branch probabilities as weights in a +%% linear equation system, that is then solved using Gauss-Jordan Elimination. +%% +%% The equation system representation is intentionally sparse, since most blocks +%% have at most two successors. +-module(hipe_bb_weights). +-export([compute/3, compute_fast/3, weight/2, call_exn_pred/0]). +-export_type([bb_weights/0]). + +-compile(inline). + +%%-define(DO_ASSERT,1). +%%-define(DEBUG,1). +-include("../main/hipe.hrl"). + +%% If the equation system is large, it might take too long to solve it exactly. +%% Thus, if there are more than ?HEUR_MAX_SOLVE labels, we use the iterative +%% approximation. +-define(HEUR_MAX_SOLVE, 10000). + +-opaque bb_weights() :: #{label() => float()}. + +-type cfg() :: any(). +-type target_module() :: module(). +-type target_context() :: any(). +-type target() :: {target_module(), target_context()}. + +-type label() :: integer(). +-type var() :: label(). +-type assignment() :: {var(), float()}. +-type eq_assoc() :: [{var(), key()}]. +-type solution() :: [assignment()]. + +%% Constant. Predicted probability of a call resulting in an exception. +-spec call_exn_pred() -> float(). +call_exn_pred() -> 0.01. + +-spec compute(cfg(), target_module(), target_context()) -> bb_weights(). +compute(CFG, TgtMod, TgtCtx) -> + Target = {TgtMod, TgtCtx}, + Labels = labels(CFG, Target), + if length(Labels) > ?HEUR_MAX_SOLVE -> + ?debug_msg("~w: Too many labels (~w), approximating.~n", + [?MODULE, length(Labels)]), + compute_fast(CFG, TgtMod, TgtCtx); + true -> + {EqSys, EqAssoc} = build_eq_system(CFG, Labels, Target), + case solve(EqSys, EqAssoc) of + {ok, Solution} -> + maps:from_list(Solution) + end + end. + +-spec build_eq_system(cfg(), [label()], target()) -> {eq_system(), eq_assoc()}. +build_eq_system(CFG, Labels, Target) -> + StartLb = hipe_gen_cfg:start_label(CFG), + EQS0 = eqs_new(), + {EQS1, Assoc} = build_eq_system(Labels, CFG, Target, [], EQS0), + {StartLb, StartKey} = lists:keyfind(StartLb, 1, Assoc), + StartRow0 = eqs_get(StartKey, EQS1), + StartRow = row_set_const(-1.0, StartRow0), % -1.0 since StartLb coef is -1.0 + EQS = eqs_put(StartKey, StartRow, EQS1), + {EQS, Assoc}. + +build_eq_system([], _CFG, _Target, Map, EQS) -> {EQS, lists:reverse(Map)}; +build_eq_system([L|Ls], CFG, Target, Map, EQS0) -> + PredProb = pred_prob(L, CFG, Target), + {Key, EQS} = eqs_insert(row_new([{L, -1.0}|PredProb], 0.0), EQS0), + build_eq_system(Ls, CFG, Target, [{L, Key}|Map], EQS). + +pred_prob(L, CFG, Target) -> + [begin + BB = bb(CFG, Pred, Target), + Ps = branch_preds(hipe_bb:last(BB), Target), + ?ASSERT(length(lists:ukeysort(1, Ps)) + =:= length(hipe_gen_cfg:succ(CFG, Pred))), + case lists:keyfind(L, 1, Ps) of + {L, Prob} when is_float(Prob) -> {Pred, Prob} + end + end || Pred <- hipe_gen_cfg:pred(CFG, L)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-spec triangelise(eq_system(), eq_assoc()) -> {eq_system(), eq_assoc()}. +triangelise(EQS, VKs) -> + triangelise_1(mk_triix(EQS, VKs), []). + +triangelise_1(TIX0, Acc) -> + case triix_is_empty(TIX0) of + true -> {triix_eqs(TIX0), lists:reverse(Acc)}; + false -> + {V,Key,TIX1} = triix_pop_smallest(TIX0), + Row0 = triix_get(Key, TIX1), + case row_get(V, Row0) of + Coef when Coef > -0.0001, Coef < 0.0001 -> + throw(error); + _ -> + Row = row_normalise(V, Row0), + TIX2 = triix_put(Key, Row, TIX1), + TIX = eliminate_triix(V, Key, Row, TIX2), + triangelise_1(TIX, [{V,Key}|Acc]) + end + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Triangelisation maintains its own index, outside of eqs. This index is +%% essentially a BST (used as a heap) of all equations by size, with {Key,Var} +%% as the values and only containing a subset of all the keys in the whole +%% equation system. The key operation is triix_pop_smallest/1, which pops a +%% {Key,Var} from the heap corresponding to one of the smallest equations. This +%% is critical in order to prevent the equations from growing during +%% triangelisation, which would make the algorithm O(n^2) in the common case. +-type tri_eq_system() :: {eq_system(), + gb_trees:tree(non_neg_integer(), + gb_trees:tree(key(), var()))}. + +triix_eqs({EQS, _}) -> EQS. +triix_get(Key, {EQS, _}) -> eqs_get(Key, EQS). +triix_is_empty({_, Tree}) -> gb_trees:is_empty(Tree). +triix_lookup(V, {EQS, _}) -> eqs_lookup(V, EQS). + +mk_triix(EQS, VKs) -> + {EQS, + lists:foldl(fun({V,Key}, Tree) -> + Size = row_size(eqs_get(Key, EQS)), + sitree_insert(Size, Key, V, Tree) + end, gb_trees:empty(), VKs)}. + +sitree_insert(Size, Key, V, SiTree) -> + SubTree1 = + case gb_trees:lookup(Size, SiTree) of + none -> gb_trees:empty(); + {value, SubTree0} -> SubTree0 + end, + SubTree = gb_trees:insert(Key, V, SubTree1), + gb_trees:enter(Size, SubTree, SiTree). + +sitree_update_subtree(Size, SubTree, SiTree) -> + case gb_trees:is_empty(SubTree) of + true -> gb_trees:delete(Size, SiTree); + false -> gb_trees:update(Size, SubTree, SiTree) + end. + +triix_put(Key, Row, {EQS, Tree0}) -> + OldSize = row_size(eqs_get(Key, EQS)), + case row_size(Row) of + OldSize -> {eqs_put(Key, Row, EQS), Tree0}; + Size -> + Tree = + case gb_trees:lookup(OldSize, Tree0) of + none -> Tree0; + {value, SubTree0} -> + case gb_trees:lookup(Key, SubTree0) of + none -> Tree0; + {value, V} -> + SubTree = gb_trees:delete(Key, SubTree0), + Tree1 = sitree_update_subtree(OldSize, SubTree, Tree0), + sitree_insert(Size, Key, V, Tree1) + end + end, + {eqs_put(Key, Row, EQS), Tree} + end. + +triix_pop_smallest({EQS, Tree}) -> + {Size, SubTree0} = gb_trees:smallest(Tree), + {Key, V, SubTree} = gb_trees:take_smallest(SubTree0), + {V, Key, {EQS, sitree_update_subtree(Size, SubTree, Tree)}}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +row_normalise(Var, Row) -> + %% Normalise v's coef to 1.0 + %% row_set_coef ensures the coef is exactly 1.0 (no rounding errors) + row_set_coef(Var, 1.0, row_scale(Row, 1.0/row_get(Var, Row))). + +%% Precondition: Row must be normalised; i.e. Vars coef must be 1.0 (mod +%% rounding errors) +-spec eliminate(var(), key(), row(), eq_system()) -> eq_system(). +eliminate(Var, Key, Row, TIX0) -> + eliminate_abstr(Var, Key, Row, TIX0, + fun eqs_get/2, fun eqs_lookup/2, fun eqs_put/3). + +-spec eliminate_triix(var(), key(), row(), tri_eq_system()) -> tri_eq_system(). +eliminate_triix(Var, Key, Row, TIX0) -> + eliminate_abstr(Var, Key, Row, TIX0, + fun triix_get/2, fun triix_lookup/2, fun triix_put/3). + +%% The same function implemented for two data types, eqs and triix. +-compile({inline, eliminate_abstr/7}). +-spec eliminate_abstr(var(), key(), row(), ADT, fun((key(), ADT) -> row()), + fun((var(), ADT) -> [key()]), + fun((key(), row(), ADT) -> ADT)) -> ADT. +eliminate_abstr(Var, Key, Row, ADT0, GetFun, LookupFun, PutFun) -> + ?ASSERT(1.0 =:= row_get(Var, Row)), + ADT = + lists:foldl(fun(RK, ADT1) when RK =:= Key -> ADT1; + (RK, ADT1) -> + R = GetFun(RK, ADT1), + PutFun(RK, row_addmul(R, Row, -row_get(Var, R)), ADT1) + end, ADT0, LookupFun(Var, ADT0)), + [Key] = LookupFun(Var, ADT), + ADT. + +-spec solve(eq_system(), eq_assoc()) -> error | {ok, solution()}. +solve(EQS0, EqAssoc0) -> + try triangelise(EQS0, EqAssoc0) + of {EQS1, EqAssoc} -> + {ok, solve_1(EqAssoc, maps:from_list(EqAssoc), EQS1, [])} + catch error -> error + end. + +solve_1([], _VarEqs, _EQS, Acc) -> Acc; +solve_1([{V,K}|Ps], VarEqs, EQS0, Acc0) -> + Row0 = eqs_get(K, EQS0), + VarsToKill = [Var || {Var, _} <- row_coefs(Row0), Var =/= V], + Row1 = kill_vars(VarsToKill, VarEqs, EQS0, Row0), + [{V,_}] = row_coefs(Row1), % assertion + Row = row_normalise(V, Row1), + [{V,1.0}] = row_coefs(Row), % assertion + EQS = eliminate(V, K, Row, EQS0), + [K] = eqs_lookup(V, EQS), + solve_1(Ps, VarEqs, eqs_remove(K, EQS), [{V, row_const(Row)}|Acc0]). + +kill_vars([], _VarEqs, _EQS, Row) -> Row; +kill_vars([V|Vs], VarEqs, EQS, Row0) -> + VRow0 = eqs_get(maps:get(V, VarEqs), EQS), + VRow = row_normalise(V, VRow0), + ?ASSERT(1.0 =:= row_get(V, VRow)), + Row = row_addmul(Row0, VRow, -row_get(V, Row0)), + ?ASSERT(0.0 =:= row_get(V, Row)), % V has been killed + kill_vars(Vs, VarEqs, EQS, Row). + +-spec weight(label(), bb_weights()) -> float(). +weight(Lbl, Weights) -> + maps:get(Lbl, Weights). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Row datatype +%% Invariant: No 0.0 coefficiets! +-spec row_empty() -> row(). +row_empty() -> {orddict:new(), 0.0}. + +-spec row_new([{var(), float()}], float()) -> row(). +row_new(Coefs, Const) when is_float(Const) -> + row_ensure_invar({row_squash_multiples(lists:keysort(1, Coefs)), Const}). + +row_squash_multiples([{K, C1},{K, C2}|Ps]) -> + row_squash_multiples([{K,C1+C2}|Ps]); +row_squash_multiples([P|Ps]) -> [P|row_squash_multiples(Ps)]; +row_squash_multiples([]) -> []. + +row_ensure_invar({Coef, Const}) -> + {orddict:filter(fun(_, 0.0) -> false; (_, F) when is_float(F) -> true end, + Coef), Const}. + +row_const({_, Const}) -> Const. +row_coefs({Coefs, _}) -> orddict:to_list(Coefs). +row_size({Coefs, _}) -> orddict:size(Coefs). + +row_get(Var, {Coefs, _}) -> + case lists:keyfind(Var, 1, Coefs) of + false -> 0.0; + {_, Coef} -> Coef + end. + +row_set_coef(Var, 0.0, {Coefs, Const}) -> + {orddict:erase(Var, Coefs), Const}; +row_set_coef(Var, Coef, {Coefs, Const}) -> + {orddict:store(Var, Coef, Coefs), Const}. + +row_set_const(Const, {Coefs, _}) -> {Coefs, Const}. + +%% Lhs + Rhs*Factor +-spec row_addmul(row(), row(), float()) -> row(). +row_addmul({LhsCoefs, LhsConst}, {RhsCoefs, RhsConst}, Factor) + when is_float(Factor) -> + Coefs = row_addmul_coefs(LhsCoefs, RhsCoefs, Factor), + Const = LhsConst + RhsConst * Factor, + {Coefs, Const}. + +row_addmul_coefs(Ls, [], Factor) when is_float(Factor) -> Ls; +row_addmul_coefs([], Rs, Factor) when is_float(Factor) -> + row_scale_coefs(Rs, Factor); +row_addmul_coefs([L={LV, _}|Ls], Rs=[{RV,_}|_], Factor) + when LV < RV, is_float(Factor) -> + [L|row_addmul_coefs(Ls, Rs, Factor)]; +row_addmul_coefs(Ls=[{LV, _}|_], [{RV, RC}|Rs], Factor) + when LV > RV, is_float(RC), is_float(Factor) -> + [{RV, RC*Factor}|row_addmul_coefs(Ls, Rs, Factor)]; +row_addmul_coefs([{V, LC}|Ls], [{V, RC}|Rs], Factor) + when is_float(LC), is_float(RC), is_float(Factor) -> + case LC + RC * Factor of + 0.0 -> row_addmul_coefs(Ls, Rs, Factor); + C -> [{V,C}|row_addmul_coefs(Ls, Rs, Factor)] + end. + +row_scale(_, 0.0) -> row_empty(); +row_scale({RowCoefs, RowConst}, Factor) when is_float(Factor) -> + {row_scale_coefs(RowCoefs, Factor), RowConst * Factor}. + +row_scale_coefs([{V,C}|Cs], Factor) when is_float(Factor), is_float(C) -> + [{V,C*Factor}|row_scale_coefs(Cs, Factor)]; +row_scale_coefs([], Factor) when is_float(Factor) -> + []. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Equation system ADT +%% +%% Stores a linear equation system, allowing for efficient updates and efficient +%% queries for all equations mentioning a variable. +%% +%% It is sort of like a "database" table of {Primary, Terms, Const} indexed both +%% on Primary as well as the vars (map keys) in Terms. +-type row() :: {Terms :: orddict:orddict(var(), float()), + Const :: float()}. +-type key() :: non_neg_integer(). +-type rev_index() :: #{var() => ordsets:ordset(key())}. +-record(eq_system, { + rows = #{} :: #{key() => row()}, + revidx = revidx_empty() :: rev_index(), + next_key = 0 :: key() + }). +-type eq_system() :: #eq_system{}. + +eqs_new() -> #eq_system{}. + +-spec eqs_insert(row(), eq_system()) -> {key(), eq_system()}. +eqs_insert(Row, EQS=#eq_system{next_key=NextKey0}) -> + Key = NextKey0, + NextKey = NextKey0 + 1, + {Key, eqs_insert(Key, Row, EQS#eq_system{next_key=NextKey})}. + +eqs_insert(Key, Row, EQS=#eq_system{rows=Rows, revidx=RevIdx0}) -> + RevIdx = revidx_add(Key, Row, RevIdx0), + EQS#eq_system{rows=Rows#{Key => Row}, revidx=RevIdx}. + +eqs_put(Key, Row, EQS0) -> + eqs_insert(Key, Row, eqs_remove(Key, EQS0)). + +eqs_remove(Key, EQS=#eq_system{rows=Rows, revidx=RevIdx0}) -> + OldRow = maps:get(Key, Rows), + RevIdx = revidx_remove(Key, OldRow, RevIdx0), + EQS#eq_system{rows = maps:remove(Key, Rows), revidx=RevIdx}. + +-spec eqs_get(key(), eq_system()) -> row(). +eqs_get(Key, #eq_system{rows=Rows}) -> maps:get(Key, Rows). + +%% Keys of all equations containing a nonzero coefficient for Var +-spec eqs_lookup(var(), eq_system()) -> ordsets:ordset(key()). +eqs_lookup(Var, #eq_system{revidx=RevIdx}) -> maps:get(Var, RevIdx). + +%% eqs_rows(#eq_system{rows=Rows}) -> maps:to_list(Rows). + +%% eqs_print(EQS) -> +%% lists:foreach(fun({_, Row}) -> +%% row_print(Row) +%% end, lists:sort(eqs_rows(EQS))). + +%% row_print(Row) -> +%% CoefStrs = [io_lib:format("~wl~w", [Coef, Var]) +%% || {Var, Coef} <- row_coefs(Row)], +%% CoefStr = lists:join(" + ", CoefStrs), +%% io:format("~w = ~s~n", [row_const(Row), CoefStr]). + +revidx_empty() -> #{}. + +-spec revidx_add(key(), row(), rev_index()) -> rev_index(). +revidx_add(Key, Row, RevIdx0) -> + orddict:fold(fun(Var, _Coef, RevIdx1) -> + ?ASSERT(_Coef /= 0.0), + RevIdx1#{Var => ordsets:add_element( + Key, maps:get(Var, RevIdx1, ordsets:new()))} + end, RevIdx0, row_coefs(Row)). + +-spec revidx_remove(key(), row(), rev_index()) -> rev_index(). +revidx_remove(Key, {Coefs, _}, RevIdx0) -> + orddict:fold(fun(Var, _Coef, RevIdx1) -> + case RevIdx1 of + #{Var := Keys0} -> + case ordsets:del_element(Key, Keys0) of + [] -> maps:remove(Var, RevIdx1); + Keys -> RevIdx1#{Var := Keys} + end + end + end, RevIdx0, Coefs). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-define(FAST_ITERATIONS, 5). + +%% @doc Computes a rough approximation of BB weights. The approximation is +%% particularly poor (converges slowly) for recursive functions and loops. +-spec compute_fast(cfg(), target_module(), target_context()) -> bb_weights(). +compute_fast(CFG, TgtMod, TgtCtx) -> + Target = {TgtMod, TgtCtx}, + StartLb = hipe_gen_cfg:start_label(CFG), + RPO = reverse_postorder(CFG, Target), + PredProbs = [{L, pred_prob(L, CFG, Target)} || L <- RPO, L =/= StartLb], + Probs0 = (maps:from_list([{L, 0.0} || L <- RPO]))#{StartLb := 1.0}, + fast_iterate(?FAST_ITERATIONS, PredProbs, Probs0). + +fast_iterate(0, _Pred, Probs) -> Probs; +fast_iterate(Iters, Pred, Probs0) -> + fast_iterate(Iters-1, Pred, + fast_one(Pred, Probs0)). + +fast_one([{L, Pred}|Ls], Probs0) -> + Weight = fast_sum(Pred, Probs0, 0.0), + Probs = Probs0#{L => Weight}, + fast_one(Ls, Probs); +fast_one([], Probs) -> + Probs. + +fast_sum([{P,EWt}|Pred], Probs, Acc) when is_float(EWt), is_float(Acc) -> + case Probs of + #{P := PWt} when is_float(PWt) -> + fast_sum(Pred, Probs, Acc + PWt * EWt) + end; +fast_sum([], _Probs, Acc) when is_float(Acc) -> + Acc. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Target module interface functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-define(TGT_IFACE_0(N), N( {M,C}) -> M:N( C)). +-define(TGT_IFACE_1(N), N(A1, {M,C}) -> M:N(A1, C)). +-define(TGT_IFACE_2(N), N(A1,A2, {M,C}) -> M:N(A1,A2, C)). +-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)). + +?TGT_IFACE_2(bb). +?TGT_IFACE_1(branch_preds). +?TGT_IFACE_1(labels). +?TGT_IFACE_1(reverse_postorder). diff --git a/lib/hipe/opt/hipe_schedule.erl b/lib/hipe/opt/hipe_schedule.erl index 00ad487620..0f25940e3d 100644 --- a/lib/hipe/opt/hipe_schedule.erl +++ b/lib/hipe/opt/hipe_schedule.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -1344,10 +1337,10 @@ cd([{N,I}|Xs], DAG, PrevBr, PrevUnsafe, PrevOthers) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Function : cd_branch_to_other_deps %% Argument : N - index of branch -%% Ms - list of indexes of "others" preceeding instrs +%% Ms - list of indexes of "others" preceding instrs %% DAG - dependence graph %% Returns : DAG - new graph -%% Description : Makes preceeding instrs fixed so they don't bypass a branch +%% Description : Makes preceding instrs fixed so they don't bypass a branch %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% cd_branch_to_other_deps(_, [], DAG) -> DAG; diff --git a/lib/hipe/opt/hipe_schedule_prio.erl b/lib/hipe/opt/hipe_schedule_prio.erl index 3dcc0845e0..339bb82aab 100644 --- a/lib/hipe/opt/hipe_schedule_prio.erl +++ b/lib/hipe/opt/hipe_schedule_prio.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% diff --git a/lib/hipe/opt/hipe_spillmin.erl b/lib/hipe/opt/hipe_spillmin.erl index 4eeb1d71db..b28a6bfd13 100644 --- a/lib/hipe/opt/hipe_spillmin.erl +++ b/lib/hipe/opt/hipe_spillmin.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %% ========================================================================== %% Module : hipe_spillmin @@ -24,12 +18,11 @@ %% by a function. This is done using an algorithm for register %% allocation. The implementation is target-independent and %% requires a target-specific interface module as argument. -%% -%% $Id$ %% ========================================================================== %% Exported functions (short description): %% -%% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> +%% stackalloc(CFG, StackSlots, SpillIndex, Options, TgtMod, TgtCtx, +%% TempMap) -> %% {Coloring, NumberOfSpills} %% Takes a CFG and the TempMap from register allocation and returns %% a coloring of stack slots. @@ -49,7 +42,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -module(hipe_spillmin). --export([stackalloc/6, mapmerge/2]). +-export([stackalloc/7, stackalloc/8, mapmerge/2]). %%-define(DEBUG, 1). -define(HIPE_INSTRUMENT_COMPILER, true). @@ -59,6 +52,8 @@ -include("../main/hipe.hrl"). -include("../flow/cfg.hrl"). +-type target_context() :: any(). + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) @@ -68,18 +63,29 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -spec stackalloc(#cfg{}, [_], non_neg_integer(), - comp_options(), module(), hipe_temp_map()) -> - {hipe_spill_map(), non_neg_integer()}. + comp_options(), module(), target_context(), hipe_temp_map()) -> + {hipe_spill_map(), non_neg_integer()}. -stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> +stackalloc(CFG, StackSlots, SpillIndex, Options, TgtMod, TgtCtx, TempMap) -> + Liveness = TgtMod:analyze(CFG,TgtCtx), + stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, TgtMod, TgtCtx, TempMap). + +-spec stackalloc(#cfg{}, _, [_], non_neg_integer(), + comp_options(), module(), target_context(), hipe_temp_map()) -> + {hipe_spill_map(), non_neg_integer()}. + +stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, TgtMod, TgtCtx, + TempMap) -> case proplists:get_bool(spillmin_color, Options) of false -> - ?option_time(hipe_spillmin_scan:stackalloc(CFG, StackSlots, SpillIndex, - Options, Target, TempMap), + ?option_time(hipe_spillmin_scan:stackalloc( + CFG, Liveness, StackSlots, SpillIndex, Options, TgtMod, + TgtCtx, TempMap), "Spill minimize, linear scan", Options); true -> - ?option_time(hipe_spillmin_color:stackalloc(CFG, StackSlots, SpillIndex, - Options, Target, TempMap), + ?option_time(hipe_spillmin_color:stackalloc( + CFG, Liveness, StackSlots, SpillIndex, Options, TgtMod, + TgtCtx, TempMap), "Spill minimize, graph coloring", Options) end. diff --git a/lib/hipe/opt/hipe_spillmin_color.erl b/lib/hipe/opt/hipe_spillmin_color.erl index 7c23de44b4..f87d9a5b61 100644 --- a/lib/hipe/opt/hipe_spillmin_color.erl +++ b/lib/hipe/opt/hipe_spillmin_color.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %% =========================================================================== %%@doc @@ -41,7 +35,7 @@ -module(hipe_spillmin_color). --export([stackalloc/6]). +-export([stackalloc/8]). %%-ifndef(DO_ASSERT). %%-define(DO_ASSERT, true). @@ -66,13 +60,17 @@ %% where Location is {spill,M}. %% {spill,M} denotes the Mth spilled node --spec stackalloc(#cfg{}, [_], non_neg_integer(), - comp_options(), module(), hipe_temp_map()) -> +-type target_context() :: any(). + +-spec stackalloc(#cfg{}, _, [_], non_neg_integer(), + comp_options(), module(), target_context(), hipe_temp_map()) -> {hipe_spill_map(), non_neg_integer()}. -stackalloc(CFG, _StackSlots, SpillIndex, _Options, Target, TempMap) -> +stackalloc(CFG, Live, _StackSlots, SpillIndex, _Options, TargetMod, + TargetContext, TempMap) -> + Target = {TargetMod, TargetContext}, ?report2("building IG~n", []), - {IG, NumNodes} = build_ig(CFG, Target, TempMap), + {IG, NumNodes} = build_ig(CFG, Live, Target, TempMap), {Cols, MaxColors} = color_heuristic(IG, 0, NumNodes, NumNodes, NumNodes, Target, 1), SortedCols = lists:sort(Cols), @@ -121,7 +119,7 @@ color_heuristic(IG, Min, Max, Safe, MaxNodes, Target, MaxDepth) -> end; _ -> %% This can be increased from 2, and by this the heuristic can be - %% exited earlier, but the same can be achived by decreasing the + %% exited earlier, but the same can be achieved by decreasing the %% recursion depth. This should not be decreased below 2. case (Max - Min) < 2 of true -> @@ -167,10 +165,14 @@ remap_temp_map0(Cols, [_Y|Ys], SpillIndex) -> %% Returns {Interference_graph, Number_Of_Nodes} %% -build_ig(CFG, Target, TempMap) -> - try build_ig0(CFG, Target, TempMap) - catch error:Rsn -> exit({regalloc, build_ig, Rsn}) - end. +build_ig(CFG, Live, Target, TempMap) -> + TempMapping = map_spilled_temporaries(TempMap), + TempMappingTable = setup_ets(TempMapping), + NumSpilled = length(TempMapping), + IG = build_ig_bbs(labels(CFG, Target), CFG, Live, empty_ig(NumSpilled), + Target, TempMap, TempMappingTable), + ets:delete(TempMappingTable), + {normalize_ig(IG), NumSpilled}. %% Creates an ETS table consisting of the keys given in List, with the values %% being an integer which is the position of the key in List. @@ -185,16 +187,6 @@ setup_ets0([X|Xs], Table, N) -> ets:insert(Table, {X, N}), setup_ets0(Xs, Table, N+1). -build_ig0(CFG, Target, TempMap) -> - Live = Target:analyze(CFG), - TempMapping = map_spilled_temporaries(TempMap), - TempMappingTable = setup_ets(TempMapping), - NumSpilled = length(TempMapping), - IG = build_ig_bbs(Target:labels(CFG), CFG, Live, empty_ig(NumSpilled), - Target, TempMap, TempMappingTable), - ets:delete(TempMappingTable), - {normalize_ig(IG), NumSpilled}. - build_ig_bbs([], _CFG, _Live, IG, _Target, _TempMap, _TempMapping) -> IG; build_ig_bbs([L|Ls], CFG, Live, IG, Target, TempMap, TempMapping) -> @@ -215,16 +207,26 @@ build_ig_bb([X|Xs], LiveOut, IG, Target, TempMap, TempMapping) -> build_ig_bb(Xs, LiveOut, IG, Target, TempMap, TempMapping), build_ig_instr(X, Live, NewIG, Target, TempMap, TempMapping). -build_ig_instr(X, Live, IG, Target, TempMap, TempMapping) -> +build_ig_instr(X, Live0, IG0, Target, TempMap, TempMapping) -> {Def, Use} = def_use(X, Target, TempMap), - ?report3("Live ~w\n~w : Def: ~w Use ~w\n",[Live, X, Def,Use]), + ?report3("Live ~w\n~w : Def: ~w Use ~w\n",[Live0, X, Def,Use]), DefListMapped = list_map(Def, TempMapping, []), UseListMapped = list_map(Use, TempMapping, []), DefSetMapped = ordsets:from_list(DefListMapped), UseSetMapped = ordsets:from_list(UseListMapped), - NewIG = interference_arcs(DefListMapped, ordsets:to_list(Live), IG), - NewLive = ordsets:union(UseSetMapped, ordsets:subtract(Live, DefSetMapped)), - {NewLive, NewIG}. + {Live1, IG1} = + analyze_move(X, Live0, IG0, Target, DefSetMapped, UseSetMapped), + IG = interference_arcs(DefListMapped, ordsets:to_list(Live1), IG1), + Live = ordsets:union(UseSetMapped, ordsets:subtract(Live1, DefSetMapped)), + {Live, IG}. + +analyze_move(X, Live0, IG0, Target, DefSetMapped, UseSetMapped) -> + case {is_spill_move(X, Target), DefSetMapped, UseSetMapped} of + {true, [Dst], [Src]} -> + {ordsets:del_element(Src, Live0), add_move(Src, Dst, IG0)}; + {_, _, _} -> + {Live0, IG0} + end. %% Given a list of Keys and an ets-table returns a list of the elements %% in Mapping corresponding to the Keys and appends Acc to this list. @@ -274,15 +276,6 @@ i_arcs(X, [Y|Ys], IG) -> %% throw an exception (the caller should retry with more stack slots) color(IG, StackSlots, NumNodes, Target) -> - try - color_0(IG, StackSlots, NumNodes, Target) - catch - error:Rsn -> - ?error_msg("Coloring failed with ~p~n", [Rsn]), - ?EXIT(Rsn) - end. - -color_0(IG, StackSlots, NumNodes, Target) -> ?report("simplification of IG~n", []), K = ordsets:size(StackSlots), Nodes = list_ig(IG), @@ -385,7 +378,8 @@ select_colors([{X,colorable}|Xs], IG, Cols, PhysRegs) -> select_color(X, IG, Cols, PhysRegs) -> UsedColors = get_colors(neighbors(X, IG), Cols), - Reg = select_unused_color(UsedColors, PhysRegs), + Preferences = get_colors(move_connected(X, IG), Cols), + Reg = select_unused_color(UsedColors, Preferences, PhysRegs), {Reg, set_color(X, Reg, Cols)}. %%%%%%%%%%%%%%%%%%%% @@ -399,10 +393,14 @@ get_colors([X|Xs], Cols) -> [R|get_colors(Xs, Cols)] end. -select_unused_color(UsedColors, PhysRegs) -> +select_unused_color(UsedColors, Preferences, PhysRegs) -> Summary = ordsets:from_list(UsedColors), - AvailRegs = ordsets:to_list(ordsets:subtract(PhysRegs, Summary)), - hd(AvailRegs). + case ordsets:subtract(ordsets:from_list(Preferences), Summary) of + [PreferredColor|_] -> PreferredColor; + _ -> + AvailRegs = ordsets:to_list(ordsets:subtract(PhysRegs, Summary)), + hd(AvailRegs) + end. push_colored(X, Stk) -> [{X, colorable} | Stk]. @@ -459,7 +457,11 @@ init_stackslots(NumSlots, Acc) -> %% %% Note: later on, we may wish to add 'move-related' support. --record(ig_info, {neighbors = [] :: [_], degree = 0 :: non_neg_integer()}). +-record(ig_info, { + neighbors = [] :: [_], + degree = 0 :: non_neg_integer(), + move_connected = [] :: [_] + }). empty_ig(NumNodes) -> hipe_vectors:new(NumNodes, #ig_info{}). @@ -470,16 +472,29 @@ degree(Info) -> neighbors(Info) -> Info#ig_info.neighbors. +move_connected(Info) -> + Info#ig_info.move_connected. + add_edge(X, X, IG) -> IG; add_edge(X, Y, IG) -> add_arc(X, Y, add_arc(Y, X, IG)). +add_move(X, X, IG) -> IG; +add_move(X, Y, IG) -> + add_move_arc(X, Y, add_move_arc(Y, X, IG)). + add_arc(X, Y, IG) -> Info = hipe_vectors:get(IG, X), Old = neighbors(Info), New = Info#ig_info{neighbors = [Y|Old]}, hipe_vectors:set(IG,X,New). +add_move_arc(X, Y, IG) -> + Info = hipe_vectors:get(IG, X), + Old = move_connected(Info), + New = Info#ig_info{move_connected = [Y|Old]}, + hipe_vectors:set(IG,X,New). + normalize_ig(IG) -> Size = hipe_vectors:size(IG), normalize_ig(Size-1, IG). @@ -489,7 +504,8 @@ normalize_ig(-1, IG) -> normalize_ig(I, IG) -> Info = hipe_vectors:get(IG, I), N = ordsets:from_list(neighbors(Info)), - NewInfo = Info#ig_info{neighbors = N, degree = length(N)}, + M = ordsets:subtract(ordsets:from_list(move_connected(Info)), N), + NewInfo = Info#ig_info{neighbors = N, degree = length(N), move_connected = M}, NewIG = hipe_vectors:set(IG, I, NewInfo), normalize_ig(I-1, NewIG). @@ -497,6 +513,10 @@ neighbors(X, IG) -> Info = hipe_vectors:get(IG, X), Info#ig_info.neighbors. +move_connected(X, IG) -> + Info = hipe_vectors:get(IG, X), + Info#ig_info.move_connected. + decrement_degree(X, IG) -> Info = hipe_vectors:get(IG, X), Degree = degree(Info), @@ -540,18 +560,24 @@ is_visited(X, Vis) -> %% *** INTERFACES TO OTHER MODULES *** %% -liveout(CFG, L, Target) -> - ordsets:from_list(reg_names(Target:liveout(CFG, L), Target)). +labels(CFG, {TgtMod,TgtCtx}) -> + TgtMod:labels(CFG, TgtCtx). + +liveout(CFG, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:liveout(CFG, L, TgtCtx), Target)). -bb(CFG, L, Target) -> - hipe_bb:code(Target:bb(CFG, L)). +bb(CFG, L, {TgtMod,TgtCtx}) -> + hipe_bb:code(TgtMod:bb(CFG, L, TgtCtx)). -def_use(X, Target, TempMap) -> - Defines = [Y || Y <- reg_names(Target:defines(X), Target), +def_use(X, Target={TgtMod,TgtCtx}, TempMap) -> + Defines = [Y || Y <- reg_names(TgtMod:defines(X,TgtCtx), Target), hipe_temp_map:is_spilled(Y, TempMap)], - Uses = [Z || Z <- reg_names(Target:uses(X), Target), + Uses = [Z || Z <- reg_names(TgtMod:uses(X,TgtCtx), Target), hipe_temp_map:is_spilled(Z, TempMap)], {Defines, Uses}. -reg_names(Regs, Target) -> - [Target:reg_nr(X) || X <- Regs]. +reg_names(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. + +is_spill_move(Instr, {TgtMod,TgtCtx}) -> + TgtMod:is_spill_move(Instr, TgtCtx). diff --git a/lib/hipe/opt/hipe_spillmin_scan.erl b/lib/hipe/opt/hipe_spillmin_scan.erl index 06b68e1934..484b05b790 100644 --- a/lib/hipe/opt/hipe_spillmin_scan.erl +++ b/lib/hipe/opt/hipe_spillmin_scan.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %% =========================================================================== %% Copyright (c) 2002 by Niklas Andersson, Andreas Lundin, and Erik Johansson. @@ -60,7 +54,7 @@ -module(hipe_spillmin_scan). --export([stackalloc/6]). +-export([stackalloc/8]). %%-define(DEBUG, 1). -define(HIPE_INSTRUMENT_COMPILER, true). @@ -70,6 +64,8 @@ -include("../main/hipe.hrl"). -include("../flow/cfg.hrl"). +-type target_context() :: any(). + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) @@ -85,15 +81,14 @@ %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% --spec stackalloc(#cfg{}, [_], non_neg_integer(), - comp_options(), module(), hipe_temp_map()) -> +-spec stackalloc(#cfg{}, _, [_], non_neg_integer(), + comp_options(), module(), target_context(), hipe_temp_map()) -> {hipe_spill_map(), non_neg_integer()}. -stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> +stackalloc(CFG, Liveness, StackSlots, SpillIndex, Options, TargetMod, + TargetContext, TempMap) -> + Target = {TargetMod, TargetContext}, ?debug_msg("LinearScan: ~w\n", [erlang:statistics(runtime)]), - %% Step 1: Calculate liveness (Call external implementation.) - Liveness = liveness(CFG, Target), - ?debug_msg("liveness (done)~w\n", [erlang:statistics(runtime)]), USIntervals = calculate_intervals(CFG, Liveness, Options, Target, TempMap), %% ?debug_msg("intervals (done) ~w\n", [erlang:statistics(runtime)]), @@ -124,8 +119,8 @@ stackalloc(CFG, StackSlots, SpillIndex, Options, Target, TempMap) -> %% all other. %%- - - - - - - - - - - - - - - - - - - - - - - - calculate_intervals(CFG, Liveness, _Options, Target, TempMap) -> - Interval = empty_interval(Target:number_of_temporaries(CFG)), - Worklist = Target:reverse_postorder(CFG), + Interval = empty_interval(number_of_temporaries(CFG, Target)), + Worklist = reverse_postorder(CFG, Target), intervals(Worklist, Interval, 1, CFG, Liveness, Target, TempMap). %%- - - - - - - - - - - - - - - - - - - - - - - - @@ -538,23 +533,26 @@ extend_interval(Pos, {Beginning, End}) %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -liveness(CFG, Target) -> - Target:analyze(CFG). +bb(CFG, L, {TgtMod,TgtCtx}) -> + TgtMod:bb(CFG, L, TgtCtx). + +livein(Liveness, L, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:livein(Liveness, L, TgtCtx), Target). -bb(CFG, L, Target) -> - Target:bb(CFG, L). +liveout(Liveness, L, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:liveout(Liveness, L, TgtCtx), Target). -livein(Liveness, L, Target) -> - regnames(Target:livein(Liveness, L), Target). +number_of_temporaries(CFG, {TgtMod,TgtCtx}) -> + TgtMod:number_of_temporaries(CFG, TgtCtx). -liveout(Liveness, L, Target) -> - regnames(Target:liveout(Liveness, L), Target). +uses(I, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:uses(I,TgtCtx), Target). -uses(I, Target) -> - regnames(Target:uses(I), Target). +defines(I, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:defines(I,TgtCtx), Target). -defines(I, Target) -> - regnames(Target:defines(I), Target). +regnames(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. -regnames(Regs, Target) -> - [Target:reg_nr(X) || X <- Regs]. +reverse_postorder(CFG, {TgtMod,TgtCtx}) -> + TgtMod:reverse_postorder(CFG, TgtCtx). diff --git a/lib/hipe/opt/hipe_target_machine.erl b/lib/hipe/opt/hipe_target_machine.erl index f64bb8b518..75993cb95e 100644 --- a/lib/hipe/opt/hipe_target_machine.erl +++ b/lib/hipe/opt/hipe_target_machine.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% diff --git a/lib/hipe/opt/hipe_ultra_mod2.erl b/lib/hipe/opt/hipe_ultra_mod2.erl index f28c4e6939..cec9c56a1e 100644 --- a/lib/hipe/opt/hipe_ultra_mod2.erl +++ b/lib/hipe/opt/hipe_ultra_mod2.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% diff --git a/lib/hipe/opt/hipe_ultra_prio.erl b/lib/hipe/opt/hipe_ultra_prio.erl index 423dc0d6bf..6dd240a33a 100644 --- a/lib/hipe/opt/hipe_ultra_prio.erl +++ b/lib/hipe/opt/hipe_ultra_prio.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% diff --git a/lib/hipe/ppc/Makefile b/lib/hipe/ppc/Makefile index 1901dfa671..1ca1d51846 100644 --- a/lib/hipe/ppc/Makefile +++ b/lib/hipe/ppc/Makefile @@ -63,6 +63,7 @@ MODULES=hipe_ppc \ hipe_ppc_ra_postconditions \ hipe_ppc_ra_postconditions_fp \ hipe_ppc_registers \ + hipe_ppc_subst \ hipe_rtl_to_ppc HRL_FILES=hipe_ppc.hrl diff --git a/lib/hipe/ppc/hipe_ppc.erl b/lib/hipe/ppc/hipe_ppc.erl index 380e791bc1..63ecd0a0b8 100644 --- a/lib/hipe/ppc/hipe_ppc.erl +++ b/lib/hipe/ppc/hipe_ppc.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,10 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% - -module(hipe_ppc). -export([ @@ -106,6 +98,9 @@ pseudo_move_dst/1, pseudo_move_src/1, + mk_pseudo_spill_move/3, + is_pseudo_spill_move/1, + mk_pseudo_tailcall/4, pseudo_tailcall_func/1, pseudo_tailcall_stkargs/1, @@ -139,6 +134,9 @@ pseudo_fmove_dst/1, pseudo_fmove_src/1, + mk_pseudo_spill_fmove/3, + is_pseudo_spill_fmove/1, + mk_defun/8, defun_mfa/1, defun_formals/1, @@ -420,6 +418,10 @@ is_pseudo_move(I) -> case I of #pseudo_move{} -> true; _ -> false end. pseudo_move_dst(#pseudo_move{dst=Dst}) -> Dst. pseudo_move_src(#pseudo_move{src=Src}) -> Src. +mk_pseudo_spill_move(Dst, Temp, Src) -> + #pseudo_spill_move{dst=Dst, temp=Temp, src=Src}. +is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move). + mk_pseudo_tailcall(FunC, Arity, StkArgs, Linkage) -> #pseudo_tailcall{func=FunC, arity=Arity, stkargs=StkArgs, linkage=Linkage}. pseudo_tailcall_func(#pseudo_tailcall{func=FunC}) -> FunC. @@ -503,6 +505,10 @@ is_pseudo_fmove(I) -> case I of #pseudo_fmove{} -> true; _ -> false end. pseudo_fmove_dst(#pseudo_fmove{dst=Dst}) -> Dst. pseudo_fmove_src(#pseudo_fmove{src=Src}) -> Src. +mk_pseudo_spill_fmove(Dst, Temp, Src) -> + #pseudo_spill_fmove{dst=Dst, temp=Temp, src=Src}. +is_pseudo_spill_fmove(I) -> is_record(I, pseudo_spill_fmove). + mk_defun(MFA, Formals, IsClosure, IsLeaf, Code, Data, VarRange, LabelRange) -> #defun{mfa=MFA, formals=Formals, code=Code, data=Data, isclosure=IsClosure, isleaf=IsLeaf, diff --git a/lib/hipe/ppc/hipe_ppc.hrl b/lib/hipe/ppc/hipe_ppc.hrl index aa8ff4a3f7..3eef8be487 100644 --- a/lib/hipe/ppc/hipe_ppc.hrl +++ b/lib/hipe/ppc/hipe_ppc.hrl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,10 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% -%%% - %%%-------------------------------------------------------------------- %%% Basic Values: @@ -95,6 +87,7 @@ -record(pseudo_call_prepare, {nrstkargs}). -record(pseudo_li, {dst, imm}). -record(pseudo_move, {dst, src}). +-record(pseudo_spill_move, {dst, temp, src}). -record(pseudo_tailcall, {func, arity, stkargs, linkage}). -record(pseudo_tailcall_prepare, {}). -record(store, {stop, src, disp, base}). % non-indexed, non-update form @@ -107,6 +100,7 @@ -record(fp_binary, {fp_binop, dst, src1, src2}). -record(fp_unary, {fp_unop, dst, src}). -record(pseudo_fmove, {dst, src}). +-record(pseudo_spill_fmove, {dst, temp, src}). %%% Function definitions. diff --git a/lib/hipe/ppc/hipe_ppc_assemble.erl b/lib/hipe/ppc/hipe_ppc_assemble.erl index d89ff6235c..b0f57e5582 100644 --- a/lib/hipe/ppc/hipe_ppc_assemble.erl +++ b/lib/hipe/ppc/hipe_ppc_assemble.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,10 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% - -module(hipe_ppc_assemble). -export([assemble/4]). @@ -40,7 +32,7 @@ assemble(CompiledCode, Closures, Exports, Options) -> || {MFA, Defun} <- CompiledCode], %% {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = - hipe_pack_constants:pack_constants(Code, hipe_rtl_arch:word_size()), + hipe_pack_constants:pack_constants(Code), %% {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = encode(translate(Code, ConstMap), Options), diff --git a/lib/hipe/ppc/hipe_ppc_cfg.erl b/lib/hipe/ppc/hipe_ppc_cfg.erl index 34d4bf54c5..d44d38f38d 100644 --- a/lib/hipe/ppc/hipe_ppc_cfg.erl +++ b/lib/hipe/ppc/hipe_ppc_cfg.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,25 +11,24 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_cfg). -export([init/1, labels/1, start_label/1, succ/2, + map_bbs/2, fold_bbs/3, bb/2, bb_add/3]). -export([postorder/1]). -export([linearise/1, params/1, reverse_postorder/1]). --export([arity/1]). -%%%-export([redirect_jmp/3, arity/1]). +-export([redirect_jmp/3, arity/1]). +-export([branch_preds/1]). %%% these tell cfg.inc what to define (ugly as hell) -define(BREADTH_ORDER,true). -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -include("hipe_ppc.hrl"). -include("../flow/cfg.hrl"). @@ -80,11 +75,30 @@ branch_successors(Branch) -> #pseudo_tailcall{} -> [] end. +branch_preds(Branch) -> + case Branch of + #bctr{labels=Labels} -> + Prob = 1.0/length(Labels), + [{L, Prob} || L <- Labels]; + #pseudo_bc{true_label=TrueLab,false_label=FalseLab,pred=Pred} -> + [{FalseLab, 1.0-Pred}, {TrueLab, Pred}]; + #pseudo_call{contlab=ContLab, sdesc=#ppc_sdesc{exnlab=[]}} -> + %% A function can still cause an exception, even if we won't catch it + [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}]; + #pseudo_call{contlab=ContLab, sdesc=#ppc_sdesc{exnlab=ExnLab}} -> + CallExnPred = hipe_bb_weights:call_exn_pred(), + [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}]; + _ -> + case branch_successors(Branch) of + [] -> []; + [Single] -> [{Single, 1.0}] + end + end. + -ifdef(REMOVE_TRIVIAL_BBS_NEEDED). fails_to(_Instr) -> []. -endif. --ifdef(notdef). redirect_jmp(I, Old, New) -> case I of #b_label{label=Label} -> @@ -98,10 +112,16 @@ redirect_jmp(I, Old, New) -> if Old =:= FalseLab -> I1#pseudo_bc{false_label=New}; true -> I1 end; - %% handle pseudo_call too? - _ -> I + #pseudo_call{sdesc=SDesc0, contlab=ContLab0} -> + SDesc = case SDesc0 of + #ppc_sdesc{exnlab=Old} -> SDesc0#ppc_sdesc{exnlab=New}; + #ppc_sdesc{exnlab=_} -> SDesc0 + end, + ContLab = if Old =:= ContLab0 -> New; + true -> ContLab0 + end, + I#pseudo_call{sdesc=SDesc, contlab=ContLab} end. --endif. mk_goto(Label) -> hipe_ppc:mk_b_label(Label). diff --git a/lib/hipe/ppc/hipe_ppc_defuse.erl b/lib/hipe/ppc/hipe_ppc_defuse.erl index 77b84dc574..d8a864f7d5 100644 --- a/lib/hipe/ppc/hipe_ppc_defuse.erl +++ b/lib/hipe/ppc/hipe_ppc_defuse.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,13 +11,11 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_defuse). -export([insn_def_all/1, insn_use_all/1]). -export([insn_def_gpr/1, insn_use_gpr/1]). +-export([insn_defs_all_gpr/1, insn_defs_all_fpr/1]). -export([insn_def_fpr/1, insn_use_fpr/1]). -include("hipe_ppc.hrl"). @@ -47,11 +41,15 @@ insn_def_gpr(I) -> #pseudo_call{} -> call_clobbered_gpr(); #pseudo_li{dst=Dst} -> [Dst]; #pseudo_move{dst=Dst} -> [Dst]; + #pseudo_spill_move{dst=Dst,temp=Temp} -> [Dst, Temp]; #pseudo_tailcall_prepare{} -> tailcall_clobbered_gpr(); #unary{dst=Dst} -> [Dst]; _ -> [] end. +insn_defs_all_gpr(#pseudo_call{}) -> true; +insn_defs_all_gpr(_) -> false. + call_clobbered_gpr() -> [hipe_ppc:mk_temp(R, T) || {R,T} <- hipe_ppc_registers:call_clobbered() ++ all_fp_pseudos()]. @@ -74,6 +72,7 @@ insn_use_gpr(I) -> #mtspr{src=Src} -> [Src]; #pseudo_call{sdesc=#ppc_sdesc{arity=Arity}} -> arity_use_gpr(Arity); #pseudo_move{src=Src} -> [Src]; + #pseudo_spill_move{src=Src} -> [Src]; #pseudo_tailcall{arity=Arity,stkargs=StkArgs} -> addsrcs(StkArgs, addtemps(tailcall_clobbered_gpr(), arity_use_gpr(Arity))); #store{src=Src,base=Base} -> addtemp(Src, [Base]); @@ -113,9 +112,13 @@ insn_def_fpr(I) -> #fp_binary{dst=Dst} -> [Dst]; #fp_unary{dst=Dst} -> [Dst]; #pseudo_fmove{dst=Dst} -> [Dst]; + #pseudo_spill_fmove{dst=Dst,temp=Temp} -> [Dst, Temp]; _ -> [] end. +insn_defs_all_fpr(#pseudo_call{}) -> true; +insn_defs_all_fpr(_) -> false. + call_clobbered_fpr() -> [hipe_ppc:mk_temp(R, 'double') || R <- hipe_ppc_registers:allocatable_fpr()]. @@ -126,6 +129,7 @@ insn_use_fpr(I) -> #fp_binary{src1=Src1,src2=Src2} -> addtemp(Src1, [Src2]); #fp_unary{src=Src} -> [Src]; #pseudo_fmove{src=Src} -> [Src]; + #pseudo_spill_fmove{src=Src} -> [Src]; _ -> [] end. diff --git a/lib/hipe/ppc/hipe_ppc_encode.erl b/lib/hipe/ppc/hipe_ppc_encode.erl index 793f6ccc02..1d0ce4f510 100644 --- a/lib/hipe/ppc/hipe_ppc_encode.erl +++ b/lib/hipe/ppc/hipe_ppc_encode.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 4 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% Encode symbolic PowerPC instructions to binary form. %%% Copyright (C) 2003-2005, 2009 Mikael Pettersson diff --git a/lib/hipe/ppc/hipe_ppc_finalise.erl b/lib/hipe/ppc/hipe_ppc_finalise.erl index 8bb9520f89..8db2bf48a5 100644 --- a/lib/hipe/ppc/hipe_ppc_finalise.erl +++ b/lib/hipe/ppc/hipe_ppc_finalise.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_finalise). -export([finalise/1]). diff --git a/lib/hipe/ppc/hipe_ppc_frame.erl b/lib/hipe/ppc/hipe_ppc_frame.erl index ff0450270f..b88b75a5bd 100644 --- a/lib/hipe/ppc/hipe_ppc_frame.erl +++ b/lib/hipe/ppc/hipe_ppc_frame.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,25 +11,20 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_frame). -export([frame/1]). -include("hipe_ppc.hrl"). -include("../rtl/hipe_literals.hrl"). -frame(Defun) -> - Formals = fix_formals(hipe_ppc:defun_formals(Defun)), - Temps0 = all_temps(hipe_ppc:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG) -> + Formals = fix_formals(hipe_ppc_cfg:params(CFG)), + Temps0 = all_temps(CFG, Formals), + MinFrame = defun_minframe(CFG), Temps = ensure_minframe(MinFrame, Temps0), - ClobbersLR = clobbers_lr(hipe_ppc:defun_code(Defun)), - CFG0 = hipe_ppc_cfg:init(Defun), - Liveness = hipe_ppc_liveness_all:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersLR), - hipe_ppc_cfg:linearise(CFG1). + ClobbersLR = clobbers_lr(CFG), + Liveness = hipe_ppc_liveness_all:analyse(CFG), + do_body(CFG, Liveness, Formals, Temps, ClobbersLR). fix_formals(Formals) -> fix_formals(hipe_ppc_registers:nr_args(), Formals). @@ -44,27 +35,16 @@ fix_formals(_, []) -> []. do_body(CFG0, Liveness, Formals, Temps, ClobbersLR) -> Context = mk_context(Liveness, Formals, Temps, ClobbersLR), - CFG1 = do_blocks(CFG0, Context), + CFG1 = hipe_ppc_cfg:map_bbs( + fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG0), do_prologue(CFG1, Context). -do_blocks(CFG, Context) -> - Labels = hipe_ppc_cfg:labels(CFG), - do_blocks(Labels, CFG, Context). - -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) -> Liveness = context_liveness(Context), LiveOut = hipe_ppc_liveness_all:liveout(Liveness, Label), - Block = hipe_ppc_cfg:bb(CFG, Label), Code = hipe_bb:code(Block), - NewCode = do_block(Code, LiveOut, Context), - NewBlock = hipe_bb:code_update(Block, NewCode), - NewCFG = hipe_ppc_cfg:bb_add(CFG, Label, NewBlock), - do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> - CFG. - -do_block(Insns, LiveOut, Context) -> - do_block(Insns, LiveOut, Context, context_framesize(Context), []). + NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), + hipe_bb:code_update(Block, NewCode). do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), @@ -86,10 +66,14 @@ do_insn(I, LiveOut, Context, FPoff) -> do_pseudo_call_prepare(I, FPoff); #pseudo_move{} -> {do_pseudo_move(I, Context, FPoff), FPoff}; + #pseudo_spill_move{} -> + {do_pseudo_spill_move(I, Context, FPoff), FPoff}; #pseudo_tailcall{} -> {do_pseudo_tailcall(I, Context), context_framesize(Context)}; #pseudo_fmove{} -> {do_pseudo_fmove(I, Context, FPoff), FPoff}; + #pseudo_spill_fmove{} -> + {do_pseudo_spill_fmove(I, Context, FPoff), FPoff}; _ -> {[I], FPoff} end. @@ -111,7 +95,26 @@ do_pseudo_move(I, Context, FPoff) -> Offset = pseudo_offset(Src, FPoff, Context), mk_load(hipe_ppc:ldop_word(), Dst, Offset, mk_sp(), []); _ -> - [hipe_ppc:mk_alu('or', Dst, Src, Src)] + case hipe_ppc:temp_reg(Dst) =:= hipe_ppc:temp_reg(Src) of + true -> []; + false -> [hipe_ppc:mk_alu('or', Dst, Src, Src)] + end + end + end. + +do_pseudo_spill_move(I, Context, FPoff) -> + #pseudo_spill_move{dst=Dst,temp=Temp,src=Src} = I, + case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of + false -> % Register allocator changed its mind, turn back to move + do_pseudo_move(hipe_ppc:mk_pseudo_move(Dst, Src), Context, FPoff); + true -> + SrcOffset = pseudo_offset(Src, FPoff, Context), + DstOffset = pseudo_offset(Dst, FPoff, Context), + case SrcOffset =:= DstOffset of + true -> []; % omit move-to-self + false -> + mk_load(hipe_ppc:ldop_word(), Temp, SrcOffset, mk_sp(), + mk_store(hipe_ppc:stop_word(), Temp, DstOffset, mk_sp(), [])) end end. @@ -132,6 +135,22 @@ do_pseudo_fmove(I, Context, FPoff) -> end end. +do_pseudo_spill_fmove(I, Context, FPoff) -> + #pseudo_spill_fmove{dst=Dst,temp=Temp,src=Src} = I, + case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of + false -> % Register allocator changed its mind, turn back to move + do_pseudo_fmove(hipe_ppc:mk_pseudo_fmove(Dst, Src), Context, FPoff); + true -> + SrcOffset = pseudo_offset(Src, FPoff, Context), + DstOffset = pseudo_offset(Dst, FPoff, Context), + case SrcOffset =:= DstOffset of + true -> []; % omit move-to-self + false -> + hipe_ppc:mk_fload(Temp, SrcOffset, mk_sp(), 0) + ++ hipe_ppc:mk_fstore(Temp, DstOffset, mk_sp(), 0) + end + end. + pseudo_offset(Temp, FPoff, Context) -> FPoff + context_offset(Context, Temp). @@ -573,29 +592,41 @@ temp_is_pseudo(Temp) -> %%% Detect if a Defun's body clobbers LR. %%% -clobbers_lr([I|Insns]) -> - case I of - #pseudo_call{} -> true; - %% mtspr to lr cannot occur yet - _ -> clobbers_lr(Insns) - end; -clobbers_lr([]) -> false. +clobbers_lr(CFG) -> + any_insn(fun(#pseudo_call{}) -> true; + (_) -> false + end, CFG). + +any_insn(Pred, CFG) -> + %% Abuse fold to do an efficient "any"-operation using nonlocal control flow + FoundSatisfying = make_ref(), + try fold_insns(fun (I, _) -> + case Pred(I) of + true -> throw(FoundSatisfying); + false -> false + end + end, false, CFG) + of _ -> false + catch FoundSatisfying -> true + end. %%% %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_ppc_defuse:insn_def_all(I)), - S2 = tset_add_list(S1, hipe_ppc_defuse:insn_use_all(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_ppc_defuse:insn_use_all(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_ppc_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). tset_empty() -> gb_sets:new(). @@ -624,16 +655,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_ppc:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_ppc:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_ppc_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/ppc/hipe_ppc_liveness_all.erl b/lib/hipe/ppc/hipe_ppc_liveness_all.erl index cab7605967..42138eea08 100644 --- a/lib/hipe/ppc/hipe_ppc_liveness_all.erl +++ b/lib/hipe/ppc/hipe_ppc_liveness_all.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_liveness_all). -export([analyse/1]). diff --git a/lib/hipe/ppc/hipe_ppc_liveness_fpr.erl b/lib/hipe/ppc/hipe_ppc_liveness_fpr.erl index 1437e27508..eeca0e523e 100644 --- a/lib/hipe/ppc/hipe_ppc_liveness_fpr.erl +++ b/lib/hipe/ppc/hipe_ppc_liveness_fpr.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_liveness_fpr). -export([analyse/1]). diff --git a/lib/hipe/ppc/hipe_ppc_liveness_gpr.erl b/lib/hipe/ppc/hipe_ppc_liveness_gpr.erl index 074fada918..ab9d28266c 100644 --- a/lib/hipe/ppc/hipe_ppc_liveness_gpr.erl +++ b/lib/hipe/ppc/hipe_ppc_liveness_gpr.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_liveness_gpr). -export([analyse/1]). diff --git a/lib/hipe/ppc/hipe_ppc_main.erl b/lib/hipe/ppc/hipe_ppc_main.erl index fd5cc2befb..a094aa65f7 100644 --- a/lib/hipe/ppc/hipe_ppc_main.erl +++ b/lib/hipe/ppc/hipe_ppc_main.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,17 +11,16 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_main). -export([rtl_to_ppc/3]). rtl_to_ppc(MFA, RTL, Options) -> PPC1 = hipe_rtl_to_ppc:translate(RTL), - PPC2 = hipe_ppc_ra:ra(PPC1, Options), - PPC3 = hipe_ppc_frame:frame(PPC2), + PPC1CFG = hipe_ppc_cfg:init(PPC1), + PPC2CFG = hipe_ppc_ra:ra(PPC1CFG, Options), + PPC3CFG = hipe_ppc_frame:frame(PPC2CFG), + PPC3 = hipe_ppc_cfg:linearise(PPC3CFG), PPC4 = hipe_ppc_finalise:finalise(PPC3), ppc_pp(PPC4, MFA, Options), {native, powerpc, {unprofiled, PPC4}}. diff --git a/lib/hipe/ppc/hipe_ppc_pp.erl b/lib/hipe/ppc/hipe_ppc_pp.erl index e69e6b64a2..4ee91f771e 100644 --- a/lib/hipe/ppc/hipe_ppc_pp.erl +++ b/lib/hipe/ppc/hipe_ppc_pp.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_pp). -export([pp/1, pp/2, pp_insn/1]). @@ -170,6 +163,12 @@ pp_insn(Dev, I, Pre) -> io:format(Dev, ", ", []), pp_temp(Dev, Base2), io:format(Dev, "\n", []); + #unary{unop={UnOp,I1,I2,I3}, dst=Dst, src=Src} -> + io:format(Dev, "\t~s ", [UnOp]), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_temp(Dev, Src), + io:format(Dev, ", ~s, ~s, ~s\n", [to_hex(I1),to_hex(I2),to_hex(I3)]); #unary{unop=UnOp, dst=Dst, src=Src} -> io:format(Dev, "\t~w ", [unop_name(UnOp)]), pp_temp(Dev, Dst), diff --git a/lib/hipe/ppc/hipe_ppc_ra.erl b/lib/hipe/ppc/hipe_ppc_ra.erl index 87c776f5d1..b8daf72cef 100644 --- a/lib/hipe/ppc/hipe_ppc_ra.erl +++ b/lib/hipe/ppc/hipe_ppc_ra.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,43 +11,44 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_ra). -export([ra/2]). -ra(Defun0, Options) -> - %% hipe_ppc_pp:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} +ra(CFG0, Options) -> + %% hipe_ppc_pp:pp(hipe_ppc_cfg:linearise(CFG0)), + {CFG1, _FPLiveness1, Coloring_fp, SpillIndex} = case proplists:get_bool(inline_fp, Options) of true -> - hipe_regalloc_loop:ra_fp(Defun0, Options, + FPLiveness0 = hipe_ppc_specific_fp:analyze(CFG0, no_context), + hipe_regalloc_loop:ra_fp(CFG0, FPLiveness0, Options, hipe_coalescing_regalloc, - hipe_ppc_specific_fp); + hipe_ppc_specific_fp, no_context); false -> - {Defun0,[],0} + {CFG0,undefined,[],0} end, - %% hipe_ppc_pp:pp(Defun1), - {Defun2, Coloring} + %% hipe_ppc_pp:pp(hipe_ppc_cfg:linearise(CFG1)), + GPLiveness1 = hipe_ppc_specific:analyze(CFG1, no_context), + {CFG2, _GPLiveness2, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, + hipe_graph_coloring_regalloc); linear_scan -> - hipe_ppc_ra_ls:ra(Defun1, SpillIndex, Options); + hipe_ppc_ra_ls:ra(CFG1, GPLiveness1, SpillIndex, Options); naive -> - hipe_ppc_ra_naive:ra(Defun1, Coloring_fp, Options); + hipe_ppc_ra_naive:ra(CFG1, GPLiveness1, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, - %% hipe_ppc_pp:pp(Defun2), - hipe_ppc_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). + %% hipe_ppc_pp:pp(hipe_ppc_cfg:linearise(CFG2)), + hipe_ppc_ra_finalise:finalise(CFG2, Coloring, Coloring_fp). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_ppc_specific). +ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, + hipe_ppc_specific, no_context). diff --git a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl index ea163221c2..bca504d754 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,21 +11,19 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_ra_finalise). -export([finalise/3]). -include("hipe_ppc.hrl"). -finalise(Defun, TempMap, FPMap0) -> - Code = hipe_ppc:defun_code(Defun), - {_, SpillLimit} = hipe_ppc:defun_var_range(Defun), +finalise(CFG, TempMap, FPMap0) -> + {_, SpillLimit} = hipe_gensym:var_range(ppc), Map = mk_ra_map(TempMap, SpillLimit), FPMap1 = mk_ra_map_fp(FPMap0, SpillLimit), - NewCode = ra_code(Code, Map, FPMap1, []), - Defun#defun{code=NewCode}. + hipe_ppc_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FPMap1) end, CFG). + +ra_bb(BB, Map, FpMap) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap, [])). ra_code([I|Insns], Map, FPMap, Accum) -> ra_code(Insns, Map, FPMap, [ra_insn(I, Map, FPMap) | Accum]); @@ -47,6 +41,7 @@ ra_insn(I, Map, FPMap) -> #mtspr{} -> ra_mtspr(I, Map); #pseudo_li{} -> ra_pseudo_li(I, Map); #pseudo_move{} -> ra_pseudo_move(I, Map); + #pseudo_spill_move{} -> ra_pseudo_spill_move(I, Map); #pseudo_tailcall{} -> ra_pseudo_tailcall(I, Map); #store{} -> ra_store(I, Map); #storex{} -> ra_storex(I, Map); @@ -58,6 +53,7 @@ ra_insn(I, Map, FPMap) -> #fp_binary{} -> ra_fp_binary(I, FPMap); #fp_unary{} -> ra_fp_unary(I, FPMap); #pseudo_fmove{} -> ra_pseudo_fmove(I, FPMap); + #pseudo_spill_fmove{} -> ra_pseudo_spill_fmove(I, FPMap); _ -> I end. @@ -104,6 +100,12 @@ ra_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, Map) -> NewSrc = ra_temp(Src, Map), I#pseudo_move{dst=NewDst,src=NewSrc}. +ra_pseudo_spill_move(I=#pseudo_spill_move{dst=Dst,temp=Temp,src=Src}, Map) -> + NewDst = ra_temp(Dst, Map), + NewTemp = ra_temp(Temp, Map), + NewSrc = ra_temp(Src, Map), + I#pseudo_spill_move{dst=NewDst,temp=NewTemp,src=NewSrc}. + ra_pseudo_tailcall(I=#pseudo_tailcall{stkargs=StkArgs}, Map) -> NewStkArgs = ra_args(StkArgs, Map), I#pseudo_tailcall{stkargs=NewStkArgs}. @@ -162,6 +164,13 @@ ra_pseudo_fmove(I=#pseudo_fmove{dst=Dst,src=Src}, FPMap) -> NewSrc = ra_temp_fp(Src, FPMap), I#pseudo_fmove{dst=NewDst,src=NewSrc}. +ra_pseudo_spill_fmove(I=#pseudo_spill_fmove{dst=Dst,temp=Temp,src=Src}, + FPMap) -> + NewDst = ra_temp_fp(Dst, FPMap), + NewTemp = ra_temp_fp(Temp, FPMap), + NewSrc = ra_temp_fp(Src, FPMap), + I#pseudo_spill_fmove{dst=NewDst,temp=NewTemp,src=NewSrc}. + ra_args([Arg|Args], Map) -> [ra_temp_or_imm(Arg, Map) | ra_args(Args, Map)]; ra_args([], _) -> diff --git a/lib/hipe/ppc/hipe_ppc_ra_ls.erl b/lib/hipe/ppc/hipe_ppc_ra_ls.erl index 6e8304467e..d8b2087919 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_ls.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_ls.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,43 +11,39 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% Linear Scan register allocator for PowerPC -module(hipe_ppc_ra_ls). --export([ra/3]). +-export([ra/4]). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_ppc_cfg:init(NewDefun), - SpillLimit = hipe_ppc_specific:number_of_temporaries(CFG), - alloc(NewDefun, SpillIndex, SpillLimit, Options). +ra(CFG, Liveness, SpillIndex, Options) -> + SpillLimit = hipe_ppc_specific:number_of_temporaries(CFG, no_context), + alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). -alloc(Defun, SpillIndex, SpillLimit, Options) -> - CFG = hipe_ppc_cfg:init(Defun), +alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) -> {Coloring, _NewSpillIndex} = regalloc( - CFG, + CFG, Liveness, hipe_ppc_registers:allocatable_gpr()-- [hipe_ppc_registers:temp3(), hipe_ppc_registers:temp2(), hipe_ppc_registers:temp1()], [hipe_ppc_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, - hipe_ppc_specific), - {NewDefun, _DidSpill} = + hipe_ppc_specific, no_context), + {NewCFG, _DidSpill} = hipe_ppc_ra_postconditions:check_and_rewrite( - Defun, Coloring, 'linearscan'), - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific), + CFG, Coloring, 'linearscan'), + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific, no_context), {TempMap2,_NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, - hipe_ppc_specific, TempMap), + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + hipe_ppc_specific, no_context, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), - {NewDefun, Coloring2}. + {NewCFG, Liveness, Coloring2}. -regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> - hipe_ls_regalloc:regalloc( - CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target). +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, + TgtMod, TgtCtx) -> + hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, + DontSpill, Options, TgtMod, TgtCtx). diff --git a/lib/hipe/ppc/hipe_ppc_ra_naive.erl b/lib/hipe/ppc/hipe_ppc_ra_naive.erl index 24995be252..dee89f66f9 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_naive.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_naive.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,16 +11,13 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_ra_naive). --export([ra/3]). +-export([ra/4]). -include("hipe_ppc.hrl"). -ra(Defun, _Coloring_fp, _Options) -> % -> {Defun, Coloring} - {NewDefun,_DidSpill} = - hipe_ppc_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), - {NewDefun, []}. +ra(CFG, Liveness, _Coloring_fp, _Options) -> % -> {CFG, Liveness, Coloring} + {NewCFG,_DidSpill} = + hipe_ppc_ra_postconditions:check_and_rewrite2(CFG, [], 'naive'), + {NewCFG, Liveness, []}. diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl index 0b16ec3891..0a97129666 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_ra_postconditions). @@ -25,17 +18,13 @@ -include("hipe_ppc.hrl"). -check_and_rewrite(Defun, Coloring, Allocator) -> - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific), - check_and_rewrite2(Defun, TempMap, Allocator). +check_and_rewrite(CFG, Coloring, Allocator) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific, no_context), + check_and_rewrite2(CFG, TempMap, Allocator). -check_and_rewrite2(Defun, TempMap, Allocator) -> +check_and_rewrite2(CFG, TempMap, Allocator) -> Strategy = strategy(Allocator), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - VarRange = {0, hipe_gensym:get_var(ppc)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. + do_bbs(hipe_ppc_cfg:labels(CFG), TempMap, Strategy, CFG, false). strategy(Allocator) -> case Allocator of @@ -44,6 +33,13 @@ strategy(Allocator) -> 'naive' -> 'fixed' end. +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_ppc_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_ppc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). + do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); @@ -61,6 +57,7 @@ do_insn(I, TempMap, Strategy) -> #mtspr{} -> do_mtspr(I, TempMap, Strategy); #pseudo_li{} -> do_pseudo_li(I, TempMap, Strategy); #pseudo_move{} -> do_pseudo_move(I, TempMap, Strategy); + #pseudo_spill_move{} -> do_pseudo_spill_move(I, TempMap, Strategy); #store{} -> do_store(I, TempMap, Strategy); #storex{} -> do_storex(I, TempMap, Strategy); #unary{} -> do_unary(I, TempMap, Strategy); @@ -121,18 +118,25 @@ do_pseudo_li(I=#pseudo_li{dst=Dst}, TempMap, Strategy) -> do_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, TempMap, Strategy) -> %% Either Dst or Src (but not both) may be a pseudo temp. - %% pseudo_move and pseudo_tailcall are special cases: in - %% all other instructions, all temps must be non-pseudos - %% after register allocation. - case temp_is_spilled(Dst, TempMap) of - true -> % Src must not be a pseudo - {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy), - NewI = I#pseudo_move{src=NewSrc}, - {FixSrc ++ [NewI], DidSpill}; + %% pseudo_move, pseudo_spill_move, and pseudo_tailcall are + %% special cases: in all other instructions, all temps + %% must be non-pseudos after register allocation. + case temp_is_spilled(Src, TempMap) + andalso temp_is_spilled(Dst, TempMap) + of + true -> % Turn into pseudo_spill_move + Temp = clone(Src, temp1(Strategy)), + NewI = #pseudo_spill_move{dst=Dst,temp=Temp,src=Src}, + {[NewI], true}; _ -> {[I], false} end. +do_pseudo_spill_move(I=#pseudo_spill_move{temp=Temp}, TempMap, _Strategy) -> + %% Temp is above the low water mark and must not have been spilled + false = temp_is_spilled(Temp, TempMap), + {[I], false}. + do_store(I=#store{src=Src,base=Base}, TempMap, Strategy) -> {FixSrc,NewSrc,DidSpill1} = fix_src1(Src, TempMap, Strategy), {FixBase,NewBase,DidSpill2} = fix_src2(Base, TempMap, Strategy), diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl index 821aa66c11..7342053620 100644 --- a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl +++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,21 +11,21 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_ra_postconditions_fp). -export([check_and_rewrite/2]). -include("hipe_ppc.hrl"). -check_and_rewrite(Defun, Coloring) -> - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific_fp), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, [], false), - VarRange = {0, hipe_gensym:get_var(ppc)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. +check_and_rewrite(CFG, Coloring) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific_fp, no_context), + do_bbs(hipe_ppc_cfg:labels(CFG), TempMap, CFG, false). + +do_bbs([], _TempMap, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_ppc_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, [], DidSpill0), + CFG = hipe_ppc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, CFG, DidSpill). do_insns([I|Insns], TempMap, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap), @@ -46,6 +42,7 @@ do_insn(I, TempMap) -> #fp_binary{} -> do_fp_binary(I, TempMap); #fp_unary{} -> do_fp_unary(I, TempMap); #pseudo_fmove{} -> do_pseudo_fmove(I, TempMap); + #pseudo_spill_fmove{} -> do_pseudo_spill_fmove(I, TempMap); _ -> {[I], false} end. @@ -85,15 +82,22 @@ do_fp_unary(I=#fp_unary{dst=Dst,src=Src}, TempMap) -> {FixSrc ++ [NewI | FixDst], DidSpill1 or DidSpill2}. do_pseudo_fmove(I=#pseudo_fmove{dst=Dst,src=Src}, TempMap) -> - case temp_is_spilled(Dst, TempMap) of - true -> - {FixSrc,NewSrc,DidSpill} = fix_src(Src, TempMap), - NewI = I#pseudo_fmove{src=NewSrc}, - {FixSrc ++ [NewI], DidSpill}; + case temp_is_spilled(Src, TempMap) + andalso temp_is_spilled(Dst, TempMap) + of + true -> % Turn into pseudo_spill_fmove + Temp = clone(Src), + NewI = #pseudo_spill_fmove{dst=Dst,temp=Temp,src=Src}, + {[NewI], true}; _ -> {[I], false} end. +do_pseudo_spill_fmove(I=#pseudo_spill_fmove{temp=Temp}, TempMap) -> + %% Temp is above the low water mark and must not have been spilled + false = temp_is_spilled(Temp, TempMap), + {[I], false}. + %%% Fix Dst and Src operands. fix_src(Src, TempMap) -> diff --git a/lib/hipe/ppc/hipe_ppc_registers.erl b/lib/hipe/ppc/hipe_ppc_registers.erl index f4781d5ed7..86bea784f1 100644 --- a/lib/hipe/ppc/hipe_ppc_registers.erl +++ b/lib/hipe/ppc/hipe_ppc_registers.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_registers). @@ -201,6 +194,8 @@ is_arg(R) -> _ -> false end. +%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is +%% hard-coded in hipe_ppc_defuse:insn_defs_all_gpr/1 call_clobbered() -> % does the RA strip the type or not? [{?R0,tagged},{?R0,untagged}, %% R1 is reserved for C diff --git a/lib/hipe/ppc/hipe_ppc_subst.erl b/lib/hipe/ppc/hipe_ppc_subst.erl new file mode 100644 index 0000000000..e282b22774 --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_subst.erl @@ -0,0 +1,79 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +-module(hipe_ppc_subst). +-export([insn_temps/2]). +-include("hipe_ppc.hrl"). + +%% These should be moved to hipe_ppc and exported +-type temp() :: #ppc_temp{}. +-type oper() :: temp() | #ppc_simm16{} | #ppc_uimm16{}. +-type arg() :: temp() | integer(). +-type insn() :: tuple(). % for now + +-type subst_fun() :: fun((temp()) -> temp()). + +%% @doc Maps over the temporaries in an instruction +-spec insn_temps(subst_fun(), insn()) -> insn(). +insn_temps(T, I) -> + A = fun(O) -> arg_temps(T, O) end, + O = fun(O) -> oper_temps(T, O) end, + case I of + #alu{dst=D,src1=L,src2=R} -> I#alu{dst=T(D),src1=T(L),src2=O(R)}; + #b_label{} -> I; + %% #bc{} -> I; + #bctr{} -> I; + #blr{} -> I; + #cmp{src1=L,src2=R} -> I#cmp{src1=T(L),src2=O(R)}; + #comment{} -> I; + #label{} -> I; + #load{dst=D,base=B} -> I#load{dst=T(D),base=T(B)}; + #loadx{dst=D,base1=L,base2=R} -> I#loadx{dst=T(D),base1=T(L),base2=T(R)}; + #mfspr{dst=D} -> I#mfspr{dst=T(D)}; + #mtcr{src=S} -> I#mtcr{src=T(S)}; + #mtspr{src=S} -> I#mtspr{src=T(S)}; + #pseudo_bc{} -> I; + #pseudo_call{func=F} when not is_record(F, ppc_temp) -> I; + #pseudo_call_prepare{} -> I; + #pseudo_li{dst=D} -> I#pseudo_li{dst=T(D)}; + #pseudo_move{dst=D,src=S} -> I#pseudo_move{dst=T(D),src=T(S)}; + #pseudo_spill_move{dst=D,temp=U,src=S} -> + I#pseudo_spill_move{dst=T(D),temp=T(U),src=T(S)}; + #pseudo_tailcall{func=F,stkargs=Stk} when not is_record(F, ppc_temp) -> + I#pseudo_tailcall{stkargs=lists:map(A,Stk)}; + #pseudo_tailcall_prepare{} -> I; + #store{src=S,base=B} -> I#store{src=T(S),base=T(B)}; + #storex{src=S,base1=L,base2=R} -> + I#storex{src=T(S),base1=T(L),base2=T(R)}; + #unary{dst=D,src=S} -> I#unary{dst=T(D),src=T(S)}; + #lfd{dst=D,base=B} -> I#lfd{dst=T(D),base=T(B)}; + #lfdx{dst=D,base1=L,base2=R} -> I#lfdx{dst=T(D),base1=T(L),base2=T(R)}; + #stfd{src=S,base=B} -> I#stfd{src=T(S),base=T(B)}; + #stfdx{src=S,base1=L,base2=R} -> I#stfdx{src=T(S),base1=T(L),base2=T(R)}; + #fp_binary{dst=D,src1=L,src2=R} -> + I#fp_binary{dst=T(D),src1=T(L),src2=T(R)}; + #fp_unary{dst=D,src=S} -> I#fp_unary{dst=T(D),src=T(S)}; + #pseudo_fmove{dst=D,src=S} -> I#pseudo_fmove{dst=T(D),src=T(S)}; + #pseudo_spill_fmove{dst=D,temp=U,src=S} -> + I#pseudo_spill_fmove{dst=T(D),temp=T(U),src=T(S)} + end. + +-spec oper_temps(subst_fun(), oper()) -> oper(). +oper_temps(SubstTemp, T=#ppc_temp{}) -> SubstTemp(T); +oper_temps(_SubstTemp, I=#ppc_simm16{}) -> I; +oper_temps(_SubstTemp, I=#ppc_uimm16{}) -> I. + +-spec arg_temps(subst_fun(), arg()) -> arg(). +arg_temps(_SubstTemp, Imm) when is_integer(Imm) -> Imm; +arg_temps(SubstTemp, T=#ppc_temp{}) -> SubstTemp(T). diff --git a/lib/hipe/ppc/hipe_rtl_to_ppc.erl b/lib/hipe/ppc/hipe_rtl_to_ppc.erl index a994659616..c0010a8690 100644 --- a/lib/hipe/ppc/hipe_rtl_to_ppc.erl +++ b/lib/hipe/ppc/hipe_rtl_to_ppc.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %%% See the License for the specific language governing permissions and %%% limitations under the License. %%% -%%% %CopyrightEnd% -%%% %%% The PowerPC instruction set is quite irregular. %%% The following quirks must be handled by the translation: %%% @@ -80,7 +74,6 @@ conv_insn(I, Map, Data) -> case I of #alu{} -> conv_alu(I, Map, Data); #alub{} -> conv_alub(I, Map, Data); - #branch{} -> conv_branch(I, Map, Data); #call{} -> conv_call(I, Map, Data); #comment{} -> conv_comment(I, Map, Data); #enter{} -> conv_enter(I, Map, Data); @@ -441,36 +434,53 @@ mk_alu_rr(Dst, Src1, RtlAluOp, Src2) -> conv_alub(I, Map, Data) -> %% dst = src1 aluop src2; if COND goto label - {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), - {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), - {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), - {AluOp, BCond} = - case {hipe_rtl:alub_op(I), hipe_rtl:alub_cond(I)} of - {'add', 'ltu'} -> - {'addc', 'eq'}; - {RtlAlubOp, RtlAlubCond} -> - {conv_alub_op(RtlAlubOp), conv_alub_cond(RtlAlubCond)} - end, - BC = mk_pseudo_bc(BCond, - hipe_rtl:alub_true_label(I), - hipe_rtl:alub_false_label(I), - hipe_rtl:alub_pred(I)), - I2 = - case {AluOp, BCond} of - {'addc', 'eq'} -> % copy XER[CA] to CR0[EQ] before the BC - TmpR = new_untagged_temp(), - [hipe_ppc:mk_mfspr(TmpR, 'xer'), - hipe_ppc:mk_mtcr(TmpR) | - BC]; - _ -> BC - end, - {NewSrc1, NewSrc2} = - case AluOp of - 'subf' -> {Src2, Src1}; - _ -> {Src1, Src2} - end, - I1 = mk_alub(Dst, NewSrc1, AluOp, NewSrc2, BCond), - {I1 ++ I2, Map2, Data}. + HasDst = hipe_rtl:alub_has_dst(I), + {Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map), + {Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0), + RtlAlubOp = hipe_rtl:alub_op(I), + RtlAlubCond = hipe_rtl:alub_cond(I), + case {HasDst, RtlAlubOp} of + {false, sub} -> + {BCond,Sign} = conv_branch_cond(RtlAlubCond), + I2 = mk_branch(Src1, BCond, Sign, Src2, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I)), + {I2, Map1, Data}; + _ -> + {Dst, Map2} = + case HasDst of + false -> {new_untagged_temp(), Map1}; + true -> conv_dst(hipe_rtl:alub_dst(I), Map1) + end, + {AluOp, BCond} = + case {RtlAlubOp, RtlAlubCond} of + {'add', 'ltu'} -> + {'addc', 'eq'}; + {_, _} -> + {conv_alub_op(RtlAlubOp), conv_alub_cond(RtlAlubCond)} + end, + BC = mk_pseudo_bc(BCond, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I)), + I2 = + case {AluOp, BCond} of + {'addc', 'eq'} -> % copy XER[CA] to CR0[EQ] before the BC + TmpR = new_untagged_temp(), + [hipe_ppc:mk_mfspr(TmpR, 'xer'), + hipe_ppc:mk_mtcr(TmpR) | + BC]; + _ -> BC + end, + {NewSrc1, NewSrc2} = + case AluOp of + 'subf' -> {Src2, Src1}; + _ -> {Src1, Src2} + end, + I1 = mk_alub(Dst, NewSrc1, AluOp, NewSrc2, BCond), + {I1 ++ I2, Map2, Data} + end. conv_alub_op(RtlAluOp) -> case {get(hipe_target_arch), RtlAluOp} of @@ -689,17 +699,6 @@ mk_alub_rr_Rc(Dst, Src1, AluOp, Src2) -> end, [hipe_ppc:mk_alu(AluOpDot, Dst, Src1, Src2)]. -conv_branch(I, Map, Data) -> - %% <unused> = src1 - src2; if COND goto label - {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), - {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), - {BCond,Sign} = conv_branch_cond(hipe_rtl:branch_cond(I)), - I2 = mk_branch(Src1, BCond, Sign, Src2, - hipe_rtl:branch_true_label(I), - hipe_rtl:branch_false_label(I), - hipe_rtl:branch_pred(I)), - {I2, Map1, Data}. - conv_branch_cond(Cond) -> % may be unsigned case Cond of gtu -> {'gt', 'unsigned'}; @@ -1031,7 +1030,7 @@ conv_return(I, Map, Data) -> {I2, Map0, Data}. conv_store(I, Map, Data) -> - {Base1, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {Base1, Map0} = conv_src(hipe_rtl:store_base(I), Map), {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {Base2, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), StoreSize = hipe_rtl:store_size(I), @@ -1056,13 +1055,28 @@ mk_store(Src, Base1, Base2, StoreSize) -> end. mk_store2(Src, Base1, Base2, StOp) -> - case hipe_ppc:is_temp(Base2) of + case hipe_ppc:is_temp(Base1) of true -> - mk_store_rr(Src, Base1, Base2, StOp); + case hipe_ppc:is_temp(Base2) of + true -> + mk_store_rr(Src, Base1, Base2, StOp); + _ -> + mk_store_ri(Src, Base1, Base2, StOp) + end; _ -> - mk_store_ri(Src, Base1, Base2, StOp) + case hipe_ppc:is_temp(Base2) of + true -> + mk_store_ri(Src, Base2, Base1, StOp); + _ -> + mk_store_ii(Src, Base1, Base2, StOp) + end end. - + +mk_store_ii(Src, Base, Disp, StOp) -> + Tmp = new_untagged_temp(), + mk_li(Tmp, Base, + mk_store_ri(Src, Tmp, Disp, StOp)). + mk_store_ri(Src, Base, Disp, StOp) -> hipe_ppc:mk_store(StOp, Src, Disp, Base, 'new', []). diff --git a/lib/hipe/regalloc/Makefile b/lib/hipe/regalloc/Makefile index aaa4418f37..662efc9707 100644 --- a/lib/hipe/regalloc/Makefile +++ b/lib/hipe/regalloc/Makefile @@ -1,7 +1,7 @@ # # %CopyrightBegin% # -# Copyright Ericsson AB 2001-2016. All Rights Reserved. +# Copyright Ericsson AB 2001-2017. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -50,7 +50,10 @@ MODULES = hipe_ig hipe_ig_moves hipe_moves \ hipe_optimistic_regalloc \ hipe_coalescing_regalloc \ hipe_graph_coloring_regalloc \ + hipe_range_split \ hipe_regalloc_loop \ + hipe_regalloc_prepass \ + hipe_restore_reuse \ hipe_ls_regalloc \ hipe_ppc_specific hipe_ppc_specific_fp \ hipe_sparc_specific hipe_sparc_specific_fp \ @@ -123,7 +126,6 @@ $(EBIN)/hipe_amd64_specific_x87.beam: hipe_x86_specific_x87.erl $(EBIN)/hipe_coalescing_regalloc.beam: ../main/hipe.hrl $(EBIN)/hipe_graph_coloring_regalloc.beam: ../main/hipe.hrl $(EBIN)/hipe_ig.beam: ../main/hipe.hrl ../flow/cfg.hrl hipe_spillcost.hrl -$(EBIN)/hipe_ig_moves.beam: ../util/hipe_vectors.hrl $(EBIN)/hipe_ls_regalloc.beam: ../main/hipe.hrl $(EBIN)/hipe_optimistic_regalloc.beam: ../main/hipe.hrl $(EBIN)/hipe_regalloc_loop.beam: ../main/hipe.hrl diff --git a/lib/hipe/regalloc/hipe_adj_list.erl b/lib/hipe/regalloc/hipe_adj_list.erl index de59da2410..5066106074 100644 --- a/lib/hipe/regalloc/hipe_adj_list.erl +++ b/lib/hipe/regalloc/hipe_adj_list.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%---------------------------------------------------------------------- %% File : hipe_adj_list.erl diff --git a/lib/hipe/regalloc/hipe_amd64_specific.erl b/lib/hipe/regalloc/hipe_amd64_specific.erl index 6937e71ac7..72900563e6 100644 --- a/lib/hipe/regalloc/hipe_amd64_specific.erl +++ b/lib/hipe/regalloc/hipe_amd64_specific.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% + -define(HIPE_AMD64, true). -include("hipe_x86_specific.erl"). diff --git a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl index 50e5869d45..d592ba391c 100644 --- a/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl +++ b/lib/hipe/regalloc/hipe_amd64_specific_sse2.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,44 +11,58 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_amd64_specific_sse2). --export([number_of_temporaries/1]). +-export([number_of_temporaries/2]). % The following exports are used as M:F(...) calls from other modules; %% e.g. hipe_amd64_ra_ls. --export([analyze/1, - bb/2, - args/1, - labels/1, - livein/2, - liveout/2, - uses/1, - defines/1, - def_use/1, - is_arg/1, %% used by hipe_ls_regalloc - is_move/1, - is_fixed/1, %% used by hipe_graph_coloring_regalloc - is_global/1, - is_precoloured/1, - reg_nr/1, - non_alloc/1, - allocatable/0, - physical_name/1, - all_precoloured/0, - new_spill_index/1, %% used by hipe_ls_regalloc - var_range/1, - breadthorder/1, - postorder/1, - reverse_postorder/1]). +-export([analyze/2, + bb/3, + args/2, + labels/2, + livein/3, + liveout/3, + uses/2, + defines/2, + defines_all_alloc/2, + def_use/2, + is_arg/2, %% used by hipe_ls_regalloc + is_move/2, + is_spill_move/2, + is_fixed/2, %% used by hipe_graph_coloring_regalloc + is_global/2, + is_precoloured/2, + reg_nr/2, + non_alloc/2, + allocatable/1, + allocatable/2, + temp0/1, + physical_name/2, + all_precoloured/1, + new_spill_index/2, %% used by hipe_ls_regalloc + var_range/2, + breadthorder/2, + postorder/2, + reverse_postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3, + check_and_rewrite/4]). + +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). + +%% callbacks for hipe_bb_weights +-export([branch_preds/2]). %%---------------------------------------------------------------------------- @@ -60,86 +70,102 @@ %%---------------------------------------------------------------------------- -defun_to_cfg(Defun) -> - hipe_x86_cfg:init(Defun). +check_and_rewrite(CFG, Coloring, no_context) -> + hipe_amd64_ra_sse2_postconditions:check_and_rewrite(CFG, Coloring). -check_and_rewrite(Defun, Coloring) -> - hipe_amd64_ra_sse2_postconditions:check_and_rewrite(Defun, Coloring). +check_and_rewrite(CFG, Coloring, Strategy, no_context) -> + hipe_amd64_ra_sse2_postconditions:check_and_rewrite( + CFG, Coloring, Strategy). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_x86_cfg:reverse_postorder(CFG). -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_x86_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_x86_cfg:postorder(CFG). -is_global(_Reg) -> - false. +is_global(Reg, _) -> + hipe_amd64_registers:sse2_temp0() =:= Reg. -is_fixed(_Reg) -> +is_fixed(_Reg, _) -> false. -is_arg(_Reg) -> +is_arg(_Reg, _) -> false. --spec args(#cfg{}) -> []. -args(_CFG) -> +-spec args(#cfg{}, no_context) -> []. +args(_CFG, _) -> []. -non_alloc(_) -> +non_alloc(_, _) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> hipe_amd64_liveness:analyze(CFG). -livein(Liveness, L) -> +livein(Liveness, L, _) -> [X || X <- hipe_amd64_liveness:livein(Liveness, L), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double']. -liveout(BB_in_out_liveness, Label) -> +liveout(BB_in_out_liveness, Label, _) -> [X || X <- hipe_amd64_liveness:liveout(BB_in_out_liveness, Label), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double']. %% Registers stuff -allocatable() -> - hipe_amd64_registers:allocatable_sse2(). +allocatable(Ctx) -> + allocatable('normal', Ctx). + +allocatable('normal', _) -> + hipe_amd64_registers:allocatable_sse2(); +allocatable('linearscan', _) -> + hipe_amd64_registers:allocatable_sse2() -- + [hipe_amd64_registers:sse2_temp0()]. + +temp0(_) -> + hipe_amd64_registers:sse2_temp0(). -all_precoloured() -> - allocatable(). +all_precoloured(Ctx) -> + allocatable(Ctx). -is_precoloured(Reg) -> - lists:member(Reg,all_precoloured()). +is_precoloured(Reg, _) -> + hipe_amd64_registers:is_precoloured_sse2(Reg). -physical_name(Reg) -> +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_x86_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) -> hipe_gensym:var_range(x86). --spec number_of_temporaries(#cfg{}) -> non_neg_integer(). -number_of_temporaries(_CFG) -> +-spec number_of_temporaries(#cfg{}, no_context) -> non_neg_integer(). +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(x86), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG, L) -> +bb(CFG, L, _) -> hipe_x86_cfg:bb(CFG, L). +update_bb(CFG,L,BB,_) -> + hipe_x86_cfg:bb_add(CFG,L,BB). + +branch_preds(Instr,_) -> + hipe_x86_cfg:branch_preds(Instr). + %% AMD64 stuff -def_use(Instruction) -> +def_use(Instruction, _) -> {[X || X <- hipe_amd64_defuse:insn_def(Instruction), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double'], @@ -148,17 +174,19 @@ def_use(Instruction) -> hipe_x86:temp_type(X) =:= 'double'] }. -uses(I) -> +uses(I, _) -> [X || X <- hipe_amd64_defuse:insn_use(I), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double']. -defines(I) -> +defines(I, _) -> [X || X <- hipe_amd64_defuse:insn_def(I), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double']. -is_move(Instruction) -> +defines_all_alloc(I, _) -> hipe_amd64_defuse:insn_defs_all(I). + +is_move(Instruction, _) -> case hipe_x86:is_fmove(Instruction) of true -> Src = hipe_x86:fmove_src(Instruction), @@ -167,10 +195,51 @@ is_move(Instruction) -> andalso hipe_x86:is_temp(Dst) andalso hipe_x86:temp_is_allocatable(Dst); false -> false end. + +is_spill_move(Instruction,_) -> + hipe_x86:is_pseudo_spill_fmove(Instruction). -reg_nr(Reg) -> +reg_nr(Reg, _) -> hipe_x86:temp_reg(Reg). --spec new_spill_index(non_neg_integer()) -> pos_integer(). -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +mk_move(Src, Dst, _) -> + hipe_x86:mk_fmove(Src, Dst). + +mk_goto(Label, _) -> + hipe_x86:mk_jmp_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + Ref = make_ref(), + put(Ref, false), + I = hipe_x86_subst:insn_lbls( + fun(Tgt) -> + if Tgt =:= ToOld -> put(Ref, true), ToNew; + is_integer(Tgt) -> Tgt + end + end, Jmp), + true = erase(Ref), % Assert that something was rewritten + I. + +new_label(_) -> + hipe_gensym:get_next_label(x86). + +new_reg_nr(_) -> + hipe_gensym:get_next_var(x86). + +update_reg_nr(Nr, _Temp, _) -> + hipe_x86:mk_temp(Nr, 'double'). + +subst_temps(SubstFun, Instr, _) -> + hipe_amd64_subst:insn_temps( + fun(Op) -> + case hipe_x86:temp_is_allocatable(Op) + andalso hipe_x86:temp_type(Op) =:= 'double' + of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + +-spec new_spill_index(non_neg_integer(), no_context) -> pos_integer(). +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) -> SpillIndex + 1. diff --git a/lib/hipe/regalloc/hipe_amd64_specific_x87.erl b/lib/hipe/regalloc/hipe_amd64_specific_x87.erl index 2160e93d24..918f72f5f2 100644 --- a/lib/hipe/regalloc/hipe_amd64_specific_x87.erl +++ b/lib/hipe/regalloc/hipe_amd64_specific_x87.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% + -define(HIPE_AMD64, true). -include("hipe_x86_specific_x87.erl"). diff --git a/lib/hipe/regalloc/hipe_arm_specific.erl b/lib/hipe/regalloc/hipe_arm_specific.erl index 4e34cb1d99..7ebc6aa336 100644 --- a/lib/hipe/regalloc/hipe_arm_specific.erl +++ b/lib/hipe/regalloc/hipe_arm_specific.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,121 +11,138 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_arm_specific). %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 - ,analyze/1 - ,labels/1 - ,all_precoloured/0 - ,bb/2 - ,liveout/2 - ,reg_nr/1 - ,def_use/1 - ,is_move/1 - ,is_precoloured/1 - ,var_range/1 - ,allocatable/0 - ,non_alloc/1 - ,physical_name/1 - ,reverse_postorder/1 - ,livein/2 - ,uses/1 - ,defines/1 +-export([number_of_temporaries/2 + ,analyze/2 + ,labels/2 + ,all_precoloured/1 + ,bb/3 + ,liveout/3 + ,reg_nr/2 + ,def_use/2 + ,is_move/2 + ,is_spill_move/2 + ,is_precoloured/2 + ,var_range/2 + ,allocatable/1 + ,non_alloc/2 + ,physical_name/2 + ,reverse_postorder/2 + ,livein/3 + ,uses/2 + ,defines/2 + ,defines_all_alloc/2 ]). %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]). %% for hipe_ls_regalloc: --export([args/1, is_arg/1, is_global/1, new_spill_index/1]). --export([breadthorder/1, postorder/1]). +-export([args/2, is_arg/2, is_global/2, new_spill_index/2]). +-export([breadthorder/2, postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3]). + +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). -defun_to_cfg(Defun) -> - hipe_arm_cfg:init(Defun). +%% callbacks for hipe_bb_weights, hipe_range_split +-export([branch_preds/2]). -check_and_rewrite(Defun, Coloring) -> - hipe_arm_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring, no_context) -> + hipe_arm_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_arm_cfg:reverse_postorder(CFG). -non_alloc(CFG) -> - non_alloc(hipe_arm_registers:nr_args(), hipe_arm_cfg:params(CFG)). +non_alloc(CFG, no_context) -> + non_alloc_1(hipe_arm_registers:nr_args(), hipe_arm_cfg:params(CFG)). %% same as hipe_arm_frame:fix_formals/2 -non_alloc(0, Rest) -> Rest; -non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); -non_alloc(_, []) -> []. +non_alloc_1(0, Rest) -> Rest; +non_alloc_1(N, [_|Rest]) -> non_alloc_1(N-1, Rest); +non_alloc_1(_, []) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> hipe_arm_liveness_gpr:analyse(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) -> [X || X <- hipe_arm_liveness_gpr:livein(Liveness,L), hipe_arm:temp_is_allocatable(X)]. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) -> [X || X <- hipe_arm_liveness_gpr:liveout(BB_in_out_liveness,Label), hipe_arm:temp_is_allocatable(X)]. %% Registers stuff -allocatable() -> +allocatable(no_context) -> hipe_arm_registers:allocatable_gpr(). -all_precoloured() -> +all_precoloured(no_context) -> hipe_arm_registers:all_precoloured(). -is_precoloured(Reg) -> +is_precoloured(Reg, _) -> hipe_arm_registers:is_precoloured_gpr(Reg). -is_fixed(R) -> +is_fixed(R, _) -> hipe_arm_registers:is_fixed(R). -physical_name(Reg) -> +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_arm_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) -> hipe_gensym:var_range(arm). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(arm), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) -> hipe_arm_cfg:bb(CFG,L). +update_bb(CFG,L,BB,_) -> + hipe_arm_cfg:bb_add(CFG,L,BB). + +branch_preds(Branch,_) -> + hipe_arm_cfg:branch_preds(Branch). + %% ARM stuff -def_use(Instruction) -> - {defines(Instruction), uses(Instruction)}. +def_use(Instruction, Ctx) -> + {defines(Instruction, Ctx), uses(Instruction, Ctx)}. -uses(I) -> +uses(I, _) -> [X || X <- hipe_arm_defuse:insn_use_gpr(I), hipe_arm:temp_is_allocatable(X)]. -defines(I) -> +defines(I, _) -> [X || X <- hipe_arm_defuse:insn_def_gpr(I), hipe_arm:temp_is_allocatable(X)]. -is_move(Instruction) -> +defines_all_alloc(I, _) -> + hipe_arm_defuse:insn_defs_all_gpr(I). + +is_move(Instruction, _) -> case hipe_arm:is_pseudo_move(Instruction) of true -> Dst = hipe_arm:pseudo_move_dst(Instruction), @@ -142,28 +155,67 @@ is_move(Instruction) -> false -> false end. -reg_nr(Reg) -> +is_spill_move(Instruction, _) -> + hipe_arm:is_pseudo_spill_move(Instruction). + +reg_nr(Reg, _) -> hipe_arm:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_arm:mk_pseudo_move(Dst, Src). + +mk_goto(Label, _) -> + hipe_arm:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + Ref = make_ref(), + put(Ref, false), + I = hipe_arm_subst:insn_lbls( + fun(Tgt) -> + if Tgt =:= ToOld -> put(Ref, true), ToNew; + is_integer(Tgt) -> Tgt + end + end, Jmp), + true = erase(Ref), % Assert that something was rewritten + I. + +new_label(_) -> + hipe_gensym:get_next_label(arm). + +new_reg_nr(_) -> + hipe_gensym:get_next_var(arm). + +update_reg_nr(Nr, Temp, _) -> + hipe_arm:mk_temp(Nr, hipe_arm:temp_type(Temp)). + +subst_temps(SubstFun, Instr, _) -> + hipe_arm_subst:insn_temps( + fun(Op) -> + case hipe_arm:temp_is_allocatable(Op) of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + %%% Linear Scan stuff -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) -> SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_arm_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_arm_cfg:postorder(CFG). -is_global(R) -> +is_global(R, _) -> R =:= hipe_arm_registers:temp1() orelse R =:= hipe_arm_registers:temp2() orelse R =:= hipe_arm_registers:temp3() orelse hipe_arm_registers:is_fixed(R). -is_arg(R) -> +is_arg(R, _) -> hipe_arm_registers:is_arg(R). -args(CFG) -> +args(CFG, _) -> hipe_arm_registers:args(hipe_arm_cfg:arity(CFG)). diff --git a/lib/hipe/regalloc/hipe_coalescing_regalloc.erl b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl index e2f817d369..b8f0a1974c 100644 --- a/lib/hipe/regalloc/hipe_coalescing_regalloc.erl +++ b/lib/hipe/regalloc/hipe_coalescing_regalloc.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%----------------------------------------------------------------------- %% File : hipe_coalescing_regalloc.erl @@ -30,7 +24,7 @@ %%----------------------------------------------------------------------- -module(hipe_coalescing_regalloc). --export([regalloc/5]). +-export([regalloc/7]). %%-ifndef(DEBUG). %%-define(DEBUG,true). @@ -51,19 +45,21 @@ %% %% Returns: %% Coloring -- A coloring for specified CFG -%% SpillIndex0 -- A new spill index +%% SpillIndex2 -- A new spill index %%----------------------------------------------------------------------- -regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> +regalloc(CFG, Liveness, SpillIndex, SpillLimit, TargetMod, TargetContext, + _Options) -> + Target = {TargetMod, TargetContext}, %% Build interference graph ?debug_msg("Build IG\n", []), - IG = hipe_ig:build(CFG, Target), + IG = hipe_ig:build(CFG, Liveness, TargetMod, TargetContext), %% io:format("IG: ~p\n", [IG]), ?debug_msg("Init\n", []), - Num_Temps = Target:number_of_temporaries(CFG), + Num_Temps = TargetMod:number_of_temporaries(CFG,TargetContext), ?debug_msg("Coalescing RA: num_temps = ~p~n", [Num_Temps]), - Allocatable = Target:allocatable(), + Allocatable = TargetMod:allocatable(TargetContext), K = length(Allocatable), All_colors = colset_from_list(Allocatable), @@ -72,7 +68,8 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> Move_sets = hipe_moves:new(IG), ?debug_msg("Build Worklist\n", []), - Worklists = hipe_reg_worklists:new(IG, Target, CFG, Move_sets, K, Num_Temps), + Worklists = hipe_reg_worklists:new(IG, TargetMod, TargetContext, CFG, + Move_sets, K, Num_Temps), Alias = initAlias(Num_Temps), ?debug_msg("Do coloring\n~p~n", [Worklists]), @@ -81,10 +78,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> %% io:format("SelStk0 ~w\n",[SelStk0]), ?debug_msg("Init node sets\n", []), Node_sets = hipe_node_sets:new(), - %% io:format("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,Target:non_alloc(CFG)]), + %% io:format("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,non_alloc(CFG,Target)]), ?debug_msg("Default coloring\n", []), {Color0,Node_sets1} = - defaultColoring(Target:all_precoloured(), + defaultColoring(TargetMod:all_precoloured(TargetContext), initColor(Num_Temps), Node_sets, Target), ?debug_msg("Assign colors\n", []), @@ -94,9 +91,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> %% io:format("color0:~w\nColor1:~w\nNodes:~w\nNodes2:~w\nNum_Temps:~w\n",[Color0,Color1,Node_sets,Node_sets2,Num_Temps]), ?debug_msg("Build mapping ~p\n", [Node_sets2]), - Coloring = build_namelist(Node_sets2, SpillIndex, Alias0, Color1), + {Coloring, SpillIndex2} = + build_namelist(Node_sets2, SpillIndex, Alias0, Color1), ?debug_msg("Coloring ~p\n", [Coloring]), - Coloring. + {Coloring, SpillIndex2}. %%---------------------------------------------------------------------- %% Function: do_coloring @@ -379,7 +377,7 @@ assignColors(Stack, NodeSets, Color, Alias, AllColors, Target) -> false -> % Colour case Col = colset_smallest(OkColors), NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), - Color1 = setColor(Node, Target:physical_name(Col), Color), + Color1 = setColor(Node, physical_name(Col,Target), Color), assignColors(Stack1, NodeSets1, Color1, Alias, AllColors, Target) end end. @@ -402,7 +400,7 @@ assignColors(Stack, NodeSets, Color, Alias, AllColors, Target) -> defaultColoring([], Color, NodeSets, _Target) -> {Color,NodeSets}; defaultColoring([Reg|Regs], Color, NodeSets, Target) -> - Color1 = setColor(Reg,Target:physical_name(Reg), Color), + Color1 = setColor(Reg,physical_name(Reg,Target), Color), NodeSets1 = hipe_node_sets:add_colored(Reg, NodeSets), defaultColoring(Regs, Color1, NodeSets1, Target). @@ -567,7 +565,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) -> ?debug_msg("Testing nodes ~p and ~p for coalescing~n",[Dest,Source]), Alias_src = getAlias(Source, Alias), Alias_dst = getAlias(Dest, Alias), - {U,V} = case Target:is_precoloured(Alias_dst) of + {U,V} = case is_precoloured(Alias_dst,Target) of true -> {Alias_dst, Alias_src}; false -> {Alias_src, Alias_dst} end, @@ -577,7 +575,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) -> Worklists1 = add_worklist(Worklists, U, K, Moves1, IG, Target), {Moves1, IG, Worklists1, Alias}; true -> - case (Target:is_precoloured(V) orelse + case (is_precoloured(V,Target) orelse hipe_ig:nodes_are_adjacent(U, V, IG)) of true -> Moves1 = Moves0, % drop constrained move Move @@ -585,7 +583,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) -> Worklists2 = add_worklist(Worklists1, V, K, Moves1, IG, Target), {Moves1, IG, Worklists2, Alias}; false -> - case (case Target:is_precoloured(U) of + case (case is_precoloured(U,Target) of true -> AdjV = hipe_ig:node_adj_list(V, IG), all_adjacent_ok(AdjV, U, Worklists, IG, K, Target); @@ -627,7 +625,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) -> %%---------------------------------------------------------------------- add_worklist(Worklists, U, K, Moves, IG, Target) -> - case (not(Target:is_precoloured(U)) + case (not(is_precoloured(U,Target)) andalso not(hipe_moves:move_related(U, Moves)) andalso (hipe_ig:is_trivially_colourable(U, K, IG))) of true -> @@ -711,7 +709,7 @@ combine(U, V, IG, Worklists, Moves, Alias, K, Target) -> combine_edges([], _U, IG, Worklists, Moves, _K, _Target) -> {IG, Worklists, Moves}; -combine_edges([T|Ts], U, IG, Worklists, Moves, K, Target) -> +combine_edges([T|Ts], U, IG, Worklists, Moves, K, Target={TgtMod,TgtCtx}) -> case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of true -> combine_edges(Ts, U, IG, Worklists, Moves, K, Target); _ -> @@ -728,7 +726,7 @@ combine_edges([T|Ts], U, IG, Worklists, Moves, K, Target) -> %% worklist, and that's where decrement_degree() expects to find it. %% This issue is not covered in the published algorithm. OldDegree = hipe_ig:get_node_degree(T, IG), - IG1 = hipe_ig:add_edge(T, U, IG, Target), + IG1 = hipe_ig:add_edge(T, U, IG, TgtMod, TgtCtx), NewDegree = hipe_ig:get_node_degree(T, IG1), Worklists0 = if NewDegree =:= K, OldDegree =:= K-1 -> @@ -767,7 +765,7 @@ combine_edges([T|Ts], U, IG, Worklists, Moves, K, Target) -> ok(T, R, IG, K, Target) -> ((hipe_ig:is_trivially_colourable(T, K, IG)) - orelse Target:is_precoloured(T) + orelse is_precoloured(T,Target) orelse hipe_ig:nodes_are_adjacent(T, R, IG)). %%---------------------------------------------------------------------- @@ -916,7 +914,7 @@ findCheapest([Node|Nodes], IG, Cost, Cheapest, SpillLimit) -> %% limit are extremely expensive. getCost(Node, IG, SpillLimit) -> - case Node > SpillLimit of + case Node >= SpillLimit of true -> inf; false -> hipe_ig:node_spill_cost(Node, IG) end. @@ -1028,3 +1026,15 @@ freezeEm3(_U, V, _M, K, WorkLists, Moves, IG, _Alias) -> false -> {WorkLists, Moves1} end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to external functions. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +is_precoloured(R, {TgtMod,TgtCtx}) -> + TgtMod:is_precoloured(R,TgtCtx). + +physical_name(R, {TgtMod,TgtCtx}) -> + TgtMod:physical_name(R,TgtCtx). diff --git a/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl index bc6e442236..f82d3a2cbc 100644 --- a/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl +++ b/lib/hipe/regalloc/hipe_graph_coloring_regalloc.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%@doc @@ -51,7 +45,7 @@ %% -module(hipe_graph_coloring_regalloc). --export([regalloc/5]). +-export([regalloc/7]). %%-ifndef(DO_ASSERT). %%-define(DO_ASSERT, true). @@ -77,18 +71,21 @@ %% that the coloring agrees with the interference graph (that is, that %% no neighbors have the same register or spill location). -%% @spec regalloc(#cfg{}, non_neg_fixnum(), non_neg_fixnum(), atom(), list()) -> {, non_neg_fixnum()} +%% @spec regalloc(#cfg{}, liveness(), non_neg_fixnum(), non_neg_fixnum(), +%% module(), tgt_ctx(), list()) -> {, non_neg_fixnum()} -regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> - PhysRegs = Target:allocatable(), +regalloc(CFG, Live, SpillIndex, SpillLimit, TargetMod, TargetContext, + _Options) -> + Target = {TargetMod, TargetContext}, + PhysRegs = allocatable(Target), ?report2("building IG~n", []), - {IG, Spill} = build_ig(CFG, Target), + {IG, Spill} = build_ig(CFG, Live, Target), %% check_ig(IG), ?report3("graph: ~p~nphysical regs: ~p~n", [list_ig(IG), PhysRegs]), %% These nodes *can't* be allocated to registers. - NotAllocatable = [Target:reg_nr(X) || X <- Target:non_alloc(CFG)], + NotAllocatable = non_alloc(CFG, Target), %% i.e. Arguments on x86 ?report2("Nonalloc ~w~n", [NotAllocatable]), @@ -97,7 +94,7 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ordsets:from_list(PhysRegs), SpillIndex, SpillLimit, - Target:number_of_temporaries(CFG), + number_of_temporaries(CFG, Target), Target, NotAllocatable), Coloring = [{X, {reg, X}} || X <- NotAllocatable] ++ Cols, ?ASSERT(check_coloring(Coloring, IG, Target)), @@ -112,15 +109,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> %% Returns {Interference_graph, Spill_cost_dictionary} %% -build_ig(CFG, Target) -> - try build_ig0(CFG, Target) - catch error:Rsn -> exit({?MODULE, build_ig, Rsn}) - end. - -build_ig0(CFG, Target) -> - Live = Target:analyze(CFG), - NumN = Target:number_of_temporaries(CFG), % poss. N-1? - {IG, Spill} = build_ig_bbs(Target:labels(CFG), +build_ig(CFG, Live, Target) -> + NumN = number_of_temporaries(CFG, Target), % poss. N-1? + {IG, Spill} = build_ig_bbs(labels(CFG, Target), CFG, Live, empty_ig(NumN), @@ -208,17 +199,8 @@ set_spill_cost(X, N, Spill) -> %% * add low-degree neighbors of z to low %% * restart the while-loop above -color(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, NotAllocatable) -> - try color_0(IG, Spill, PhysRegs, SpillIx, SpillLimit, - NumNodes, Target, NotAllocatable) - catch - error:Rsn -> - ?error_msg("Coloring failed with ~p~n", [Rsn]), - ?EXIT(Rsn) - end. - -color_0(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, - NotAllocatable) -> +color(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, + NotAllocatable) -> ?report("simplification of IG~n", []), K = ordsets:size(PhysRegs), Nodes = list_ig(IG), @@ -227,14 +209,14 @@ color_0(IG, Spill, PhysRegs, SpillIx, SpillLimit, NumNodes, Target, %% Any nodes above the spillimit must be colored first... MustNotSpill = - if NumNodes > SpillLimit+1 -> - sort_on_degree(lists:seq(SpillLimit+1,NumNodes-1) -- Low,IG); + if NumNodes > SpillLimit -> + sort_on_degree(lists:seq(SpillLimit,NumNodes-1) -- Low,IG); true -> [] end, ?report(" starting with low degree nodes ~p~n",[Low]), EmptyStk = [], - Precolored = Target:all_precoloured(), + Precolored = all_precoloured(Target), {Stk, NewSpillIx} = simplify(Low, NumNodes, Precolored, IG, Spill, K, SpillIx, EmptyStk, @@ -415,11 +397,11 @@ spill_costs([{N,Info}|Ns], IG, Vis, Spill, SpillLimit, Target) -> true -> spill_costs(Ns,IG,Vis,Spill, SpillLimit, Target); _ -> - case Target:is_fixed(N) of + case is_fixed(N, Target) of true -> spill_costs(Ns, IG, Vis, Spill, SpillLimit, Target); false -> - if N > SpillLimit -> + if N >= SpillLimit -> spill_costs(Ns, IG, Vis, Spill, SpillLimit, Target); true -> [{spill_cost_of(N,Spill)/Deg,N} | @@ -772,18 +754,36 @@ valid_coloring(X, C, [_|Ys]) -> %% *** INTERFACES TO OTHER MODULES *** %% -liveout(CFG, L, Target) -> - ordsets:from_list(reg_names(Target:liveout(CFG, L), Target)). +all_precoloured({TgtMod,TgtCtx}) -> + TgtMod:all_precoloured(TgtCtx). + +allocatable({TgtMod,TgtCtx}) -> + TgtMod:allocatable(TgtCtx). + +is_fixed(Reg, {TgtMod,TgtCtx}) -> + TgtMod:is_fixed(Reg, TgtCtx). + +labels(CFG, {TgtMod,TgtCtx}) -> + TgtMod:labels(CFG, TgtCtx). + +liveout(CFG, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:liveout(CFG, L, TgtCtx), Target)). + +bb(CFG, L, {TgtMod,TgtCtx}) -> + hipe_bb:code(TgtMod:bb(CFG, L, TgtCtx)). + +def_use(X, Target={TgtMod,TgtCtx}) -> + {ordsets:from_list(reg_names(TgtMod:defines(X,TgtCtx), Target)), + ordsets:from_list(reg_names(TgtMod:uses(X,TgtCtx), Target))}. -bb(CFG, L, Target) -> - hipe_bb:code(Target:bb(CFG, L)). +non_alloc(CFG, Target={TgtMod,TgtCtx}) -> + reg_names(TgtMod:non_alloc(CFG, TgtCtx), Target). -def_use(X, Target) -> - {ordsets:from_list(reg_names(Target:defines(X), Target)), - ordsets:from_list(reg_names(Target:uses(X), Target))}. +number_of_temporaries(CFG, {TgtMod,TgtCtx}) -> + TgtMod:number_of_temporaries(CFG, TgtCtx). -reg_names(Regs, Target) -> - [Target:reg_nr(X) || X <- Regs]. +reg_names(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. %% %% Precoloring: use this version when a proper implementation of @@ -803,5 +803,5 @@ precolor0([R|Rs], Cols, Target) -> {[{R, {reg, physical_name(R, Target)}}|Cs], set_color(R, physical_name(R, Target), Cols1)}. -physical_name(X, Target) -> - Target:physical_name(X). +physical_name(X, {TgtMod,TgtCtx}) -> + TgtMod:physical_name(X, TgtCtx). diff --git a/lib/hipe/regalloc/hipe_ig.erl b/lib/hipe/regalloc/hipe_ig.erl index 8fd5d0df1f..14a1ae77f2 100644 --- a/lib/hipe/regalloc/hipe_ig.erl +++ b/lib/hipe/regalloc/hipe_ig.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%---------------------------------------------------------------------- %% File : hipe_ig.erl @@ -28,7 +22,7 @@ -module(hipe_ig). --export([build/2, +-export([build/4, nodes_are_adjacent/3, node_spill_cost/2, node_adj_list/2, @@ -38,8 +32,8 @@ spill_costs/1, adj_list/1, %% adj_set/1, - add_edge/4, - remove_edge/4, + add_edge/5, + remove_edge/5, %% set_adj_set/2, %% set_adj_list/2, %% set_ig_moves/2, @@ -64,6 +58,9 @@ -include("../flow/cfg.hrl"). -include("hipe_spillcost.hrl"). +-type target_context() :: any(). +-type target() :: {TargetMod :: module(), TargetContext :: target_context()}. + %%---------------------------------------------------------------------- -record(igraph, {adj_set, adj_list, ig_moves, degree, @@ -78,11 +75,11 @@ %% degree, and testing for trivial colourability (degree < K). %%---------------------------------------------------------------------- -degree_new(No_temporaries, Target) -> +degree_new(No_temporaries, {TargetMod, TargetCtx}) -> Degree = hipe_bifs:array(No_temporaries, 0), - K = length(Target:allocatable()), + K = length(TargetMod:allocatable(TargetCtx)), Inf = K + No_temporaries, - precoloured_to_inf_degree(Target:all_precoloured(), Inf, Degree). + precoloured_to_inf_degree(TargetMod:all_precoloured(TargetCtx), Inf, Degree). precoloured_to_inf_degree([], _Inf, Degree) -> Degree; precoloured_to_inf_degree([P|Ps], Inf, Degree) -> @@ -344,7 +341,7 @@ set_spill_costs(Spill_costs, IG) -> IG#igraph{spill_costs = Spill_costs}. %% A new interference record %%---------------------------------------------------------------------- --spec initial_ig(non_neg_integer(), atom()) -> #igraph{}. +-spec initial_ig(non_neg_integer(), target()) -> #igraph{}. initial_ig(NumTemps, Target) -> #igraph{adj_set = adjset_new(NumTemps), @@ -361,20 +358,21 @@ initial_ig(NumTemps, Target) -> %% Description: Constructs an interference graph for the specifyed CFG. %% %% Parameters: -%% CFG -- A Control Flow Graph -%% Target -- The module that contains the target-specific functions +%% CFG -- A Control Flow Graph +%% TargetMod -- The module that contains the target-specific functions +%% TargetCtx -- Context data to pass to TargetMod %% %% Returns: %% An interference graph for the given CFG. %%---------------------------------------------------------------------- --spec build(#cfg{}, atom()) -> #igraph{}. +-spec build(#cfg{}, Liveness::_, module(), target_context()) -> #igraph{}. -build(CFG, Target) -> - BBs_in_out_liveness = Target:analyze(CFG), - Labels = Target:labels(CFG), +build(CFG, BBs_in_out_liveness, TargetMod, TargetCtx) -> + Target = {TargetMod, TargetCtx}, + Labels = TargetMod:labels(CFG, TargetCtx), %% How many temporaries exist? - NumTemps = Target:number_of_temporaries(CFG), + NumTemps = TargetMod:number_of_temporaries(CFG, TargetCtx), IG0 = initial_ig(NumTemps, Target), %%?debug_msg("initial adjset: ~p\n",[element(2, IG0)]), %%?debug_msg("initial adjset array: ~.16b\n",[element(3, element(2, IG0))]), @@ -395,7 +393,7 @@ build(CFG, Target) -> %% CFG -- The Control Flow Graph that we constructs %% the interference graph from. %% Target -- The module containing the target-specific -%% functions +%% functions, along with its context data %% %% Returns: %% An interference graph for the given CFG. @@ -404,13 +402,11 @@ build(CFG, Target) -> analyze_bbs([], _, IG, _, _) -> IG; analyze_bbs([L|Ls], BBs_in_out_liveness, IG, CFG, Target) -> % Get basic block associated with label L - BB = Target:bb(CFG, L), + BB = bb(CFG, L, Target), % Get basic block code BB_code = hipe_bb:code(BB), - % Temporaries that are live out from this basic block - BB_liveout = Target:liveout(BBs_in_out_liveness, L), - % Only temporary numbers - BB_liveout_numbers = reg_numbers(BB_liveout, Target), + % Temporaries that are live out from this basic block, only numbers + BB_liveout_numbers = liveout(BBs_in_out_liveness, L, Target), % {Liveness, New Interference Graph} {_, New_ig, Ref} = analyze_bb_instructions(BB_code, ordsets:from_list(BB_liveout_numbers), @@ -433,7 +429,8 @@ analyze_bbs([L|Ls], BBs_in_out_liveness, IG, CFG, Target) -> %% Live -- All temporaries that are live at the time. %% Live is a set of temporary "numbers only". %% IG -- The interference graph in it's current state -%% Target -- The mopdule containing the target-specific functions +%% Target -- The mopdule containing the target-specific functions, +%% along with its context data. %% %% Returns: %% Live -- Temporaries that are live at entery of basic block @@ -449,7 +446,7 @@ analyze_bb_instructions([Instruction|Instructions], Live, IG, Target) -> {Live0, IG0, Ref} = analyze_bb_instructions(Instructions, Live, IG, Target), %% Check for temporaries that are defined and used in instruction - {Def, Use} = Target:def_use(Instruction), + {Def, Use} = def_use(Instruction, Target), %% Convert to register numbers Def_numbers = ordsets:from_list(reg_numbers(Def, Target)), Use_numbers = ordsets:from_list(reg_numbers(Use, Target)), @@ -501,14 +498,15 @@ analyze_bb_instructions([Instruction|Instructions], Live, IG, Target) -> %% Def_numbers -- Temporaries that are defined at this instruction %% Use_numbers -- Temporaries that are used at this instruction %% IG -- The interference graph in its current state -%% Target -- The module containing the target-specific functions +%% Target -- The module containing the target-specific functions, along +%% with its context data %% Returns: %% Live -- An updated live set %% IG -- An updated interference graph %%---------------------------------------------------------------------- analyze_move(Instruction, Live, Def_numbers, Use_numbers, IG, Target) -> - case Target:is_move(Instruction) of + case is_move(Instruction,Target) of true -> case {Def_numbers, Use_numbers} of {[Dst], [Src]} -> @@ -554,8 +552,9 @@ interfere([Define|Defines], Living, IG, Target) -> %% Live -- Current live set %% Lives -- Rest of living temporaries. %% IG -- An interference graph -%% Target -- The module containing the target-specific functions -%% Returns: +%% Target -- The module containing the target-specific functions, along +%% with its context data. +%% Returns: %% An updated interference graph %%---------------------------------------------------------------------- @@ -623,11 +622,15 @@ get_moves(IG) -> %% Parameters: %% U -- A temporary number %% V -- A temporary number -%% Target -- The module containing the target-specific functions +%% TargetMod -- The module containing the target-specific functions. +%% TargetCtx -- Context data to pass to TargetMod %% Returns: %% An updated interference graph. %%---------------------------------------------------------------------- +add_edge(U, V, IG, TargetMod, TargetCtx) -> + add_edge(U, V, IG, {TargetMod, TargetCtx}). + add_edge(U, U, IG, _) -> IG; add_edge(U, V, IG, Target) -> case nodes_are_adjacent(U, V, IG) of @@ -652,11 +655,15 @@ add_edge(U, V, IG, Target) -> %% Parameters: %% U -- A temporary number %% V -- A temporary number -%% Target -- The module containing the target-specific functions +%% TargetMod -- The module containing the target-specific functions. +%% TargetCtx -- Context data for TargetMod. %% Returns: %% An updated interference graph. %%---------------------------------------------------------------------- +remove_edge(U, V, IG, TargetMod, TargetCtx) -> + remove_edge(U, V, IG, {TargetMod, TargetCtx}). + remove_edge(U, U, IG, _) -> IG; remove_edge(U, V, IG, Target) -> case nodes_are_adjacent(U, V, IG) of @@ -683,8 +690,8 @@ remove_edge(U, V, IG, Target) -> %% precoloured. %% Adj_list -- An adj_list %% Degree -- The degree that all nodes currently have -%% Target -- The module containing the target-specific -%% functions +%% Target -- The module containing the target-specific +%% functions, along with its context data. %% %% Returns: %% Adj_list -- An updated adj_list data structure @@ -692,7 +699,7 @@ remove_edge(U, V, IG, Target) -> %%---------------------------------------------------------------------- remove_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> - case Target:is_precoloured(Temp) of + case is_precoloured(Temp,Target) of false -> New_adj_list = hipe_adj_list:remove_edge(Temp, InterfereTemp, Adj_list), degree_dec(Temp, Degree), @@ -714,8 +721,8 @@ remove_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> %% precoloured. %% Adj_list -- An adj_list %% Degree -- The degree that all nodes currently have -%% Target -- The module containing the target-specific -%% functions +%% Target -- The module containing the target-specific +%% functions, along with its context data. %% %% Returns: %% Adj_list -- An updated adj_list data structure @@ -723,7 +730,7 @@ remove_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> %%---------------------------------------------------------------------- interfere_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> - case Target:is_precoloured(Temp) of + case is_precoloured(Temp, Target) of false -> New_adj_list = hipe_adj_list:add_edge(Temp, InterfereTemp, Adj_list), degree_inc(Temp, Degree), @@ -740,13 +747,14 @@ interfere_if_uncolored(Temp, InterfereTemp, Adj_list, Degree, Target) -> %% %% Parameters: %% TRs -- A list of temporary registers -%% Target -- The module containing the target-specific functions +%% Target -- The module containing the target-specific functions, along with +%% its context data. %% Returns: %% A list of register numbers. %%---------------------------------------------------------------------- -reg_numbers(Regs, Target) -> - [Target:reg_nr(X) || X <- Regs]. +reg_numbers(Regs, {TgtMod, TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. %%--------------------------------------------------------------------- %% Print functions - only used for debugging @@ -775,3 +783,24 @@ dec_node_degree(Node, IG) -> is_trivially_colourable(Node, K, IG) -> degree_is_trivially_colourable(Node, K, degree(IG)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to external functions. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +bb(CFG, L, {TgtMod,TgtCtx}) -> + TgtMod:bb(CFG,L,TgtCtx). + +def_use(Instruction, {TgtMod,TgtCtx}) -> + TgtMod:def_use(Instruction, TgtCtx). + +is_move(Instruction, {TgtMod,TgtCtx}) -> + TgtMod:is_move(Instruction, TgtCtx). + +is_precoloured(R, {TgtMod,TgtCtx}) -> + TgtMod:is_precoloured(R,TgtCtx). + +liveout(Liveness,L, Target={TgtMod,TgtCtx}) -> + reg_numbers(TgtMod:liveout(Liveness,L,TgtCtx), Target). diff --git a/lib/hipe/regalloc/hipe_ig_moves.erl b/lib/hipe/regalloc/hipe_ig_moves.erl index b679453de0..e193a682bf 100644 --- a/lib/hipe/regalloc/hipe_ig_moves.erl +++ b/lib/hipe/regalloc/hipe_ig_moves.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%============================================================================= @@ -25,8 +19,6 @@ new_move/3, get_moves/1]). --include("../util/hipe_vectors.hrl"). - %%----------------------------------------------------------------------------- %% The main data structure; its fields are: %% - movelist : mapping from temp to set of associated move numbers @@ -34,11 +26,13 @@ %% - moveinsns : list of move instructions, in descending move number order %% - moveset : set of move instructions --record(ig_moves, {movelist :: hipe_vector(), +-record(ig_moves, {movelist :: movelist(), nrmoves = 0 :: non_neg_integer(), moveinsns = [] :: [{_,_}], moveset = gb_sets:empty() :: gb_sets:set()}). +-type movelist() :: hipe_vectors:vector(ordsets:ordset(non_neg_integer())). + %%----------------------------------------------------------------------------- -spec new(non_neg_integer()) -> #ig_moves{}. @@ -66,7 +60,8 @@ new_move(Dst, Src, IG_moves) -> moveset = gb_sets:insert(MoveInsn, MoveSet)} end. --spec add_movelist(non_neg_integer(), non_neg_integer(), hipe_vector()) -> hipe_vector(). +-spec add_movelist(non_neg_integer(), non_neg_integer(), movelist()) + -> movelist(). add_movelist(MoveNr, Temp, MoveList) -> AssocMoves = hipe_vectors:get(MoveList, Temp), @@ -74,7 +69,7 @@ add_movelist(MoveNr, Temp, MoveList) -> %% ordset due to the ordsets:union in hipe_coalescing_regalloc:combine(). hipe_vectors:set(MoveList, Temp, ordsets:add_element(MoveNr, AssocMoves)). --spec get_moves(#ig_moves{}) -> {hipe_vector(), non_neg_integer(), tuple()}. +-spec get_moves(#ig_moves{}) -> {movelist(), non_neg_integer(), tuple()}. get_moves(IG_moves) -> % -> {MoveList, NrMoves, MoveInsns} {IG_moves#ig_moves.movelist, diff --git a/lib/hipe/regalloc/hipe_ls_regalloc.erl b/lib/hipe/regalloc/hipe_ls_regalloc.erl index d24b803524..785aa2b080 100644 --- a/lib/hipe/regalloc/hipe_ls_regalloc.erl +++ b/lib/hipe/regalloc/hipe_ls_regalloc.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %% ===================================================================== %% @doc @@ -56,7 +50,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -module(hipe_ls_regalloc). --export([regalloc/7]). +-export([regalloc/9]). %%-define(DEBUG,1). -define(HIPE_INSTRUMENT_COMPILER, true). @@ -95,11 +89,10 @@ %% </ol> %% @end %%- - - - - - - - - - - - - - - - - - - - - - - - -regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, + TargetMod, TargetContext) -> + Target = {TargetMod, TargetContext}, ?debug_msg("LinearScan: ~w\n", [erlang:statistics(runtime)]), - %% Step 1: Calculate liveness (Call external implementation.) - Liveness = liveness(CFG, Target), - ?debug_msg("liveness (done)~w\n", [erlang:statistics(runtime)]), USIntervals = calculate_intervals(CFG, Liveness, Entrypoints, Options, Target), ?debug_msg("intervals (done) ~w\n", [erlang:statistics(runtime)]), @@ -108,10 +101,10 @@ regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> %% ?debug_msg("Intervals ~w\n", [Intervals]), ?debug_msg("No intervals: ~w\n",[length(Intervals)]), ?debug_msg("count intervals (done) ~w\n", [erlang:statistics(runtime)]), - Allocation = allocate(Intervals, PhysRegs, SpillIndex, DontSpill, Target), + {Coloring, NewSpillIndex} + = allocate(Intervals, PhysRegs, SpillIndex, DontSpill, Target), ?debug_msg("allocation (done) ~w\n", [erlang:statistics(runtime)]), - Allocation. - + {Coloring, NewSpillIndex}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% @@ -125,32 +118,33 @@ regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> %% Liveness: A map of live-in and live-out sets for each Basic-Block. %% Entrypoints: A set of BB names that have external entrypoints. %% -calculate_intervals(CFG,Liveness,_Entrypoints, Options, Target) -> +calculate_intervals(CFG,Liveness,_Entrypoints, Options, + Target={TgtMod,TgtCtx}) -> %% Add start point for the argument registers. Args = arg_vars(CFG, Target), Interval = - add_def_point(Args, 0, empty_interval(Target:number_of_temporaries(CFG))), + add_def_point(Args, 0, empty_interval(number_of_temporaries(CFG, Target))), %% Interval = add_livepoint(Args, 0, empty_interval()), Worklist = case proplists:get_value(ls_order, Options) of reversepostorder -> - Target:reverse_postorder(CFG); + TgtMod:reverse_postorder(CFG, TgtCtx); breadth -> - Target:breadthorder(CFG); + TgtMod:breadthorder(CFG, TgtCtx); postorder -> - Target:postorder(CFG); + TgtMod:postorder(CFG, TgtCtx); inorder -> - Target:inorder(CFG); + TgtMod:inorder(CFG, TgtCtx); reverse_inorder -> - Target:reverse_inorder(CFG); + TgtMod:reverse_inorder(CFG, TgtCtx); preorder -> - Target:preorder(CFG); + TgtMod:preorder(CFG, TgtCtx); prediction -> - Target:predictionorder(CFG); + TgtMod:predictionorder(CFG, TgtCtx); random -> - Target:labels(CFG); + TgtMod:labels(CFG, TgtCtx); _ -> - Target:reverse_postorder(CFG) + TgtMod:reverse_postorder(CFG, TgtCtx) end, %% ?inc_counter(bbs_counter, length(Worklist)), %% ?debug_msg("No BBs ~w\n",[length(Worklist)]), @@ -290,7 +284,7 @@ allocate([RegInt|RIS], Free, Active, Alloc, SpillIndex, DontSpill, Target) -> alloc(OtherTemp,NewPhys,NewAlloc), SpillIndex, DontSpill, Target); false -> - NewSpillIndex = Target:new_spill_index(SpillIndex), + NewSpillIndex = new_spill_index(SpillIndex, Target), {NewAlloc2, NewActive4} = spill(OtherTemp, OtherEnd, OtherStart, NewActive3, NewAlloc, SpillIndex, DontSpill, Target), @@ -306,7 +300,7 @@ allocate([RegInt|RIS], Free, Active, Alloc, SpillIndex, DontSpill, Target) -> case NewFree of [] -> %% No physical registers available, we have to spill. - NewSpillIndex = Target:new_spill_index(SpillIndex), + NewSpillIndex = new_spill_index(SpillIndex, Target), {NewAlloc, NewActive2} = spill(Temp, endpoint(RegInt), startpoint(RegInt), Active, Alloc, SpillIndex, DontSpill, Target), @@ -752,38 +746,41 @@ create_freeregs([]) -> %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -liveness(CFG, Target) -> - Target:analyze(CFG). +bb(CFG, L, {TgtMod, TgtCtx}) -> + TgtMod:bb(CFG,L,TgtCtx). + +livein(Liveness,L, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:livein(Liveness,L,TgtCtx), Target). -bb(CFG, L, Target) -> - Target:bb(CFG,L). +liveout(Liveness,L, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:liveout(Liveness,L,TgtCtx), Target). -livein(Liveness,L, Target) -> - regnames(Target:livein(Liveness,L), Target). +uses(I, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:uses(I,TgtCtx), Target). -liveout(Liveness,L, Target) -> - regnames(Target:liveout(Liveness,L), Target). +defines(I, Target={TgtMod,TgtCtx}) -> + regnames(TgtMod:defines(I,TgtCtx), Target). -uses(I, Target) -> - regnames(Target:uses(I), Target). +is_precoloured(R, {TgtMod,TgtCtx}) -> + TgtMod:is_precoloured(R,TgtCtx). -defines(I, Target) -> - regnames(Target:defines(I), Target). +is_global(R, {TgtMod,TgtCtx}) -> + TgtMod:is_global(R,TgtCtx). -is_precoloured(R, Target) -> - Target:is_precoloured(R). +new_spill_index(SpillIndex, {TgtMod,TgtCtx}) -> + TgtMod:new_spill_index(SpillIndex, TgtCtx). -is_global(R, Target) -> - Target:is_global(R). +number_of_temporaries(CFG, {TgtMod,TgtCtx}) -> + TgtMod:number_of_temporaries(CFG, TgtCtx). -physical_name(R, Target) -> - Target:physical_name(R). +physical_name(R, {TgtMod,TgtCtx}) -> + TgtMod:physical_name(R,TgtCtx). -regnames(Regs, Target) -> - [Target:reg_nr(X) || X <- Regs]. +regnames(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. -arg_vars(CFG, Target) -> - Target:args(CFG). +arg_vars(CFG, {TgtMod,TgtCtx}) -> + TgtMod:args(CFG,TgtCtx). -is_arg(Reg, Target) -> - Target:is_arg(Reg). +is_arg(Reg, {TgtMod,TgtCtx}) -> + TgtMod:is_arg(Reg,TgtCtx). diff --git a/lib/hipe/regalloc/hipe_moves.erl b/lib/hipe/regalloc/hipe_moves.erl index 39ccfb4a2f..409217bb03 100644 --- a/lib/hipe/regalloc/hipe_moves.erl +++ b/lib/hipe/regalloc/hipe_moves.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_moves). -export([new/1, diff --git a/lib/hipe/regalloc/hipe_node_sets.erl b/lib/hipe/regalloc/hipe_node_sets.erl index 01922a34d4..3cdfb62090 100644 --- a/lib/hipe/regalloc/hipe_node_sets.erl +++ b/lib/hipe/regalloc/hipe_node_sets.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_node_sets). diff --git a/lib/hipe/regalloc/hipe_optimistic_regalloc.erl b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl index 2ed9ec3b45..a019c46b90 100644 --- a/lib/hipe/regalloc/hipe_optimistic_regalloc.erl +++ b/lib/hipe/regalloc/hipe_optimistic_regalloc.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%----------------------------------------------------------------------- %% File : hipe_optimistic_regalloc.erl @@ -29,7 +23,7 @@ %%----------------------------------------------------------------------- -module(hipe_optimistic_regalloc). --export([regalloc/5]). +-export([regalloc/7]). -ifndef(DEBUG). %%-define(DEBUG,true). @@ -74,20 +68,22 @@ %% SpillLimit -- Temporaris with numbers higher than this have %% infinit spill cost. %% Consider changing this to a set. -%% Target -- The module containing the target-specific functions. +%% TgtMod -- The module containing the target-specific functions. +%% TgtCtx -- Context data for TgtMod %% %% Returns: %% Coloring -- A coloring for specified CFG -%% SpillIndex0 -- A new spill index +%% SpillIndex2 -- A new spill index %%----------------------------------------------------------------------- -ifdef(COMPARE_ITERATED_OPTIMISTIC). -regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> - ?debug_msg("optimistic ~w\n",[Target]), +regalloc(CFG, Liveness, SpillIndex, SpillLimit, TgtMod, TgtCtx, _Options) -> + Target = {TgtMod, TgtCtx}, + ?debug_msg("optimistic ~w\n",[TgtMod]), ?debug_msg("CFG: ~p\n",[CFG]), %% Build interference graph ?debug_msg("Build IG\n",[]), - IG_O = hipe_ig:build(CFG, Target), - IG = hipe_ig:build(CFG, Target), + IG_O = hipe_ig:build(CFG, Liveness, TgtMod, TgtCtx), + IG = hipe_ig:build(CFG, Liveness, TgtMod, TgtCtx), ?debug_msg("adjlist: ~p\n",[hipe_ig:adj_list(IG)]), ?debug_msg("IG:\n",[]), ?print_adjacent(IG), @@ -98,9 +94,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> SavedAdjList = hipe_ig:adj_list(IG), ?debug_msg("Init\n",[]), - No_temporaries = Target:number_of_temporaries(CFG), + No_temporaries = number_of_temporaries(CFG, Target), ?debug_msg("Coalescing RA: num_temps = ~p~n", [No_temporaries]), - Allocatable = Target:allocatable(), + Allocatable = allocatable(Target), K = length(Allocatable), All_colors = colset_from_list(Allocatable), ?debug_msg("K: ~w~nAll_colors: ~p\n",[K, All_colors]), @@ -113,11 +109,13 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?mov_print_memberships(Move_sets), ?debug_msg("Build Worklist\n",[]), - Worklists_O = hipe_reg_worklists:new(IG_O, Target, CFG, Move_sets_O, K, No_temporaries), + Worklists_O = hipe_reg_worklists:new(IG_O, TgtMod, TgtCtx, CFG, Move_sets_O, + K, No_temporaries), ?debug_msg("Worklists:\n ~p\n", [Worklists_O]), ?reg_print_memberships(Worklists_O), - Worklists = hipe_reg_worklists:new(IG, Target, CFG, K, No_temporaries), + Worklists = hipe_reg_worklists:new(IG, TgtMod, TgtCtx, CFG, K, + No_temporaries), ?debug_msg("New Worklists:\n ~p\n", [Worklists]), ?reg_print_memberships(Worklists), @@ -175,10 +173,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("Init node sets\n",[]), Node_sets = hipe_node_sets:new(), - %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,Target:non_alloc(CFG)]), + %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,non_alloc(CFG,Target)]), ?debug_msg("Default coloring\n",[]), {Color0,Node_sets1} = - defaultColoring(Target:all_precoloured(), + defaultColoring(all_precoloured(Target), initColor(No_temporaries), Node_sets, Target), ?debug_msg("Color0\n",[]), ?print_colors(No_temporaries, Color0), @@ -199,9 +197,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("Nodes:~w\nNodes2:~w\nNo_temporaries:~w\n",[Node_sets,Node_sets2,No_temporaries]), ?debug_msg("Build mapping _N ~w\n",[Node_sets2]), - Coloring = build_namelist(Node_sets2,SpillIndex,Alias2,Color1), + {Coloring,SpillIndex2} = + build_namelist(Node_sets2,SpillIndex,Alias2,Color1), ?debug_msg("Coloring ~p\n",[Coloring]), - SortedColoring = { sort_stack(element(1, Coloring)), element(2, Coloring)}, + SortedColoring = {sort_stack(Coloring), SpillIndex2}, ?debug_msg("SortedColoring ~p\n",[SortedColoring]), %%Coloring. ?debug_msg("----------------------Assign colors _O\n",[]), @@ -217,14 +216,15 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> SortedColoring_O = {sort_stack(element(1, Coloring_O)), element(2, Coloring_O)}, ?debug_msg("SortedColoring_O ~p\n",[SortedColoring_O]), sanity_compare(SortedColoring_O, SortedColoring), - Coloring. + {Coloring,SpillIndex2}. -else. -regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> - ?debug_msg("optimistic ~w\n",[Target]), +regalloc(CFG, Liveness, SpillIndex, SpillLimit, TgtMod, TgtCtx, _Options) -> + Target = {TgtMod, TgtCtx}, + ?debug_msg("optimistic ~w\n",[TgtMod]), ?debug_msg("CFG: ~p\n",[CFG]), %% Build interference graph ?debug_msg("Build IG\n",[]), - IG = hipe_ig:build(CFG, Target), + IG = hipe_ig:build(CFG, Liveness, TgtMod, TgtCtx), ?debug_msg("adjlist: ~p\n",[hipe_ig:adj_list(IG)]), ?debug_msg("IG:\n",[]), ?print_adjacent(IG), @@ -235,9 +235,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> SavedAdjList = hipe_ig:adj_list(IG), ?debug_msg("Init\n",[]), - No_temporaries = Target:number_of_temporaries(CFG), + No_temporaries = number_of_temporaries(CFG, Target), ?debug_msg("Coalescing RA: num_temps = ~p~n", [No_temporaries]), - Allocatable = Target:allocatable(), + Allocatable = allocatable(Target), K = length(Allocatable), All_colors = colset_from_list(Allocatable), ?debug_msg("K: ~w~nAll_colors: ~p\n",[K, All_colors]), @@ -250,7 +250,8 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("Build Worklist\n",[]), - Worklists = hipe_reg_worklists:new(IG, Target, CFG, K, No_temporaries), + Worklists = hipe_reg_worklists:new(IG, TgtMod, TgtCtx, CFG, K, + No_temporaries), ?debug_msg("New Worklists:\n ~p\n", [Worklists]), ?reg_print_memberships(Worklists), @@ -292,10 +293,10 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("Init node sets\n",[]), Node_sets = hipe_node_sets:new(), - %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,Target:non_alloc(CFG)]), + %% ?debug_msg("NodeSet: ~w\n NonAlloc ~w\n",[Node_sets,non_alloc(CFG,Target)]), ?debug_msg("Default coloring\n",[]), {Color0,Node_sets1} = - defaultColoring(Target:all_precoloured(), + defaultColoring(all_precoloured(Target), initColor(No_temporaries), Node_sets, Target), ?debug_msg("Color0\n",[]), ?print_colors(No_temporaries, Color0), @@ -316,9 +317,9 @@ regalloc(CFG, SpillIndex, SpillLimit, Target, _Options) -> ?debug_msg("Nodes:~w\nNodes2:~w\nNo_temporaries:~w\n",[Node_sets,Node_sets2,No_temporaries]), ?debug_msg("Build mapping _N ~w\n",[Node_sets2]), - Coloring = build_namelist(Node_sets2,SpillIndex,Alias2,Color1), + {Coloring, SpillIndex2} = build_namelist(Node_sets2,SpillIndex,Alias2,Color1), ?debug_msg("Coloring ~p\n",[Coloring]), - Coloring. + {Coloring,SpillIndex2}. -endif. %%---------------------------------------------------------------------- @@ -834,7 +835,8 @@ sort_stack_split(Pivot, [H|T], Smaller, Bigger) -> %% been coalesced, this mapping shows the alias for that %% node. %% AllColors -- This is an ordset containing all the available colors -%% Target -- The module containing the target-specific functions. +%% Target -- The module containing the target-specific functions, +%% along with its context data. %% %% Returns: %% Color -- A mapping from nodes to their respective color. @@ -874,7 +876,7 @@ assignColors(Worklists, Stack, NodeSets, Color, No_Temporaries, false -> % Color case Col = colset_smallest(OkColors), NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), - Color1 = setColor(Node, Target:physical_name(Col), Color), + Color1 = setColor(Node, physical_name(Col,Target), Color), ?debug_msg("Color case. Assigning color ~p to node.~n", [Col]), assignColors(Worklists, Stack1, NodeSets1, Color1, No_Temporaries, SavedAdjList, SavedSpillCosts, IG, Alias, AllColors, Target) end @@ -902,7 +904,8 @@ assignColors(Worklists, Stack, NodeSets, Color, No_Temporaries, %% Alias -- This is a mapping from nodes to nodes. If a node has %% been coalesced, this mapping shows the alias for that %% node. -%% Target -- The module containing the target-specific functions. +%% Target -- The module containing the target-specific functions, +%% along with its context data. %% %% Returns: %% Alias -- The restored aliases after the uncoalescing. @@ -1006,7 +1009,7 @@ colorSplit([], _Col, NodeSets, Color, _Target) -> colorSplit([Node|Nodes], Col, NodeSets, Color, Target) -> ?debug_msg(" Coloring node ~p with color ~p.~n", [Node, Col]), NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), - Color1 = setColor(Node, Target:physical_name(Col), Color), + Color1 = setColor(Node, physical_name(Col,Target), Color), colorSplit(Nodes, Col, NodeSets1, Color1, Target). %% Place non-colorable nodes in a split at the bottom of the SelectStack. @@ -1035,7 +1038,8 @@ enqueueSplit([Node|Nodes], IG, Stack) -> %% node. %% AllColors -- This is an ordset containing all the available colors %% -%% Target -- The module containing the target-specific functions. +%% Target -- The module containing the target-specific functions, +%% along with its context data. %% %% Returns: %% Color -- A mapping from nodes to their respective color. @@ -1065,7 +1069,7 @@ assignColors_O(Stack,NodeSets,Color,Alias,AllColors,Target) -> false -> % Colour case Col = colset_smallest(OkColors), NodeSets1 = hipe_node_sets:add_colored(Node, NodeSets), - Color1 = setColor(Node, Target:physical_name(Col), Color), + Color1 = setColor(Node, physical_name(Col,Target), Color), assignColors_O(Stack1, NodeSets1, Color1, Alias, AllColors, Target) end end. @@ -1079,7 +1083,8 @@ assignColors_O(Stack,NodeSets,Color,Alias,AllColors,Target) -> %% Regs -- The list of registers to be default colored %% Color -- The color mapping that shall be changed %% NodeSets -- The node sets that shall be updated -%% Target -- The module containing the target-specific functions. +%% Target -- The module containing the target-specific functions, +%% along with its context data. %% %% Returns: %% NewColor -- The updated color mapping @@ -1089,7 +1094,7 @@ assignColors_O(Stack,NodeSets,Color,Alias,AllColors,Target) -> defaultColoring([], Color, NodeSets, _Target) -> {Color,NodeSets}; defaultColoring([Reg|Regs], Color, NodeSets, Target) -> - Color1 = setColor(Reg,Target:physical_name(Reg), Color), + Color1 = setColor(Reg,physical_name(Reg,Target), Color), NodeSets1 = hipe_node_sets:add_colored(Reg, NodeSets), defaultColoring(Regs, Color1, NodeSets1, Target). @@ -1283,7 +1288,7 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) -> ?debug_msg("Testing nodes ~p and ~p for coalescing~n",[Dest,Source]), Alias_src = getAlias(Source, Alias), Alias_dst = getAlias(Dest, Alias), - {U,V} = case Target:is_precoloured(Alias_dst) of + {U,V} = case is_precoloured(Alias_dst, Target) of true -> {Alias_dst, Alias_src}; false -> {Alias_src, Alias_dst} end, @@ -1293,13 +1298,13 @@ coalesce(Moves, IG, Worklists, Alias, K, Target) -> %% drop coalesced move Move {Moves0, IG, Alias, Worklists}; _ -> - case (Target:is_precoloured(V) orelse + case (is_precoloured(V, Target) orelse hipe_ig:nodes_are_adjacent(U, V, IG)) of true -> %% drop constrained move Move {Moves0, IG, Alias, Worklists}; false -> - case (case Target:is_precoloured(U) of + case (case is_precoloured(U, Target) of true -> AdjV = hipe_ig:node_adj_list(V, IG), all_adjacent_ok(AdjV, U, Worklists, IG, K, Target); @@ -1350,7 +1355,7 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) -> ?debug_msg("Testing nodes ~p and ~p for coalescing~n",[Dest,Source]), Alias_src = getAlias(Source, Alias), Alias_dst = getAlias(Dest, Alias), - {U,V} = case Target:is_precoloured(Alias_dst) of + {U,V} = case is_precoloured(Alias_dst, Target) of true -> {Alias_dst, Alias_src}; false -> {Alias_src, Alias_dst} end, @@ -1361,7 +1366,7 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) -> Worklists1 = add_worklist(Worklists, U, K, Moves1, IG, Target), {Moves1, IG, Worklists1, Alias}; _ -> - case (Target:is_precoloured(V) orelse + case (is_precoloured(V, Target) orelse hipe_ig:nodes_are_adjacent(U, V, IG)) of true -> Moves1 = Moves0, % drop constrained move Move @@ -1369,7 +1374,7 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) -> Worklists2 = add_worklist(Worklists1, V, K, Moves1, IG, Target), {Moves1, IG, Worklists2, Alias}; false -> - case (case Target:is_precoloured(U) of + case (case is_precoloured(U, Target) of true -> AdjV = hipe_ig:node_adj_list(V, IG), all_adjacent_ok(AdjV, U, Worklists, IG, K, Target); @@ -1405,7 +1410,8 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) -> %% K -- Number of registers %% Moves -- Current move information %% IG -- Interference graph -%% Target -- The containing the target-specific functions +%% Target -- The containing the target-specific functions, along with +%% its context data. %% %% Returns: %% Worklists (updated) @@ -1413,7 +1419,7 @@ coalesce_O(Moves, IG, Worklists, Alias, K, Target) -> -ifdef(COMPARE_ITERATED_OPTIMISTIC). add_worklist(Worklists, U, K, Moves, IG, Target) -> - case (not(Target:is_precoloured(U)) + case (not(is_precoloured(U, Target)) andalso not(hipe_moves:move_related(U, Moves)) andalso (hipe_ig:is_trivially_colourable(U, K, IG))) of true -> @@ -1524,12 +1530,12 @@ combine(U, V, IG, Alias, Worklists, K, Target) -> combine_edges([], _U, IG, _Worklists, _K, _Target) -> IG; -combine_edges([T|Ts], U, IG, Worklists, K, Target) -> +combine_edges([T|Ts], U, IG, Worklists, K, Target={TgtMod,TgtCtx}) -> case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of true -> combine_edges(Ts, U, IG, Worklists, K, Target); _ -> - IG1 = hipe_ig:add_edge(T, U, IG, Target), - IG2 = case Target:is_precoloured(T) of + IG1 = hipe_ig:add_edge(T, U, IG, TgtMod, TgtCtx), + IG2 = case is_precoloured(T, Target) of true -> IG1; false -> hipe_ig:dec_node_degree(T, IG1) end, @@ -1559,7 +1565,7 @@ combine_edges([T|Ts], U, IG, Worklists, K, Target) -> -ifdef(COMPARE_ITERATED_OPTIMISTIC). combine_edges_O([], _U, IG, Worklists, Moves, _K, _Target) -> {IG, Worklists, Moves}; -combine_edges_O([T|Ts], U, IG, Worklists, Moves, K, Target) -> +combine_edges_O([T|Ts], U, IG, Worklists, Moves, K, Target={TgtMod,TgtCtx}) -> case hipe_reg_worklists:member_stack_or_coalesced(T, Worklists) of true -> combine_edges_O(Ts, U, IG, Worklists, Moves, K, Target); _ -> @@ -1576,7 +1582,7 @@ combine_edges_O([T|Ts], U, IG, Worklists, Moves, K, Target) -> %% worklist, and that's where decrement_degree() expects to find it. %% This issue is not covered in the published algorithm. OldDegree = hipe_ig:get_node_degree(T, IG), - IG1 = hipe_ig:add_edge(T, U, IG, Target), + IG1 = hipe_ig:add_edge(T, U, IG, TgtMod, TgtCtx), NewDegree = hipe_ig:get_node_degree(T, IG1), Worklists0 = if NewDegree =:= K, OldDegree =:= K-1 -> @@ -1609,7 +1615,8 @@ combine_edges_O([T|Ts], U, IG, Worklists, Moves, K, Target) -> %% Alias -- The Alias vector before undoing %% SavedAdj -- Saved adjacency list %% IG -- Interference graph -%% Target -- The module containing the target-specific functions. +%% Target -- The module containing the target-specific functions, +%% along with its context data. %% %% Returns: %% list of primitive nodes, that is all nodes that were previously @@ -1676,7 +1683,8 @@ findPrimitiveNodes(Node, N, Alias, PrimitiveNodes) -> %% N -- Node that should be uncoalesced %% SavedAdj -- Saved adjacency list %% IG -- Interference graph -%% Target -- The module containing the target-specific functions. +%% Target -- The module containing the target-specific functions, along +%% with its context data. %% %% Returns: %% updated Interferece graph @@ -1702,16 +1710,16 @@ fixAdj(N, SavedAdj, IG, Target) -> removeAdj([], _N, _IG, _Target) -> true; -removeAdj([V| New], N, IG, Target) -> - hipe_ig:remove_edge(V, N, IG, Target), +removeAdj([V| New], N, IG, Target={TgtMod,TgtCtx}) -> + hipe_ig:remove_edge(V, N, IG, TgtMod, TgtCtx), removeAdj(New, N, IG, Target). %%restoreAdj([], _N, IG, _Alias, _Target) -> %% %%?debug_msg("adj_lists__after_restore_o ~n~p~n", [hipe_ig:adj_list(IG)]), %% IG; -%%restoreAdj([V| AdjToN], N, IG, Alias, Target) -> +%%restoreAdj([V| AdjToN], N, IG, Alias, Target={TgtMod,TgtCtx}) -> %% AliasToV = getAlias(V, Alias), -%% IG1 = hipe_ig:add_edge(N, AliasToV, IG, Target), +%% IG1 = hipe_ig:add_edge(N, AliasToV, IG, TgtMod, TgtCtx), %% restoreAdj(AdjToN, N, IG1, Alias, Target). %% XXX This is probably a clumsy way of doing it @@ -1744,7 +1752,8 @@ findNew([A| Adj], Saved, New) -> %% R -- Other node to test %% IG -- Interference graph %% K -- Number of registers -%% Target -- The module containing the target-specific functions +%% Target -- The module containing the target-specific functions, along +%% with its context data. %% %% Returns: %% true iff coalescing is OK @@ -1752,7 +1761,7 @@ findNew([A| Adj], Saved, New) -> ok(T, R, IG, K, Target) -> ((hipe_ig:is_trivially_colourable(T, K, IG)) - orelse Target:is_precoloured(T) + orelse is_precoloured(T, Target) orelse hipe_ig:nodes_are_adjacent(T, R, IG)). %%---------------------------------------------------------------------- @@ -1765,7 +1774,8 @@ ok(T, R, IG, K, Target) -> %% U -- Node to test for coalescing %% IG -- Interference graph %% K -- Number of registers -%% Target -- The module containing the target-specific functions +%% Target -- The module containing the target-specific functions, along +%% with its context data. %% %% Returns: %% true iff coalescing is OK for all nodes in the list @@ -1923,7 +1933,7 @@ findCheapest([Node|Nodes], IG, Cost, Cheapest, SpillLimit) -> %% limit are extremely expensive. getCost(Node, IG, SpillLimit) -> - case Node > SpillLimit of + case Node >= SpillLimit of true -> inf; false -> SpillCost = hipe_ig:node_spill_cost(Node, IG), @@ -2042,3 +2052,24 @@ freezeEm3(_U,V,_M,K,WorkLists,Moves,IG,_Alias) -> {WorkLists,Moves1} end. -endif. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Interface to external functions. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +all_precoloured({TgtMod,TgtCtx}) -> + TgtMod:all_precoloured(TgtCtx). + +allocatable({TgtMod,TgtCtx}) -> + TgtMod:allocatable(TgtCtx). + +is_precoloured(R, {TgtMod,TgtCtx}) -> + TgtMod:is_precoloured(R,TgtCtx). + +number_of_temporaries(CFG, {TgtMod,TgtCtx}) -> + TgtMod:number_of_temporaries(CFG, TgtCtx). + +physical_name(R, {TgtMod,TgtCtx}) -> + TgtMod:physical_name(R,TgtCtx). diff --git a/lib/hipe/regalloc/hipe_ppc_specific.erl b/lib/hipe/regalloc/hipe_ppc_specific.erl index c49b1e510f..81bb551bd2 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,121 +11,138 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_specific). %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 - ,analyze/1 - ,labels/1 - ,all_precoloured/0 - ,bb/2 - ,liveout/2 - ,reg_nr/1 - ,def_use/1 - ,is_move/1 - ,is_precoloured/1 - ,var_range/1 - ,allocatable/0 - ,non_alloc/1 - ,physical_name/1 - ,reverse_postorder/1 - ,livein/2 - ,uses/1 - ,defines/1 +-export([number_of_temporaries/2 + ,analyze/2 + ,labels/2 + ,all_precoloured/1 + ,bb/3 + ,liveout/3 + ,reg_nr/2 + ,def_use/2 + ,is_move/2 + ,is_spill_move/2 + ,is_precoloured/2 + ,var_range/2 + ,allocatable/1 + ,non_alloc/2 + ,physical_name/2 + ,reverse_postorder/2 + ,livein/3 + ,uses/2 + ,defines/2 + ,defines_all_alloc/2 ]). %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]). %% for hipe_ls_regalloc: --export([args/1, is_arg/1, is_global/1, new_spill_index/1]). --export([breadthorder/1, postorder/1]). +-export([args/2, is_arg/2, is_global/2, new_spill_index/2]). +-export([breadthorder/2, postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3]). + +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). -defun_to_cfg(Defun) -> - hipe_ppc_cfg:init(Defun). +%% callbacks for hipe_bb_weights +-export([branch_preds/2]). -check_and_rewrite(Defun, Coloring) -> - hipe_ppc_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring, _) -> + hipe_ppc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_ppc_cfg:reverse_postorder(CFG). -non_alloc(CFG) -> - non_alloc(hipe_ppc_registers:nr_args(), hipe_ppc_cfg:params(CFG)). +non_alloc(CFG, no_context) -> + non_alloc_1(hipe_ppc_registers:nr_args(), hipe_ppc_cfg:params(CFG)). %% same as hipe_ppc_frame:fix_formals/2 -non_alloc(0, Rest) -> Rest; -non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); -non_alloc(_, []) -> []. +non_alloc_1(0, Rest) -> Rest; +non_alloc_1(N, [_|Rest]) -> non_alloc_1(N-1, Rest); +non_alloc_1(_, []) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> hipe_ppc_liveness_gpr:analyse(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) -> [X || X <- hipe_ppc_liveness_gpr:livein(Liveness,L), hipe_ppc:temp_is_allocatable(X)]. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) -> [X || X <- hipe_ppc_liveness_gpr:liveout(BB_in_out_liveness,Label), hipe_ppc:temp_is_allocatable(X)]. %% Registers stuff -allocatable() -> +allocatable(no_context) -> hipe_ppc_registers:allocatable_gpr(). -all_precoloured() -> +all_precoloured(no_context) -> hipe_ppc_registers:all_precoloured(). -is_precoloured(Reg) -> +is_precoloured(Reg, _) -> hipe_ppc_registers:is_precoloured_gpr(Reg). -is_fixed(R) -> +is_fixed(R, _) -> hipe_ppc_registers:is_fixed(R). -physical_name(Reg) -> +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_ppc_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) -> hipe_gensym:var_range(ppc). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(ppc), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) -> hipe_ppc_cfg:bb(CFG,L). +update_bb(CFG,L,BB,_) -> + hipe_ppc_cfg:bb_add(CFG,L,BB). + +branch_preds(Instr,_) -> + hipe_ppc_cfg:branch_preds(Instr). + %% PowerPC stuff -def_use(Instruction) -> - {defines(Instruction), uses(Instruction)}. +def_use(Instruction, Ctx) -> + {defines(Instruction, Ctx), uses(Instruction, Ctx)}. -uses(I) -> +uses(I, _) -> [X || X <- hipe_ppc_defuse:insn_use_gpr(I), hipe_ppc:temp_is_allocatable(X)]. -defines(I) -> +defines(I, _) -> [X || X <- hipe_ppc_defuse:insn_def_gpr(I), hipe_ppc:temp_is_allocatable(X)]. -is_move(Instruction) -> +defines_all_alloc(I, _) -> + hipe_ppc_defuse:insn_defs_all_gpr(I). + +is_move(Instruction, _) -> case hipe_ppc:is_pseudo_move(Instruction) of true -> Dst = hipe_ppc:pseudo_move_dst(Instruction), @@ -142,28 +155,60 @@ is_move(Instruction) -> false -> false end. -reg_nr(Reg) -> +is_spill_move(Instruction, _) -> + hipe_ppc:is_pseudo_spill_move(Instruction). + +reg_nr(Reg, _) -> hipe_ppc:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_ppc:mk_pseudo_move(Dst, Src). + +mk_goto(Label, _) -> + hipe_ppc:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + hipe_ppc_cfg:redirect_jmp(Jmp, ToOld, ToNew). + +new_label(_) -> + hipe_gensym:get_next_label(ppc). + +new_reg_nr(_) -> + hipe_gensym:get_next_var(ppc). + +update_reg_nr(Nr, Temp, _) -> + hipe_ppc:mk_temp(Nr, hipe_ppc:temp_type(Temp)). + +subst_temps(SubstFun, Instr, _) -> + hipe_ppc_subst:insn_temps( + fun(Op) -> + case hipe_ppc:temp_is_allocatable(Op) + andalso hipe_ppc:temp_type(Op) =/= 'double' + of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + %%% Linear Scan stuff -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) -> SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_ppc_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_ppc_cfg:postorder(CFG). -is_global(R) -> +is_global(R, _) -> R =:= hipe_ppc_registers:temp1() orelse R =:= hipe_ppc_registers:temp2() orelse R =:= hipe_ppc_registers:temp3() orelse hipe_ppc_registers:is_fixed(R). -is_arg(R) -> +is_arg(R, _) -> hipe_ppc_registers:is_arg(R). -args(CFG) -> +args(CFG, _) -> hipe_ppc_registers:args(hipe_ppc_cfg:arity(CFG)). diff --git a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl index 454aa4c686..dcfdf6592c 100644 --- a/lib/hipe/regalloc/hipe_ppc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_ppc_specific_fp.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,133 +11,182 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_ppc_specific_fp). %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 - ,analyze/1 - ,labels/1 - ,all_precoloured/0 - ,bb/2 - ,liveout/2 - ,reg_nr/1 - ,def_use/1 - ,is_move/1 - ,is_precoloured/1 - ,var_range/1 - ,allocatable/0 - ,non_alloc/1 - ,physical_name/1 - ,reverse_postorder/1 - ,livein/2 - ,uses/1 - ,defines/1 +-export([number_of_temporaries/2 + ,analyze/2 + ,labels/2 + ,all_precoloured/1 + ,bb/3 + ,liveout/3 + ,reg_nr/2 + ,def_use/2 + ,is_move/2 + ,is_spill_move/2 + ,is_precoloured/2 + ,var_range/2 + ,allocatable/1 + ,non_alloc/2 + ,physical_name/2 + ,reverse_postorder/2 + ,livein/3 + ,uses/2 + ,defines/2 + ,defines_all_alloc/2 ]). %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]). %% for hipe_ls_regalloc: -%%-export([args/1, is_arg/1, is_global, new_spill_index/1]). -%%-export([breadthorder/1, postorder/1]). +%%-export([args/2, is_arg/2, is_global, new_spill_index/2]). +%%-export([breadthorder/2, postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3]). + +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). -defun_to_cfg(Defun) -> - hipe_ppc_cfg:init(Defun). +%% callbacks for hipe_bb_weights +-export([branch_preds/2]). -check_and_rewrite(Defun, Coloring) -> - hipe_ppc_ra_postconditions_fp:check_and_rewrite(Defun, Coloring). +check_and_rewrite(CFG, Coloring, _) -> + hipe_ppc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_ppc_cfg:reverse_postorder(CFG). -non_alloc(_CFG) -> +non_alloc(_CFG, _) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> hipe_ppc_liveness_fpr:analyse(CFG). -livein(Liveness, L) -> +livein(Liveness, L, _) -> hipe_ppc_liveness_fpr:livein(Liveness, L). -liveout(BB_in_out_liveness, Label) -> +liveout(BB_in_out_liveness, Label, _) -> hipe_ppc_liveness_fpr:liveout(BB_in_out_liveness, Label). %% Registers stuff -allocatable() -> +allocatable(no_context) -> hipe_ppc_registers:allocatable_fpr(). -all_precoloured() -> - allocatable(). +all_precoloured(Ctx) -> + allocatable(Ctx). -is_precoloured(Reg) -> +is_precoloured(Reg, _) -> hipe_ppc_registers:is_precoloured_fpr(Reg). -is_fixed(_Reg) -> +is_fixed(_Reg, _) -> false. -physical_name(Reg) -> +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_ppc_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) -> hipe_gensym:var_range(ppc). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(ppc), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG, L) -> +bb(CFG, L, _) -> hipe_ppc_cfg:bb(CFG, L). +update_bb(CFG,L,BB,_) -> + hipe_ppc_cfg:bb_add(CFG,L,BB). + +branch_preds(Instr,_) -> + hipe_ppc_cfg:branch_preds(Instr). + %% PowerPC stuff -def_use(I) -> - {defines(I), uses(I)}. +def_use(I, Ctx) -> + {defines(I, Ctx), uses(I, Ctx)}. -uses(I) -> +uses(I, _) -> hipe_ppc_defuse:insn_use_fpr(I). -defines(I) -> +defines(I, _) -> hipe_ppc_defuse:insn_def_fpr(I). -is_move(I) -> +defines_all_alloc(I, _) -> + hipe_ppc_defuse:insn_defs_all_fpr(I). + +is_move(I, _) -> hipe_ppc:is_pseudo_fmove(I). -reg_nr(Reg) -> +is_spill_move(I, _) -> + hipe_ppc:is_pseudo_spill_fmove(I). + +reg_nr(Reg, _) -> hipe_ppc:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_ppc:mk_pseudo_fmove(Dst, Src). + +mk_goto(Label, _) -> + hipe_ppc:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + hipe_ppc_cfg:redirect_jmp(Jmp, ToOld, ToNew). + +new_label(_) -> + hipe_gensym:get_next_label(ppc). + +new_reg_nr(_) -> + hipe_gensym:get_next_var(ppc). + +update_reg_nr(Nr, _Temp, _) -> + hipe_ppc:mk_temp(Nr, 'double'). + +subst_temps(SubstFun, Instr, _) -> + hipe_ppc_subst:insn_temps( + fun(Op) -> + case hipe_ppc:temp_is_allocatable(Op) + andalso hipe_ppc:temp_type(Op) =:= 'double' + of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + -ifdef(notdef). -new_spill_index(SpillIndex) -> +new_spill_index(SpillIndex, _) -> SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_ppc_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_ppc_cfg:postorder(CFG). -is_global(_R) -> +is_global(_R, _) -> false. -is_arg(_R) -> +is_arg(_R, _) -> false. -args(_CFG) -> +args(_CFG, _) -> []. -endif. diff --git a/lib/hipe/regalloc/hipe_range_split.erl b/lib/hipe/regalloc/hipe_range_split.erl new file mode 100644 index 0000000000..385df695f2 --- /dev/null +++ b/lib/hipe/regalloc/hipe_range_split.erl @@ -0,0 +1,1187 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% TEMPORARY LIVE RANGE SPLITTING PASS +%% +%% Live range splitting is useful to allow a register allocator to allocate a +%% temporary to register for a part of its lifetime, even if it cannot be for +%% the entirety. This improves register allocation quality, at the cost of +%% making the allocation problem more time and memory intensive to solve. +%% +%% Optimal allocation can be achieved if all temporaries are split at every +%% program point (between all instructions), but this makes register allocation +%% infeasably slow in practice. Instead, this module uses heuristics to choose +%% which temporaries should have their live ranges split, and at which points. +%% +%% The range splitter only considers temps which are live during a call +%% instruction, since they're known to be spilled. The control-flow graph is +%% partitioned at call instructions and splitting decisions are made separately +%% for each partition. The register copy of a temp (if any) gets a separate name +%% in each partition. +%% +%% There are three different ways the range splitter may choose to split a +%% temporary in a program partition: +%% +%% * Mode1: Spill the temp before calls, and restore it after them +%% * Mode2: Spill the temp after definitions, restore it after calls +%% * Mode3: Spill the temp after definitions, restore it before uses +%% +%% To pick which of these should be used for each temp×partiton pair, the range +%% splitter uses a cost function. The cost is simply the sum of the cost of all +%% expected stack accesses, and the cost for an individual stack access is based +%% on the probability weight of the basic block that it resides in. This biases +%% the range splitter so that it attempts moving stack accesses from a functions +%% hot path to the cold path. +%% +%% The heuristic has a couple of tuning knobs, adjusting its preference for +%% different spilling modes, aggressiveness, and how much influence the basic +%% block probability weights have. +%% +%% Edge case not handled: Call instructions directly defining a pseudo. In that +%% case, if that pseudo has been selected for mode2 spills, no spill is inserted +%% after the call. +-module(hipe_range_split). + +-export([split/5]). + +-compile(inline). + +%% -define(DO_ASSERT, 1). +%% -define(DEBUG, 1). +-include("../main/hipe.hrl"). + +%% Heuristic tuning constants +-define(DEFAULT_MIN_GAIN, 1.1). % option: range_split_min_gain +-define(DEFAULT_MODE1_FUDGE, 1.1). % option: range_split_mode1_fudge +-define(DEFAULT_WEIGHT_POWER, 2). % option: range_split_weight_power +-define(WEIGHT_CONST_FUN(Power), math:log(Power)/math:log(100)). +-define(WEIGHT_FUN(Wt, Const), math:pow(Wt, Const)). +-define(HEUR_MAX_TEMPS, 20000). + +-type target_cfg() :: any(). +-type target_instr() :: any(). +-type target_temp() :: any(). +-type liveness() :: any(). +-type target_module() :: module(). +-type target_context() :: any(). +-type target() :: {target_module(), target_context()}. +-type liveset() :: ordsets:ordset(temp()). +-type temp() :: non_neg_integer(). +-type label() :: non_neg_integer(). + +-spec split(target_cfg(), liveness(), target_module(), target_context(), + comp_options()) + -> target_cfg(). +split(TCFG0, Liveness, TargetMod, TargetContext, Options) -> + Target = {TargetMod, TargetContext}, + NoTemps = number_of_temporaries(TCFG0, Target), + if NoTemps > ?HEUR_MAX_TEMPS -> + ?debug_msg("~w: Too many temps (~w), falling back on restore_reuse.~n", + [?MODULE, NoTemps]), + hipe_restore_reuse:split(TCFG0, Liveness, TargetMod, TargetContext); + true -> + Wts = compute_weights(TCFG0, TargetMod, TargetContext, Options), + {CFG0, Temps} = convert(TCFG0, Target), + Avail = avail_analyse(TCFG0, Liveness, Target), + Defs = def_analyse(CFG0, TCFG0), + RDefs = rdef_analyse(CFG0), + PLive = plive_analyse(CFG0), + {CFG, DUCounts, Costs, DSets0} = + scan(CFG0, Liveness, PLive, Wts, Defs, RDefs, Avail, Target), + {DSets, _} = hipe_dsets:to_map(DSets0), + Renames = decide(DUCounts, Costs, Target, Options), + rewrite(CFG, TCFG0, Target, Liveness, PLive, Defs, Avail, DSets, Renames, + Temps) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Internal program representation +%% +%% Second pass: Convert cfg to internal representation + +-record(cfg, { + rpo_labels :: [label()], + bbs :: #{label() => bb()} + }). +-type cfg() :: #cfg{}. + +cfg_bb(L, #cfg{bbs=BBS}) -> maps:get(L, BBS). + +cfg_postorder(#cfg{rpo_labels=RPO}) -> lists:reverse(RPO). + +-record(bb, { + code :: [code_elem()], + %% If the last instruction of code defines all allocatable registers + has_call :: boolean(), + succ :: [label()] + }). +-type bb() :: #bb{}. +-type code_elem() :: instr() | mode2_spills() | mode3_restores(). + +bb_code(#bb{code=Code}) -> Code. +bb_has_call(#bb{has_call=HasCall}) -> HasCall. +bb_succ(#bb{succ=Succ}) -> Succ. + +bb_butlast(#bb{code=Code}) -> + bb_butlast_1(Code). + +bb_butlast_1([_Last]) -> []; +bb_butlast_1([I|Is]) -> [I|bb_butlast_1(Is)]. + +bb_last(#bb{code=Code}) -> lists:last(Code). + +-record(instr, { + i :: target_instr(), + def :: ordsets:ordset(temp()), + use :: ordsets:ordset(temp()) + }). +-type instr() :: #instr{}. + +-record(mode2_spills, { + temps :: ordsets:ordset(temp()) + }). +-type mode2_spills() :: #mode2_spills{}. + +-record(mode3_restores, { + temps :: ordsets:ordset(temp()) + }). +-type mode3_restores() :: #mode3_restores{}. + +-spec convert(target_cfg(), target()) -> {cfg(), temps()}. +convert(CFG, Target) -> + RPO = reverse_postorder(CFG, Target), + {BBsList, Temps} = convert_bbs(RPO, CFG, Target, #{}, []), + {#cfg{rpo_labels = RPO, + bbs = maps:from_list(BBsList)}, + Temps}. + +convert_bbs([], _CFG, _Target, Temps, Acc) -> {Acc, Temps}; +convert_bbs([L|Ls], CFG, Target, Temps0, Acc) -> + Succs = hipe_gen_cfg:succ(CFG, L), + TBB = bb(CFG, L, Target), + TCode = hipe_bb:code(TBB), + {Code, Last, Temps} = convert_code(TCode, Target, Temps0, []), + HasCall = defines_all_alloc(Last#instr.i, Target), + BB = #bb{code = Code, + has_call = HasCall, + succ = Succs}, + convert_bbs(Ls, CFG, Target, Temps, [{L,BB}|Acc]). + +convert_code([], _Target, Temps, [Last|_]=Acc) -> + {lists:reverse(Acc), Last, Temps}; +convert_code([TI|TIs], Target, Temps0, Acc) -> + {TDef, TUse} = def_use(TI, Target), + I = #instr{i = TI, + def = ordsets:from_list(reg_names(TDef, Target)), + use = ordsets:from_list(reg_names(TUse, Target))}, + Temps = add_temps(TUse, Target, add_temps(TDef, Target, Temps0)), + convert_code(TIs, Target, Temps, [I|Acc]). + +-type temps() :: #{temp() => target_temp()}. +add_temps([], _Target, Temps) -> Temps; +add_temps([T|Ts], Target, Temps) -> + add_temps(Ts, Target, Temps#{reg_nr(T, Target) => T}). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fourth pass: P({DEF}) lattice fwd dataflow (for eliding stores at SPILL +%% splits) +-type defsi() :: #{label() => defseti() | {call, defseti(), defseti()}}. +-type defs() :: #{label() => defsetf()}. + +-spec def_analyse(cfg(), target_cfg()) -> defs(). +def_analyse(CFG = #cfg{rpo_labels = RPO}, TCFG) -> + Defs0 = def_init(CFG), + def_dataf(RPO, TCFG, Defs0). + +-spec def_init(cfg()) -> defsi(). +def_init(#cfg{bbs = BBs}) -> + maps:from_list( + [begin + {L, case HasCall of + false -> def_init_scan(bb_code(BB), defseti_new()); + true -> + {call, def_init_scan(bb_butlast(BB), defseti_new()), + defseti_from_ordset((bb_last(BB))#instr.def)} + end} + end || {L, BB = #bb{has_call=HasCall}} <- maps:to_list(BBs)]). + +def_init_scan([], Defset) -> Defset; +def_init_scan([#instr{def=Def}|Is], Defset0) -> + Defset = defseti_add_ordset(Def, Defset0), + def_init_scan(Is, Defset). + +-spec def_dataf([label()], target_cfg(), defsi()) -> defs(). +def_dataf(Labels, TCFG, Defs0) -> + case def_dataf_once(Labels, TCFG, Defs0, 0) of + {Defs, 0} -> + def_finalise(Defs); + {Defs, _Changed} -> + def_dataf(Labels, TCFG, Defs) + end. + +-spec def_finalise(defsi()) -> defs(). +def_finalise(Defs) -> + maps:from_list([{K, defseti_finalise(BL)} + || {K, {call, BL, _}} <- maps:to_list(Defs)]). + +-spec def_dataf_once([label()], target_cfg(), defsi(), non_neg_integer()) + -> {defsi(), non_neg_integer()}. +def_dataf_once([], _TCFG, Defs, Changed) -> {Defs, Changed}; +def_dataf_once([L|Ls], TCFG, Defs0, Changed0) -> + AddPreds = + fun(Defset1) -> + lists:foldl(fun(P, Defset2) -> + defseti_union(defout(P, Defs0), Defset2) + end, Defset1, hipe_gen_cfg:pred(TCFG, L)) + end, + Defset = + case Defset0 = maps:get(L, Defs0) of + {call, Butlast, Defout} -> {call, AddPreds(Butlast), Defout}; + _ -> AddPreds(Defset0) + end, + Changed = case Defset =:= Defset0 of + true -> Changed0; + false -> Changed0+1 + end, + def_dataf_once(Ls, TCFG, Defs0#{L := Defset}, Changed). + +-spec defout(label(), defsi()) -> defseti(). +defout(L, Defs) -> + case maps:get(L, Defs) of + {call, _DefButLast, Defout} -> Defout; + Defout -> Defout + end. + +-spec defbutlast(label(), defs()) -> defsetf(). +defbutlast(L, Defs) -> maps:get(L, Defs). + +-spec defseti_new() -> defseti(). +-spec defseti_union(defseti(), defseti()) -> defseti(). +-spec defseti_add_ordset(ordsets:ordset(temp()), defseti()) -> defseti(). +-spec defseti_from_ordset(ordsets:ordset(temp())) -> defseti(). +-spec defseti_finalise(defseti()) -> defsetf(). +-spec defsetf_member(temp(), defsetf()) -> boolean(). +-spec defsetf_intersect_ordset(ordsets:ordset(temp()), defsetf()) + -> ordsets:ordset(temp()). + +-type defseti() :: bitord(). +defseti_new() -> bitord_new(). +defseti_union(A, B) -> bitord_union(A, B). +defseti_add_ordset(OS, D) -> defseti_union(defseti_from_ordset(OS), D). +defseti_from_ordset(OS) -> bitord_from_ordset(OS). +defseti_finalise(D) -> bitarr_from_bitord(D). + +-type defsetf() :: bitarr(). +defsetf_member(E, D) -> bitarr_get(E, D). + +defsetf_intersect_ordset([], _D) -> []; +defsetf_intersect_ordset([E|Es], D) -> + case bitarr_get(E, D) of + true -> [E|defsetf_intersect_ordset(Es,D)]; + false -> defsetf_intersect_ordset(Es,D) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fifth pass: P({DEF}) lattice reverse dataflow (for eliding stores at defines +%% in mode2) +-type rdefsi() :: #{label() => + {call, rdefseti(), [label()]} + | {nocall, rdefseti(), rdefseti(), [label()]}}. +-type rdefs() :: #{label() => {final, rdefsetf(), [label()]}}. + +-spec rdef_analyse(cfg()) -> rdefs(). +rdef_analyse(CFG = #cfg{rpo_labels=RPO}) -> + Defs0 = rdef_init(CFG), + PO = rdef_postorder(RPO, CFG, []), + rdef_dataf(PO, Defs0). + +%% Filter out 'call' labels, since they don't change +-spec rdef_postorder([label()], cfg(), [label()]) -> [label()]. +rdef_postorder([], _CFG, Acc) -> Acc; +rdef_postorder([L|Ls], CFG, Acc) -> + case bb_has_call(cfg_bb(L, CFG)) of + true -> rdef_postorder(Ls, CFG, Acc); + false -> rdef_postorder(Ls, CFG, [L|Acc]) + end. + +-spec rdef_init(cfg()) -> rdefsi(). +rdef_init(#cfg{bbs = BBs}) -> + maps:from_list( + [{L, case HasCall of + true -> + Defin = rdef_init_scan(bb_butlast(BB), rdefseti_empty()), + {call, Defin, Succs}; + false -> + Gen = rdef_init_scan(bb_code(BB), rdefseti_empty()), + {nocall, Gen, rdefseti_top(), Succs} + end} + || {L, BB = #bb{has_call=HasCall, succ=Succs}} <- maps:to_list(BBs)]). + +-spec rdef_init_scan([instr()], rdefseti()) -> rdefseti(). +rdef_init_scan([], Defset) -> Defset; +rdef_init_scan([#instr{def=Def}|Is], Defset0) -> + Defset = rdefseti_add_ordset(Def, Defset0), + rdef_init_scan(Is, Defset). + +-spec rdef_dataf([label()], rdefsi()) -> rdefs(). +rdef_dataf(Labels, Defs0) -> + case rdef_dataf_once(Labels, Defs0, 0) of + {Defs, 0} -> + rdef_finalise(Defs); + {Defs, _Changed} -> + rdef_dataf(Labels, Defs) + end. + +-spec rdef_finalise(rdefsi()) -> rdefs(). +rdef_finalise(Defs) -> + maps:map(fun(L, V) -> + Succs = rsuccs_val(V), + Defout0 = rdefout_intersect(L, Defs, rdefseti_top()), + {final, rdefset_finalise(Defout0), Succs} + end, Defs). + +-spec rdef_dataf_once([label()], rdefsi(), non_neg_integer()) + -> {rdefsi(), non_neg_integer()}. +rdef_dataf_once([], Defs, Changed) -> {Defs, Changed}; +rdef_dataf_once([L|Ls], Defs0, Changed0) -> + #{L := {nocall, Gen, Defin0, Succs}} = Defs0, + Defin = rdefseti_union(Gen, rdefout_intersect(L, Defs0, Defin0)), + Defset = {nocall, Gen, Defin, Succs}, + Changed = case Defin =:= Defin0 of + true -> Changed0; + false -> Changed0+1 + end, + rdef_dataf_once(Ls, Defs0#{L := Defset}, Changed). + +-spec rdefin(label(), rdefsi()) -> rdefseti(). +rdefin(L, Defs) -> rdefin_val(maps:get(L, Defs)). +rdefin_val({nocall, _Gen, Defin, _Succs}) -> Defin; +rdefin_val({call, Defin, _Succs}) -> Defin. + +-spec rsuccs(label(), rdefsi()) -> [label()]. +rsuccs(L, Defs) -> rsuccs_val(maps:get(L, Defs)). +rsuccs_val({nocall, _Gen, _Defin, Succs}) -> Succs; +rsuccs_val({call, _Defin, Succs}) -> Succs. + +-spec rdefout(label(), rdefs()) -> rdefsetf(). +rdefout(L, Defs) -> + #{L := {final, Defout, _Succs}} = Defs, + Defout. + +-spec rdefout_intersect(label(), rdefsi(), rdefseti()) -> rdefseti(). +rdefout_intersect(L, Defs, Init) -> + lists:foldl(fun(S, Acc) -> + rdefseti_intersect(rdefin(S, Defs), Acc) + end, Init, rsuccs(L, Defs)). + +-type rdefseti() :: bitord() | top. +rdefseti_top() -> top. +rdefseti_empty() -> bitord_new(). +-spec rdefseti_from_ordset(ordsets:ordset(temp())) -> rdefseti(). +rdefseti_from_ordset(OS) -> bitord_from_ordset(OS). + +-spec rdefseti_add_ordset(ordsets:ordset(temp()), rdefseti()) -> rdefseti(). +rdefseti_add_ordset(_, top) -> top; % Should never happen in rdef_dataf +rdefseti_add_ordset(OS, D) -> rdefseti_union(rdefseti_from_ordset(OS), D). + +-spec rdefseti_union(rdefseti(), rdefseti()) -> rdefseti(). +rdefseti_union(top, _) -> top; +rdefseti_union(_, top) -> top; +rdefseti_union(A, B) -> bitord_union(A, B). + +-spec rdefseti_intersect(rdefseti(), rdefseti()) -> rdefseti(). +rdefseti_intersect(top, D) -> D; +rdefseti_intersect(D, top) -> D; +rdefseti_intersect(A, B) -> bitord_intersect(A, B). + +-type rdefsetf() :: {arr, bitarr()} | top. +-spec rdefset_finalise(rdefseti()) -> rdefsetf(). +rdefset_finalise(top) -> top; +rdefset_finalise(Ord) -> {arr, bitarr_from_bitord(Ord)}. + +%% rdefsetf_top() -> top. +rdefsetf_empty() -> {arr, bitarr_new()}. + +-spec rdefsetf_add_ordset(ordsets:ordset(temp()), rdefsetf()) -> rdefsetf(). +rdefsetf_add_ordset(_, top) -> top; +rdefsetf_add_ordset(OS, {arr, Arr}) -> + {arr, lists:foldl(fun bitarr_set/2, Arr, OS)}. + +-spec rdef_step(instr(), rdefsetf()) -> rdefsetf(). +rdef_step(#instr{def=Def}, Defset) -> + %% ?ASSERT(not defines_all_alloc(I, Target)), + rdefsetf_add_ordset(Def, Defset). + +-spec ordset_subtract_rdefsetf(ordsets:ordset(temp()), rdefsetf()) + -> ordsets:ordset(temp()). +ordset_subtract_rdefsetf(_, top) -> []; +ordset_subtract_rdefsetf(OS, {arr, Arr}) -> + %% Lazy implementation; could do better if OS can grow + lists:filter(fun(E) -> not bitarr_get(E, Arr) end, OS). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Integer sets represented as bit sets +%% +%% Two representations; bitord() and bitarr() +-define(LIMB_IX_BITS, 11). +-define(LIMB_BITS, (1 bsl ?LIMB_IX_BITS)). +-define(LIMB_IX(Index), (Index bsr ?LIMB_IX_BITS)). +-define(BIT_IX(Index), (Index band (?LIMB_BITS - 1))). +-define(BIT_MASK(Index), (1 bsl ?BIT_IX(Index))). + +%% bitord(): fast at union/2 and can be compared for equality with '=:=' +-type bitord() :: orddict:orddict(non_neg_integer(), 0..((1 bsl ?LIMB_BITS)-1)). + +-spec bitord_new() -> bitord(). +bitord_new() -> []. + +-spec bitord_union(bitord(), bitord()) -> bitord(). +bitord_union(Lhs, Rhs) -> + orddict:merge(fun(_, L, R) -> L bor R end, Lhs, Rhs). + +-spec bitord_intersect(bitord(), bitord()) -> bitord(). +bitord_intersect([], _) -> []; +bitord_intersect(_, []) -> []; +bitord_intersect([{K, L}|Ls], [{K, R}|Rs]) -> + [{K, L band R} | bitord_intersect(Ls, Rs)]; +bitord_intersect([{LK, _}|Ls], [{RK, _}|_]=Rs) when LK < RK -> + bitord_intersect(Ls, Rs); +bitord_intersect([{LK, _}|_]=Ls, [{RK, _}|Rs]) when LK > RK -> + bitord_intersect(Ls, Rs). + +-spec bitord_from_ordset(ordsets:ordset(non_neg_integer())) -> bitord(). +bitord_from_ordset([]) -> []; +bitord_from_ordset([B|Bs]) -> + bitord_from_ordset_1(Bs, ?LIMB_IX(B), ?BIT_MASK(B)). + +bitord_from_ordset_1([B|Bs], Key, Val) when Key =:= ?LIMB_IX(B) -> + bitord_from_ordset_1(Bs, Key, Val bor ?BIT_MASK(B)); +bitord_from_ordset_1([B|Bs], Key, Val) -> + [{Key,Val} | bitord_from_ordset_1(Bs, ?LIMB_IX(B), ?BIT_MASK(B))]; +bitord_from_ordset_1([], Key, Val) -> [{Key, Val}]. + +%% bitarr(): fast (enough) at get/2 +-type bitarr() :: array:array(0..((1 bsl ?LIMB_BITS)-1)). + +-spec bitarr_new() -> bitarr(). +bitarr_new() -> array:new({default, 0}). + +-spec bitarr_get(non_neg_integer(), bitarr()) -> boolean(). +bitarr_get(Index, Array) -> + Limb = array:get(?LIMB_IX(Index), Array), + 0 =/= (Limb band ?BIT_MASK(Index)). + +-spec bitarr_set(non_neg_integer(), bitarr()) -> bitarr(). +bitarr_set(Index, Array) -> + Limb0 = array:get(?LIMB_IX(Index), Array), + Limb = Limb0 bor ?BIT_MASK(Index), + array:set(?LIMB_IX(Index), Limb, Array). + +-spec bitarr_from_bitord(bitord()) -> bitarr(). +bitarr_from_bitord(Ord) -> + array:from_orddict(Ord, 0). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Sixth pass: Partition-local liveness analysis +%% +%% As temps are not spilled when exiting a partition in mode2, only +%% partition-local uses need to be considered when deciding which temps need +%% restoring at partition entry. + +-type plive() :: #{label() => + {call, liveset(), [label()]} + | {nocall, {liveset(), liveset()}, liveset(), [label()]}}. + +-spec plive_analyse(cfg()) -> plive(). +plive_analyse(CFG) -> + Defs0 = plive_init(CFG), + PO = cfg_postorder(CFG), + plive_dataf(PO, Defs0). + +-spec plive_init(cfg()) -> plive(). +plive_init(#cfg{bbs = BBs}) -> + maps:from_list( + [begin + {L, case HasCall of + true -> + {Gen, _} = plive_init_scan(bb_code(BB)), + {call, Gen, Succs}; + false -> + GenKill = plive_init_scan(bb_code(BB)), + {nocall, GenKill, liveset_empty(), Succs} + end} + end || {L, BB = #bb{has_call=HasCall, succ=Succs}} <- maps:to_list(BBs)]). + +-spec plive_init_scan([instr()]) -> {liveset(), liveset()}. +plive_init_scan([]) -> {liveset_empty(), liveset_empty()}; +plive_init_scan([#instr{def=InstrKill, use=InstrGen}|Is]) -> + {Gen0, Kill0} = plive_init_scan(Is), + Gen1 = liveset_subtract(Gen0, InstrKill), + Gen = liveset_union(Gen1, InstrGen), + Kill1 = liveset_union(Kill0, InstrKill), + Kill = liveset_subtract(Kill1, InstrGen), + {Gen, Kill}. + +-spec plive_dataf([label()], plive()) -> plive(). +plive_dataf(Labels, PLive0) -> + case plive_dataf_once(Labels, PLive0, 0) of + {PLive, 0} -> PLive; + {PLive, _Changed} -> + plive_dataf(Labels, PLive) + end. + +-spec plive_dataf_once([label()], plive(), non_neg_integer()) -> + {plive(), non_neg_integer()}. +plive_dataf_once([], PLive, Changed) -> {PLive, Changed}; +plive_dataf_once([L|Ls], PLive0, Changed0) -> + Liveset = + case Liveset0 = maps:get(L, PLive0) of + {call, Livein, Succs} -> + {call, Livein, Succs}; + {nocall, {Gen, Kill} = GenKill, _OldLivein, Succs} -> + Liveout = pliveout(L, PLive0), + Livein = liveset_union(Gen, liveset_subtract(Liveout, Kill)), + {nocall, GenKill, Livein, Succs} + end, + Changed = case Liveset =:= Liveset0 of + true -> Changed0; + false -> Changed0+1 + end, + plive_dataf_once(Ls, PLive0#{L := Liveset}, Changed). + +-spec pliveout(label(), plive()) -> liveset(). +pliveout(L, PLive) -> + liveset_union([plivein(S, PLive) || S <- psuccs(L, PLive)]). + +-spec psuccs(label(), plive()) -> [label()]. +psuccs(L, PLive) -> psuccs_val(maps:get(L, PLive)). +psuccs_val({call, _Livein, Succs}) -> Succs; +psuccs_val({nocall, _GenKill, _Livein, Succs}) -> Succs. + +-spec plivein(label(), plive()) -> liveset(). +plivein(L, PLive) -> plivein_val(maps:get(L, PLive)). +plivein_val({call, Livein, _Succs}) -> Livein; +plivein_val({nocall, _GenKill, Livein, _Succs}) -> Livein. + +liveset_empty() -> ordsets:new(). +liveset_subtract(A, B) -> ordsets:subtract(A, B). +liveset_union(A, B) -> ordsets:union(A, B). +liveset_union(LivesetList) -> ordsets:union(LivesetList). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Third pass: Compute dataflow analyses required for placing mode3 +%% spills/restores. +%% Reuse analysis implementation in hipe_restore_reuse. +%% XXX: hipe_restore_reuse has it's own "rdef"; we would like to reuse that one +%% too. +-type avail() :: hipe_restore_reuse:avail(). + +-spec avail_analyse(target_cfg(), liveness(), target()) -> avail(). +avail_analyse(CFG, Liveness, Target) -> + hipe_restore_reuse:analyse(CFG, Liveness, Target). + +-spec mode3_split_in_block(label(), avail()) -> ordsets:ordset(temp()). +mode3_split_in_block(L, Avail) -> + hipe_restore_reuse:split_in_block(L, Avail). + +-spec mode3_block_renameset(label(), avail()) -> ordsets:ordset(temp()). +mode3_block_renameset(L, Avail) -> + hipe_restore_reuse:renamed_in_block(L, Avail). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Seventh pass +%% +%% Compute program space partitioning, collect information required by the +%% heuristic. +-type part_key() :: label(). +-type part_dsets() :: hipe_dsets:dsets(part_key()). +-type part_dsets_map() :: #{part_key() => part_key()}. +-type ducounts() :: #{part_key() => ducount()}. + +-spec scan(cfg(), liveness(), plive(), weights(), defs(), rdefs(), avail(), + target()) -> {cfg(), ducounts(), costs(), part_dsets()}. +scan(CFG0, Liveness, PLive, Weights, Defs, RDefs, Avail, Target) -> + #cfg{rpo_labels = Labels, bbs = BBs0} = CFG0, + CFG = CFG0#cfg{bbs=#{}}, % kill reference + DSets0 = hipe_dsets:new(Labels), + Costs0 = costs_new(), + {BBs, DUCounts0, Costs1, DSets1} = + scan_bbs(maps:to_list(BBs0), Liveness, PLive, Weights, Defs, RDefs, Avail, + Target, #{}, Costs0, DSets0, []), + {RLList, DSets2} = hipe_dsets:to_rllist(DSets1), + {Costs, DSets} = costs_map_roots(DSets2, Costs1), + DUCounts = collect_ducounts(RLList, DUCounts0, #{}), + {CFG#cfg{bbs=maps:from_list(BBs)}, DUCounts, Costs, DSets}. + +-spec collect_ducounts([{label(), [label()]}], ducounts(), ducounts()) + -> ducounts(). +collect_ducounts([], _, Acc) -> Acc; +collect_ducounts([{R,Ls}|RLs], DUCounts, Acc) -> + DUCount = lists:foldl( + fun(Key, FAcc) -> + ducount_merge(maps:get(Key, DUCounts, ducount_new()), FAcc) + end, ducount_new(), Ls), + collect_ducounts(RLs, DUCounts, Acc#{R => DUCount}). + +-spec scan_bbs([{label(), bb()}], liveness(), plive(), weights(), defs(), + rdefs(), avail(), target(), ducounts(), costs(), part_dsets(), + [{label(), bb()}]) + -> {[{label(), bb()}], ducounts(), costs(), part_dsets()}. +scan_bbs([], _Liveness, _PLive, _Weights, _Defs, _RDefs, _Avail, _Target, + DUCounts, Costs, DSets, Acc) -> + {Acc, DUCounts, Costs, DSets}; +scan_bbs([{L,BB}|BBs], Liveness, PLive, Weights, Defs, RDefs, Avail, Target, + DUCounts0, Costs0, DSets0, Acc) -> + Wt = weight(L, Weights), + {DSets, Costs5, EntryCode, ExitCode, RDefout, Liveout} = + case bb_has_call(BB) of + false -> + DSets1 = lists:foldl(fun(S, DS) -> hipe_dsets:union(L, S, DS) end, + DSets0, bb_succ(BB)), + {DSets1, Costs0, bb_code(BB), [], rdefout(L, RDefs), + liveout(Liveness, L, Target)}; + true -> + LastI = #instr{def=LastDef} = bb_last(BB), + LiveBefore = ordsets:subtract(liveout(Liveness, L, Target), LastDef), + %% We can omit the spill of a temp that has not been defined since the + %% last time it was spilled + SpillSet = defsetf_intersect_ordset(LiveBefore, defbutlast(L, Defs)), + Costs1 = costs_insert(exit, L, Wt, SpillSet, Costs0), + Costs4 = lists:foldl(fun({S, BranchWt}, Costs2) -> + SLivein = livein(Liveness, S, Target), + SPLivein = plivein(S, PLive), + SWt = weight_scaled(L, BranchWt, Weights), + Costs3 = costs_insert(entry1, S, SWt, SLivein, Costs2), + costs_insert(entry2, S, SWt, SPLivein, Costs3) + end, Costs1, branch_preds(LastI#instr.i, Target)), + {DSets0, Costs4, bb_butlast(BB), [LastI], rdefsetf_empty(), LiveBefore} + end, + Mode3Splits = mode3_split_in_block(L, Avail), + {RevEntryCode, Restored} = scan_bb_fwd(EntryCode, Mode3Splits, [], []), + {Code, DUCount, Mode2Spills} = + scan_bb(RevEntryCode, Wt, RDefout, Liveout, ducount_new(), [], ExitCode), + DUCounts = DUCounts0#{L => DUCount}, + M2SpillSet = ordsets:from_list(Mode2Spills), + Costs6 = costs_insert(spill, L, Wt, M2SpillSet, Costs5), + Mode3Renames = mode3_block_renameset(L, Avail), + Costs7 = costs_insert(restore, L, Wt, ordsets:intersection(M2SpillSet, Mode3Renames), Costs6), + Costs8 = costs_insert(restore, L, Wt, ordsets:from_list(Restored), Costs7), + Costs = add_unsplit_mode3_costs(DUCount, Mode3Renames, L, Costs8), + scan_bbs(BBs, Liveness, PLive, Weights, Defs, RDefs, Avail, Target, DUCounts, + Costs, DSets, [{L,BB#bb{code=Code}}|Acc]). + +-spec add_unsplit_mode3_costs(ducount(), ordsets:ordset(temp()), label(), costs()) + -> costs(). +add_unsplit_mode3_costs(DUCount, Mode3Renames, L, Costs) -> + Unsplit = orddict_without_ordset(Mode3Renames, + orddict:from_list(ducount_to_list(DUCount))), + add_unsplit_mode3_costs_1(Unsplit, L, Costs). + +-spec add_unsplit_mode3_costs_1([{temp(),float()}], label(), costs()) + -> costs(). +add_unsplit_mode3_costs_1([], _L, Costs) -> Costs; +add_unsplit_mode3_costs_1([{T,C}|Cs], L, Costs) -> + add_unsplit_mode3_costs_1(Cs, L, costs_insert(restore, L, C, [T], Costs)). + +%% @doc Returns a new orddict without keys in Set and their associated values. +-spec orddict_without_ordset(ordsets:ordset(K), orddict:orddict(K, V)) + -> orddict:orddict(K, V). +orddict_without_ordset([S|Ss], [{K,_}|_]=Dict) when S < K -> + orddict_without_ordset(Ss, Dict); +orddict_without_ordset([S|_]=Set, [D={K,_}|Ds]) when S > K -> + [D|orddict_without_ordset(Set, Ds)]; +orddict_without_ordset([_S|Ss], [{_K,_}|Ds]) -> % _S == _K + orddict_without_ordset(Ss, Ds); +orddict_without_ordset(_, []) -> []; +orddict_without_ordset([], Dict) -> Dict. + +%% Scans the code forward, collecting and inserting mode3 restores +-spec scan_bb_fwd([instr()], ordsets:ordset(temp()), ordsets:ordset(temp()), + [code_elem()]) + -> {[code_elem()], ordsets:ordset(temp())}. +scan_bb_fwd([], [], Restored, Acc) -> {Acc, Restored}; +scan_bb_fwd([I|Is], SplitHere0, Restored0, Acc0) -> + #instr{def=Def, use=Use} = I, + {ToRestore, SplitHere1} = + lists:partition(fun(R) -> lists:member(R, Use) end, SplitHere0), + SplitHere = lists:filter(fun(R) -> not lists:member(R, Def) end, SplitHere1), + Acc = + case ToRestore of + [] -> [I | Acc0]; + _ -> [I, #mode3_restores{temps=ToRestore} | Acc0] + end, + scan_bb_fwd(Is, SplitHere, ToRestore ++ Restored0, Acc). + +%% Scans the code backwards, collecting def/use counts and mode2 spills +-spec scan_bb([code_elem()], float(), rdefsetf(), liveset(), ducount(), + [temp()], [code_elem()]) + -> {[code_elem()], ducount(), [temp()]}. +scan_bb([], _Wt, _RDefout, _Liveout, DUCount, Spills, Acc) -> + {Acc, DUCount, Spills}; +scan_bb([I=#mode3_restores{}|Is], Wt, RDefout, Liveout, DUCount, Spills, Acc) -> + scan_bb(Is, Wt, RDefout, Liveout, DUCount, Spills, [I|Acc]); +scan_bb([I|Is], Wt, RDefout, Liveout, DUCount0, Spills0, Acc0) -> + #instr{def=Def,use=Use} = I, + DUCount = ducount_add(Use, Wt, ducount_add(Def, Wt, DUCount0)), + Livein = liveness_step(I, Liveout), + RDefin = rdef_step(I, RDefout), + %% The temps that would be spilled after I in mode 2 + NewSpills = ordset_subtract_rdefsetf( + ordsets:intersection(Def, Liveout), + RDefout), + ?ASSERT(NewSpills =:= (NewSpills -- Spills0)), + Spills = NewSpills ++ Spills0, + Acc1 = case NewSpills of + [] -> Acc0; + _ -> [#mode2_spills{temps=NewSpills}|Acc0] + end, + scan_bb(Is, Wt, RDefin, Livein, DUCount, Spills, [I|Acc1]). + +-spec liveness_step(instr(), liveset()) -> liveset(). +liveness_step(#instr{def=Def, use=Use}, Liveout) -> + ordsets:union(Use, ordsets:subtract(Liveout, Def)). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% First pass: compute basic-block weighting + +-type weights() :: no_bb_weights + | {hipe_bb_weights:bb_weights(), float()}. + +-spec weight(label(), weights()) -> float(). +weight(L, Weights) -> weight_scaled(L, 1.0, Weights). + +-spec compute_weights(target_cfg(), target_module(), target_context(), + comp_options()) -> weights(). +compute_weights(CFG, TargetMod, TargetContext, Options) -> + case proplists:get_bool(range_split_weights, Options) of + false -> no_bb_weights; + true -> + {hipe_bb_weights:compute(CFG, TargetMod, TargetContext), + ?WEIGHT_CONST_FUN(proplists:get_value(range_split_weight_power, + Options, ?DEFAULT_WEIGHT_POWER))} + end. + +-spec weight_scaled(label(), float(), weights()) -> float(). +weight_scaled(_L, _Scale, no_bb_weights) -> 1.0; +weight_scaled(L, Scale, {Weights, Const}) -> + Wt0 = hipe_bb_weights:weight(L, Weights) * Scale, + Wt = erlang:min(erlang:max(Wt0, 0.0000000000000000001), 10000.0), + ?WEIGHT_FUN(Wt, Const). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Heuristic splitting decision. +%% +%% Decide which temps to split, in which parts, and pick new names for them. +-type spill_mode() :: mode1 % Spill temps at partition exits + | mode2 % Spill temps at definitions + | mode3.% Spill temps at definitions, restore temps at uses +-type ren() :: #{temp() => {spill_mode(), temp()}}. +-type renames() :: #{label() => ren()}. + +-record(heur_par, { + mode1_fudge :: float(), + min_gain :: float() + }). +-type heur_par() :: #heur_par{}. + +-spec decide(ducounts(), costs(), target(), comp_options()) -> renames(). +decide(DUCounts, Costs, Target, Options) -> + Par = #heur_par{ + mode1_fudge = proplists:get_value(range_split_mode1_fudge, Options, + ?DEFAULT_MODE1_FUDGE), + min_gain = proplists:get_value(range_split_min_gain, Options, + ?DEFAULT_MIN_GAIN)}, + decide_parts(maps:to_list(DUCounts), Costs, Target, Par, #{}). + +-spec decide_parts([{part_key(), ducount()}], costs(), target(), + heur_par(), renames()) + -> renames(). +decide_parts([], _Costs, _Target, _Par, Acc) -> Acc; +decide_parts([{Part,DUCount}|Ps], Costs, Target, Par, Acc) -> + Spills = decide_temps(ducount_to_list(DUCount), Part, Costs, Target, Par, + #{}), + decide_parts(Ps, Costs, Target, Par, Acc#{Part => Spills}). + +-spec decide_temps([{temp(), float()}], part_key(), costs(), target(), + heur_par(), ren()) + -> ren(). +decide_temps([], _Part, _Costs, _Target, _Par, Acc) -> Acc; +decide_temps([{Temp, SpillGain}|Ts], Part, Costs, Target, Par, Acc0) -> + SpillCost1 = costs_query(Temp, entry1, Part, Costs) + + costs_query(Temp, exit, Part, Costs), + SpillCost2 = costs_query(Temp, entry2, Part, Costs) + + costs_query(Temp, spill, Part, Costs), + SpillCost3 = costs_query(Temp, restore, Part, Costs), + Acc = + %% SpillCost1 =:= 0.0 usually means the temp is local to the partition; + %% hence no need to split it + case (SpillCost1 =/= 0.0) %% maps:is_key(Temp, S) + andalso (not is_precoloured(Temp, Target)) + andalso ((Par#heur_par.min_gain*SpillCost1 < SpillGain) + orelse (Par#heur_par.min_gain*SpillCost2 < SpillGain) + orelse (Par#heur_par.min_gain*SpillCost3 < SpillGain)) + of + false -> Acc0; + true -> + Mode = + if Par#heur_par.mode1_fudge*SpillCost1 < SpillCost2, + Par#heur_par.mode1_fudge*SpillCost1 < SpillCost3 -> + mode1; + SpillCost2 < SpillCost3 -> + mode2; + true -> + mode3 + end, + Acc0#{Temp => {Mode, new_reg_nr(Target)}} + end, + decide_temps(Ts, Part, Costs, Target, Par, Acc). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Eighth pass: Rewrite program performing range splitting. + +-spec rewrite(cfg(), target_cfg(), target(), liveness(), plive(), defs(), + avail(), part_dsets_map(), renames(), temps()) + -> target_cfg(). +rewrite(#cfg{bbs=BBs}, TCFG, Target, Liveness, PLive, Defs, Avail, DSets, + Renames, Temps) -> + rewrite_bbs(maps:to_list(BBs), Target, Liveness, PLive, Defs, Avail, DSets, + Renames, Temps, TCFG). + +-spec rewrite_bbs([{label(), bb()}], target(), liveness(), plive(), defs(), + avail(), part_dsets_map(), renames(), temps(), target_cfg()) + -> target_cfg(). +rewrite_bbs([], _Target, _Liveness, _PLive, _Defs, _Avail, _DSets, _Renames, + _Temps, TCFG) -> + TCFG; +rewrite_bbs([{L,BB}|BBs], Target, Liveness, PLive, Defs, Avail, DSets, Renames, + Temps, TCFG0) -> + Code0Rev = lists:reverse(bb_code(BB)), + EntryRen = maps:get(maps:get(L,DSets), Renames), + M3Ren = mode3_block_renameset(L, Avail), + SubstFun = rewrite_subst_fun(Target, EntryRen, M3Ren), + Fun = fun(I) -> subst_temps(SubstFun, I, Target) end, + {Code, TCFG} = + case bb_has_call(BB) of + false -> + Code1 = rewrite_instrs(Code0Rev, Fun, EntryRen, M3Ren, Temps, Target, + []), + {Code1, TCFG0}; + true -> + CallI0 = hd(Code0Rev), + Succ = bb_succ(BB), + {CallTI, TCFG1} = inject_restores(Succ, Target, Liveness, PLive, DSets, + Renames, Temps, CallI0#instr.i, TCFG0), + Liveout1 = liveness_step(CallI0, liveout(Liveness, L, Target)), + Defout = defbutlast(L, Defs), + SpillMap = mk_spillmap(EntryRen, Liveout1, Defout, Temps, Target), + Code1 = rewrite_instrs(tl(Code0Rev), Fun, EntryRen, M3Ren, Temps, + Target, []), + Code2 = lift_spills(lists:reverse(Code1), Target, SpillMap, [CallTI]), + {Code2, TCFG1} + end, + TBB = hipe_bb:code_update(bb(TCFG, L, Target), Code), + rewrite_bbs(BBs, Target, Liveness, PLive, Defs, Avail, DSets, Renames, Temps, + update_bb(TCFG, L, TBB, Target)). + +-spec rewrite_instrs([code_elem()], rewrite_fun(), ren(), + ordsets:ordset(temp()), temps(), target(), + [target_instr()]) + -> [target_instr()]. +rewrite_instrs([], _Fun, _Ren, _M3Ren, _Temps, _Target, Acc) -> Acc; +rewrite_instrs([I|Is], Fun, Ren, M3Ren, Temps, Target, Acc0) -> + Acc = + case I of + #instr{i=TI} -> [Fun(TI)|Acc0]; + #mode2_spills{temps=Mode2Spills} -> + add_mode2_spills(Mode2Spills, Target, Ren, M3Ren, Temps, Acc0); + #mode3_restores{temps=Mode3Restores} -> + add_mode3_restores(Mode3Restores, Target, Ren, Temps, Acc0) + end, + rewrite_instrs(Is, Fun, Ren, M3Ren, Temps, Target, Acc). + +-spec add_mode2_spills(ordsets:ordset(temp()), target(), ren(), + ordsets:ordset(temp()), temps(), [target_instr()]) + -> [target_instr()]. +add_mode2_spills([], _Target, _Ren, _M3Ren, _Temps, Acc) -> Acc; +add_mode2_spills([R|Rs], Target, Ren, M3Ren, Temps, Acc0) -> + Acc = + case Ren of + #{R := {Mode, NewName}} when Mode =:= mode2; Mode =:= mode3 -> + case Mode =/= mode3 orelse lists:member(R, M3Ren) of + false -> Acc0; + true -> + #{R := T} = Temps, + SpillInstr = mk_move(update_reg_nr(NewName, T, Target), T, Target), + [SpillInstr|Acc0] + end; + #{} -> + Acc0 + end, + add_mode2_spills(Rs, Target, Ren, M3Ren, Temps, Acc). + +-spec add_mode3_restores(ordsets:ordset(temp()), target(), ren(), temps(), + [target_instr()]) + -> [target_instr()]. +add_mode3_restores([], _Target, _Ren, _Temps, Acc) -> Acc; +add_mode3_restores([R|Rs], Target, Ren, Temps, Acc) -> + case Ren of + #{R := {mode3, NewName}} -> + #{R := T} = Temps, + RestoreInstr = mk_move(T, update_reg_nr(NewName, T, Target), Target), + add_mode3_restores(Rs, Target, Ren, Temps, [RestoreInstr|Acc]); + #{} -> + add_mode3_restores(Rs, Target, Ren, Temps, Acc) + end. + +-type rewrite_fun() :: fun((target_instr()) -> target_instr()). +-type subst_fun() :: fun((target_temp()) -> target_temp()). +-spec rewrite_subst_fun(target(), ren(), ordsets:ordset(temp())) -> subst_fun(). +rewrite_subst_fun(Target, Ren, M3Ren) -> + fun(Temp) -> + Reg = reg_nr(Temp, Target), + case Ren of + #{Reg := {Mode, NewName}} -> + case Mode =/= mode3 orelse lists:member(Reg, M3Ren) of + false -> Temp; + true -> update_reg_nr(NewName, Temp, Target) + end; + #{} -> Temp + end + end. + +-type spillmap() :: [{temp(), target_instr()}]. +-spec mk_spillmap(ren(), liveset(), defsetf(), temps(), target()) + -> spillmap(). +mk_spillmap(Ren, Livein, Defout, Temps, Target) -> + [begin + Temp = maps:get(Reg, Temps), + {NewName, mk_move(update_reg_nr(NewName, Temp, Target), Temp, Target)} + end || {Reg, {mode1, NewName}} <- maps:to_list(Ren), + lists:member(Reg, Livein), defsetf_member(Reg, Defout)]. + +-spec mk_restores(ren(), liveset(), liveset(), temps(), target()) + -> [target_instr()]. +mk_restores(Ren, Livein, PLivein, Temps, Target) -> + [begin + Temp = maps:get(Reg, Temps), + mk_move(Temp, update_reg_nr(NewName, Temp, Target), Target) + end || {Reg, {Mode, NewName}} <- maps:to_list(Ren), + ( (Mode =:= mode1 andalso lists:member(Reg, Livein )) + orelse (Mode =:= mode2 andalso lists:member(Reg, PLivein)))]. + +-spec inject_restores([label()], target(), liveness(), plive(), + part_dsets_map(), renames(), temps(), target_instr(), + target_cfg()) + -> {target_instr(), target_cfg()}. +inject_restores([], _Target, _Liveness, _PLive, _DSets, _Renames, _Temps, CFTI, + TCFG) -> + {CFTI, TCFG}; +inject_restores([L|Ls], Target, Liveness, PLive, DSets, Renames, Temps, CFTI0, + TCFG0) -> + Ren = maps:get(maps:get(L,DSets), Renames), + Livein = livein(Liveness, L, Target), + PLivein = plivein(L, PLive), + {CFTI, TCFG} = + case mk_restores(Ren, Livein, PLivein, Temps, Target) of + [] -> {CFTI0, TCFG0}; % optimisation + Restores -> + RestBBLbl = new_label(Target), + Code = Restores ++ [mk_goto(L, Target)], + CFTI1 = redirect_jmp(CFTI0, L, RestBBLbl, Target), + TCFG1 = update_bb(TCFG0, RestBBLbl, hipe_bb:mk_bb(Code), Target), + {CFTI1, TCFG1} + end, + inject_restores(Ls, Target, Liveness, PLive, DSets, Renames, Temps, CFTI, + TCFG). + +%% Heuristic. Move spills up until we meet the edge of the BB or a definition of +%% that temp. +-spec lift_spills([target_instr()], target(), spillmap(), [target_instr()]) + -> [target_instr()]. +lift_spills([], _Target, SpillMap, Acc) -> + [SpillI || {_, SpillI} <- SpillMap] ++ Acc; +lift_spills([I|Is], Target, SpillMap0, Acc) -> + Def = reg_defines(I, Target), + {Spills0, SpillMap} = + lists:partition(fun({Reg,_}) -> lists:member(Reg, Def) end, SpillMap0), + Spills = [SpillI || {_, SpillI} <- Spills0], + lift_spills(Is, Target, SpillMap, [I|Spills ++ Acc]). + +reg_defines(I, Target) -> + reg_names(defines(I,Target), Target). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Costs ADT +%% +%% Keeps track of cumulative cost of spilling temps in particular partitions +%% using particular spill modes. +-type cost_map() :: #{[part_key()|temp()] => float()}. +-type cost_key() :: entry1 | entry2 | exit | spill | restore. +-record(costs, {entry1 = #{} :: cost_map() + ,entry2 = #{} :: cost_map() + ,exit = #{} :: cost_map() + ,spill = #{} :: cost_map() + ,restore = #{} :: cost_map() + }). +-type costs() :: #costs{}. + +-spec costs_new() -> costs(). +costs_new() -> #costs{}. + +-spec costs_insert(cost_key(), part_key(), float(), liveset(), costs()) + -> costs(). +costs_insert(entry1, A, Weight, Liveset, Costs=#costs{entry1=Entry1}) -> + Costs#costs{entry1=costs_insert_1(A, Weight, Liveset, Entry1)}; +costs_insert(entry2, A, Weight, Liveset, Costs=#costs{entry2=Entry2}) -> + Costs#costs{entry2=costs_insert_1(A, Weight, Liveset, Entry2)}; +costs_insert(exit, A, Weight, Liveset, Costs=#costs{exit=Exit}) -> + Costs#costs{exit=costs_insert_1(A, Weight, Liveset, Exit)}; +costs_insert(spill, A, Weight, Liveset, Costs=#costs{spill=Spill}) -> + Costs#costs{spill=costs_insert_1(A, Weight, Liveset, Spill)}; +costs_insert(restore, A, Weight, Liveset, Costs=#costs{restore=Restore}) -> + Costs#costs{restore=costs_insert_1(A, Weight, Liveset, Restore)}. + +costs_insert_1(A, Weight, Liveset, CostMap0) when is_float(Weight) -> + lists:foldl(fun(Live, CostMap1) -> + map_update_counter([A|Live], Weight, CostMap1) + end, CostMap0, Liveset). + +-spec costs_map_roots(part_dsets(), costs()) -> {costs(), part_dsets()}. +costs_map_roots(DSets0, Costs) -> + {Entry1, DSets1} = costs_map_roots_1(DSets0, Costs#costs.entry1), + {Entry2, DSets2} = costs_map_roots_1(DSets1, Costs#costs.entry2), + {Exit, DSets3} = costs_map_roots_1(DSets2, Costs#costs.exit), + {Spill, DSets4} = costs_map_roots_1(DSets3, Costs#costs.spill), + {Restore, DSets} = costs_map_roots_1(DSets4, Costs#costs.restore), + {#costs{entry1=Entry1,entry2=Entry2,exit=Exit,spill=Spill,restore=Restore}, + DSets}. + +costs_map_roots_1(DSets0, CostMap) -> + {NewEs, DSets} = lists:mapfoldl(fun({[A|T], Wt}, DSets1) -> + {AR, DSets2} = hipe_dsets:find(A, DSets1), + {{[AR|T], Wt}, DSets2} + end, DSets0, maps:to_list(CostMap)), + {maps_from_list_merge(NewEs, fun erlang:'+'/2, #{}), DSets}. + +maps_from_list_merge([], _MF, Acc) -> Acc; +maps_from_list_merge([{K,V}|Ps], MF, Acc) -> + maps_from_list_merge(Ps, MF, case Acc of + #{K := OV} -> Acc#{K := MF(V, OV)}; + #{} -> Acc#{K => V} + end). + +-spec costs_query(temp(), cost_key(), part_key(), costs()) -> float(). +costs_query(Temp, entry1, Part, #costs{entry1=Entry1}) -> + costs_query_1(Temp, Part, Entry1); +costs_query(Temp, entry2, Part, #costs{entry2=Entry2}) -> + costs_query_1(Temp, Part, Entry2); +costs_query(Temp, exit, Part, #costs{exit=Exit}) -> + costs_query_1(Temp, Part, Exit); +costs_query(Temp, spill, Part, #costs{spill=Spill}) -> + costs_query_1(Temp, Part, Spill); +costs_query(Temp, restore, Part, #costs{restore=Restore}) -> + costs_query_1(Temp, Part, Restore). + +costs_query_1(Temp, Part, CostMap) -> + Key = [Part|Temp], + case CostMap of + #{Key := Wt} -> Wt; + #{} -> 0.0 + end. + +-spec map_update_counter(Key, number(), #{Key => number(), OK => OV}) + -> #{Key := number(), OK => OV}. +map_update_counter(Key, Incr, Map) -> + case Map of + #{Key := Orig} -> Map#{Key := Orig + Incr}; + #{} -> Map#{Key => Incr} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Def and use counting ADT +-type ducount() :: #{temp() => float()}. + +-spec ducount_new() -> ducount(). +ducount_new() -> #{}. + +-spec ducount_add([temp()], float(), ducount()) -> ducount(). +ducount_add([], _Weight, DUCount) -> DUCount; +ducount_add([T|Ts], Weight, DUCount0) -> + DUCount = + case DUCount0 of + #{T := Count} -> DUCount0#{T := Count + Weight}; + #{} -> DUCount0#{T => Weight} + end, + ducount_add(Ts, Weight, DUCount). + +ducount_to_list(DUCount) -> maps:to_list(DUCount). + +-spec ducount_merge(ducount(), ducount()) -> ducount(). +ducount_merge(DCA, DCB) when map_size(DCA) < map_size(DCB) -> + ducount_merge_1(ducount_to_list(DCA), DCB); +ducount_merge(DCA, DCB) when map_size(DCA) >= map_size(DCB) -> + ducount_merge_1(ducount_to_list(DCB), DCA). + +ducount_merge_1([], DUCount) -> DUCount; +ducount_merge_1([{T,AC}|Ts], DUCount0) -> + DUCount = + case DUCount0 of + #{T := BC} -> DUCount0#{T := AC + BC}; + #{} -> DUCount0#{T => AC} + end, + ducount_merge_1(Ts, DUCount). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Target module interface functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-define(TGT_IFACE_0(N), N( {M,C}) -> M:N( C)). +-define(TGT_IFACE_1(N), N(A1, {M,C}) -> M:N(A1, C)). +-define(TGT_IFACE_2(N), N(A1,A2, {M,C}) -> M:N(A1,A2, C)). +-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)). + +?TGT_IFACE_2(bb). +?TGT_IFACE_1(def_use). +?TGT_IFACE_1(defines). +?TGT_IFACE_1(defines_all_alloc). +?TGT_IFACE_1(is_precoloured). +?TGT_IFACE_1(mk_goto). +?TGT_IFACE_2(mk_move). +?TGT_IFACE_0(new_label). +?TGT_IFACE_0(new_reg_nr). +?TGT_IFACE_1(number_of_temporaries). +?TGT_IFACE_3(redirect_jmp). +?TGT_IFACE_1(reg_nr). +?TGT_IFACE_1(reverse_postorder). +?TGT_IFACE_2(subst_temps). +?TGT_IFACE_3(update_bb). +?TGT_IFACE_2(update_reg_nr). + +branch_preds(Instr, {TgtMod,TgtCtx}) -> + merge_sorted_preds(lists:keysort(1, TgtMod:branch_preds(Instr, TgtCtx))). + +livein(Liveness, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:livein(Liveness, L, TgtCtx), Target)). + +liveout(Liveness, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:liveout(Liveness, L, TgtCtx), Target)). + +merge_sorted_preds([]) -> []; +merge_sorted_preds([{L, P1}, {L, P2}|LPs]) -> + merge_sorted_preds([{L, P1+P2}|LPs]); +merge_sorted_preds([LP|LPs]) -> [LP|merge_sorted_preds(LPs)]. + +reg_names(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. diff --git a/lib/hipe/regalloc/hipe_reg_worklists.erl b/lib/hipe/regalloc/hipe_reg_worklists.erl index 88585f9f38..415f1d6122 100644 --- a/lib/hipe/regalloc/hipe_reg_worklists.erl +++ b/lib/hipe/regalloc/hipe_reg_worklists.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%%---------------------------------------------------------------------- %%% File : hipe_reg_worklists.erl @@ -30,8 +24,8 @@ -module(hipe_reg_worklists). -author(['Andreas Wallin', 'Thorild Selén']). --export([new/5, % only used by optimistic allocator - new/6, +-export([new/6, % only used by optimistic allocator + new/7, simplify/1, spill/1, freeze/1, @@ -90,29 +84,32 @@ %% %%%---------------------------------------------------------------------- -new(IG, Target, CFG, K, No_temporaries) -> % only used by optimistic allocator +%% only used by optimistic allocator +new(IG, TargetMod, TargetCtx, CFG, K, No_temporaries) -> CoalescedTo = hipe_bifs:array(No_temporaries, 'none'), - init(initial(Target, CFG), K, IG, empty(No_temporaries, CoalescedTo)). + init(initial(TargetMod, TargetCtx, CFG), K, IG, + empty(No_temporaries, CoalescedTo)). -new(IG, Target, CFG, Move_sets, K, No_temporaries) -> - init(initial(Target, CFG), K, IG, Move_sets, empty(No_temporaries, [])). +new(IG, TargetMod, TargetCtx, CFG, Move_sets, K, No_temporaries) -> + init(initial(TargetMod, TargetCtx, CFG), K, IG, Move_sets, + empty(No_temporaries, [])). -initial(Target, CFG) -> - {Min_temporary, Max_temporary} = Target:var_range(CFG), - NonAlloc = Target:non_alloc(CFG), - non_precoloured(Target, Min_temporary, Max_temporary, []) - -- [Target:reg_nr(X) || X <- NonAlloc]. +initial(TargetMod, TargetCtx, CFG) -> + {Min_temporary, Max_temporary} = TargetMod:var_range(CFG, TargetCtx), + NonAlloc = TargetMod:non_alloc(CFG, TargetCtx), + non_precoloured(TargetMod, TargetCtx, Min_temporary, Max_temporary, []) + -- [TargetMod:reg_nr(X, TargetCtx) || X <- NonAlloc]. -non_precoloured(Target, Current, Max_temporary, Initial) -> +non_precoloured(TargetMod, TargetCtx, Current, Max_temporary, Initial) -> if Current > Max_temporary -> Initial; true -> NewInitial = - case Target:is_precoloured(Current) of + case TargetMod:is_precoloured(Current, TargetCtx) of true -> Initial; false -> [Current|Initial] end, - non_precoloured(Target, Current+1, Max_temporary, NewInitial) + non_precoloured(TargetMod, TargetCtx, Current+1, Max_temporary, NewInitial) end. %% construct an empty initialized worklists data structure diff --git a/lib/hipe/regalloc/hipe_regalloc_loop.erl b/lib/hipe/regalloc/hipe_regalloc_loop.erl index d29615a3a0..29ef3adcc2 100644 --- a/lib/hipe/regalloc/hipe_regalloc_loop.erl +++ b/lib/hipe/regalloc/hipe_regalloc_loop.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,44 +11,50 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% Common wrapper for graph_coloring and coalescing regallocs. -module(hipe_regalloc_loop). --export([ra/5, ra_fp/4]). +-export([ra/7, ra_fp/6]). %%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. -include("../main/hipe.hrl"). -ra(Defun, SpillIndex, Options, RegAllocMod, TargetMod) -> - {NewDefun, Coloring, _NewSpillIndex} = - ra_common(Defun, SpillIndex, Options, RegAllocMod, TargetMod), - {NewDefun, Coloring}. +ra(CFG, Liveness0, SpillIndex, Options, RegAllocMod, TargetMod, TargetCtx) -> + {NewCFG, Liveness, Coloring, _NewSpillIndex} = + ra_common(CFG, Liveness0, SpillIndex, Options, RegAllocMod, TargetMod, + TargetCtx), + {NewCFG, Liveness, Coloring}. -ra_fp(Defun, Options, RegAllocMod, TargetMod) -> - ra_common(Defun, 0, Options, RegAllocMod, TargetMod). +ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod, TargetCtx) -> + ra_common(CFG, Liveness, 0, Options, RegAllocMod, TargetMod, TargetCtx). -ra_common(Defun, SpillIndex, Options, RegAllocMod, TargetMod) -> +ra_common(CFG0, Liveness0, SpillIndex, Options, RegAllocMod, TargetMod, + TargetCtx) -> ?inc_counter(ra_calls_counter, 1), - CFG = TargetMod:defun_to_cfg(Defun), - SpillLimit = TargetMod:number_of_temporaries(CFG), - alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod). + {CFG1, Liveness1} = + do_range_split(CFG0, Liveness0, TargetMod, TargetCtx, Options), + SpillLimit0 = TargetMod:number_of_temporaries(CFG1, TargetCtx), + {Coloring, _, CFG, Liveness} = + call_allocator_initial(CFG1, Liveness1, SpillLimit0, SpillIndex, Options, + RegAllocMod, TargetMod, TargetCtx), + %% The first iteration, the hipe_regalloc_prepass may create new temps, these + %% should not end up above SpillLimit. + SpillLimit = TargetMod:number_of_temporaries(CFG, TargetCtx), + alloc(Coloring, CFG, Liveness, SpillLimit, SpillIndex, Options, + RegAllocMod, TargetMod, TargetCtx). -alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) -> +alloc(Coloring, CFG0, Liveness, SpillLimit, SpillIndex, Options, + RegAllocMod, TargetMod, TargetCtx) -> ?inc_counter(ra_iteration_counter, 1), - CFG = TargetMod:defun_to_cfg(Defun), - {Coloring, _NewSpillIndex} = - RegAllocMod:regalloc(CFG, SpillIndex, SpillLimit, TargetMod, Options), - {NewDefun, DidSpill} = TargetMod:check_and_rewrite(Defun, Coloring), + {CFG, DidSpill} = TargetMod:check_and_rewrite(CFG0, Coloring, TargetCtx), case DidSpill of false -> %% No new temps, we are done. ?add_spills(Options, _NewSpillIndex), - TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod), - {TempMap2, NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, - TargetMod, TempMap), + TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod, TargetCtx), + {TempMap2, NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG0, Liveness, [], SpillIndex, Options, + TargetMod, TargetCtx, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), %% case proplists:get_bool(verbose_spills, Options) of @@ -61,9 +63,55 @@ alloc(Defun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) -> %% false -> %% ok %% end, - {NewDefun, Coloring2, NewSpillIndex2}; + {CFG, Liveness, Coloring2, NewSpillIndex2}; _ -> %% Since SpillLimit is used as a low-water-mark %% the list of temps not to spill is uninteresting. - alloc(NewDefun, SpillLimit, SpillIndex, Options, RegAllocMod, TargetMod) + {NewColoring, _NewSpillIndex} = + call_allocator(CFG, Liveness, SpillLimit, SpillIndex, Options, + RegAllocMod, TargetMod, TargetCtx), + alloc(NewColoring, CFG, Liveness, SpillLimit, SpillIndex, Options, + RegAllocMod, TargetMod, TargetCtx) + end. + +call_allocator_initial(CFG, Liveness, SpillLimit, SpillIndex, Options, + RegAllocMod, TargetMod, TargetCtx) -> + case proplists:get_bool(ra_prespill, Options) of + true -> + hipe_regalloc_prepass:regalloc_initial( + RegAllocMod, CFG, Liveness, SpillIndex, SpillLimit, TargetMod, + TargetCtx, Options); + false -> + {C, SI} = RegAllocMod:regalloc(CFG, Liveness, SpillIndex, SpillLimit, + TargetMod, TargetCtx, Options), + {C, SI, CFG, Liveness} + end. + +call_allocator(CFG, Liveness, SpillLimit, SpillIndex, Options, RegAllocMod, + TargetMod, TargetCtx) -> + case proplists:get_bool(ra_prespill, Options) of + true -> + hipe_regalloc_prepass:regalloc( + RegAllocMod, CFG, Liveness, SpillIndex, SpillLimit, TargetMod, + TargetCtx, Options); + false -> + RegAllocMod:regalloc(CFG, Liveness, SpillIndex, SpillLimit, TargetMod, + TargetCtx, Options) + end. + +do_range_split(CFG0, Liveness0, TgtMod, TgtCtx, Options) -> + {CFG2, Liveness1} = + case proplists:get_bool(ra_restore_reuse, Options) of + true -> + CFG1 = hipe_restore_reuse:split(CFG0, Liveness0, TgtMod, TgtCtx), + {CFG1, TgtMod:analyze(CFG1, TgtCtx)}; + false -> + {CFG0, Liveness0} + end, + case proplists:get_bool(ra_range_split, Options) of + true -> + CFG3 = hipe_range_split:split(CFG2, Liveness1, TgtMod, TgtCtx, Options), + {CFG3, TgtMod:analyze(CFG3, TgtCtx)}; + false -> + {CFG2, Liveness1} end. diff --git a/lib/hipe/regalloc/hipe_regalloc_prepass.erl b/lib/hipe/regalloc/hipe_regalloc_prepass.erl new file mode 100644 index 0000000000..5024840237 --- /dev/null +++ b/lib/hipe/regalloc/hipe_regalloc_prepass.erl @@ -0,0 +1,953 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% PREPASS FOR ITERATED REGISTER ALLOCATORS +%% +%% Implements a trivial partial but optimal fast register allocator to be used +%% as the first pass of the register allocation loop. +%% +%% The idea is to drastically reduce the number of temporaries, so as to speed +%% up the real register allocators. +%% +%% * Spills trivially unallocatable temps +%% This relies on the fact that calls intentionally clobber all registers. +%% Since this is the case, any temp that is alive over a call can't possibly +%% be allocated to anything but a spill slot. +%% +%% * Partitions the program at points where no pseudos that were not spiled are +%% live, and then do register allocation on these partitions independently. +%% These program points are commonly, but not exclusively, the call +%% instructions. +%% +%% TODO +%% * This module seems very successful at finding every single spill; register +%% allocation performance should be improved if we short-circuit the first +%% hipe_regalloc_loop iteration, skipping directly to rewrite without ever +%% calling RegAllocMod. +-module(hipe_regalloc_prepass). +-export([regalloc/8, regalloc_initial/8]). + +-ifndef(DEBUG). +-compile(inline). +-endif. + +%%-define(DO_ASSERT, 1). +-include("../main/hipe.hrl"). + +%%% TUNABLES + +%% Partitions with fewer than ?TUNE_TOO_FEW_BBS basic block halves are merged +%% together before register allocation. +-define(TUNE_TOO_FEW_BBS, 256). + +%% Ignore the ra_partitioned option (and do whole function RA instead) when +%% there are fewer than ?TUNE_MIN_SPLIT_BBS basic blocks. +-define(TUNE_MIN_SPLIT_BBS, 384). + +%% We present a "pseudo-target" to the register allocator we wrap. +-export([analyze/2, + all_precoloured/1, + allocatable/1, + args/2, + bb/3, + def_use/2, + defines/2, + is_fixed/2, % used by hipe_graph_coloring_regalloc + is_global/2, + is_move/2, + is_precoloured/2, + labels/2, + livein/3, + liveout/3, + non_alloc/2, + number_of_temporaries/2, + physical_name/2, + postorder/2, + reg_nr/2, + uses/2, + var_range/2, + reverse_postorder/2]). + +-record(prepass_ctx, + {target_mod :: module() + ,target_ctx :: target_context() + ,sub :: sub_map() % Translates temp numbers found in CFG and understood by + % Target to temp numbers passed to RegAllocMod. + ,inv :: inv_map() % Translates temp numbers passed to RegAllocMod + % to temp numbers found in CFG and understood by + % Target + ,max_phys :: temp() % Exclusive upper bound on physical registers + }). + +-record(cfg, + {cfg :: target_cfg() + ,bbs :: transformed_bbs() + ,max_reg :: temp() % Exclusive upper bound on temp numbers + ,rpostorder :: undefined % Only precomputed with partitioned cfg + | [label()] + }). + +-type bb() :: hipe_bb:bb(). % containing instr() +-type liveset() :: ordsets:ordset(temp()). +-record(transformed_bb, + {bb :: bb() + ,livein :: liveset() + ,liveout :: liveset() + }). +-type transformed_bb() :: #transformed_bb{}. +-type transformed_bbs() :: #{label() => transformed_bb()}. + +-record(instr, + {defuse :: {[temp()], [temp()]} + ,is_move :: boolean() + }). +-type instr() :: #instr{}. + +-type target_cfg() :: any(). +-type target_instr() :: any(). +-type target_temp() :: any(). +-type target_reg() :: non_neg_integer(). +-type target_liveness() :: any(). +-type target_liveset() :: ordsets:ordset(target_reg()). +-type target_context() :: any(). +-type spillno() :: non_neg_integer(). +-type temp() :: non_neg_integer(). +-type label() :: non_neg_integer(). + +-spec regalloc(module(), target_cfg(), target_liveness(), spillno(), spillno(), + module(), target_context(), proplists:proplist()) + -> {hipe_map(), spillno()}. +regalloc(RegAllocMod, CFG, Liveness, SpillIndex0, SpillLimit, TargetMod, + TargetCtx, Options) -> + {Coloring, SpillIndex, same} = + regalloc_1(RegAllocMod, CFG, SpillIndex0, SpillLimit, TargetMod, + TargetCtx, Options, Liveness), + {Coloring, SpillIndex}. + +%% regalloc_initial/7 is allowed to introduce new temporaries, unlike +%% regalloc/7. +%% In order for regalloc/7 to never introduce temporaries, regalloc/7 must never +%% choose to do split allocation unless regalloc_initial/7 does. This is the +%% reason that the splitting heuristic is solely based on the number of basic +%% blocks, which does not change during the register allocation loop. +-spec regalloc_initial(module(), target_cfg(), target_liveness(), spillno(), + spillno(), module(), target_context(), + proplists:proplist()) + -> {hipe_map(), spillno(), target_cfg(), + target_liveness()}. +regalloc_initial(RegAllocMod, CFG0, Liveness0, SpillIndex0, SpillLimit, + TargetMod, TargetCtx, Options) -> + {Coloring, SpillIndex, NewCFG} = + regalloc_1(RegAllocMod, CFG0, SpillIndex0, SpillLimit, TargetMod, TargetCtx, + Options, Liveness0), + {CFG, Liveness} = + case NewCFG of + same -> {CFG0, Liveness0}; + {rewritten, CFG1} -> {CFG1, TargetMod:analyze(CFG1, TargetCtx)} + end, + {Coloring, SpillIndex, CFG, Liveness}. + +regalloc_1(RegAllocMod, CFG0, SpillIndex0, SpillLimit, TargetMod, TargetCtx, + Options, Liveness) -> + {ScanBBs, Seen, SpillMap, SpillIndex1} = + scan_cfg(CFG0, Liveness, SpillIndex0, TargetMod, TargetCtx), + + {PartColoring, SpillIndex, NewCFG} = + case proplists:get_bool(ra_partitioned, Options) + andalso length(TargetMod:labels(CFG0, TargetCtx)) > ?TUNE_MIN_SPLIT_BBS + of + true -> + regalloc_partitioned(SpillMap, SpillIndex1, SpillLimit, ScanBBs, + CFG0, TargetMod, TargetCtx, RegAllocMod, Options); + _ -> + regalloc_whole(Seen, SpillMap, SpillIndex1, SpillLimit, ScanBBs, + CFG0, TargetMod, TargetCtx, RegAllocMod, Options) + end, + + SpillColors = [{T, {spill, S}} || {T, S} <- maps:to_list(SpillMap)], + Coloring = SpillColors ++ PartColoring, + + ?ASSERT(begin + AllPrecoloured = TargetMod:all_precoloured(TargetCtx), + MaxPhys = lists:max(AllPrecoloured) + 1, + Unused = unused(live_pseudos(Seen, SpillMap, MaxPhys), + SpillMap, CFG0, TargetMod, TargetCtx), + unused_unused(Unused, CFG0, TargetMod, TargetCtx) + end), + ?ASSERT(begin + CFG = + case NewCFG of + same -> CFG0; + {rewritten, CFG1} -> CFG1 + end, + check_coloring(Coloring, CFG, TargetMod, TargetCtx) + end), % Sanity-check + ?ASSERT(just_as_good_as(RegAllocMod, CFG, Liveness, SpillIndex0, SpillLimit, + TargetMod, TargetCtx, Options, SpillMap, Coloring, + Unused)), + {Coloring, SpillIndex, NewCFG}. + +regalloc_whole(Seen, SpillMap, SpillIndex0, SpillLimit, ScanBBs, + CFG, TargetMod, TargetCtx, RegAllocMod, Options) -> + AllPrecoloured = TargetMod:all_precoloured(TargetCtx), + MaxPhys = lists:max(AllPrecoloured) + 1, + LivePseudos = live_pseudos(Seen, SpillMap, MaxPhys), + {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit} = + number_and_map(AllPrecoloured, LivePseudos, SpillLimit), + BBs = transform_whole_cfg(ScanBBs, SubMap), + SubMod = #cfg{cfg=CFG, bbs=BBs, max_reg=MaxR}, + SubContext = #prepass_ctx{target_mod=TargetMod, target_ctx=TargetCtx, + max_phys=MaxPhys, inv=InvMap, sub=SubMap}, + {SubColoring, SpillIndex} = + RegAllocMod:regalloc(SubMod, SubMod, SpillIndex0, SubSpillLimit, ?MODULE, + SubContext, Options), + ?ASSERT(check_coloring(SubColoring, SubMod, ?MODULE, SubContext)), + {translate_coloring(SubColoring, InvMap), SpillIndex, same}. + +regalloc_partitioned(SpillMap, SpillIndex0, SpillLimit, ScanBBs, + CFG, TargetMod, TargetCtx, RegAllocMod, Options) -> + AllPrecoloured = TargetMod:all_precoloured(TargetCtx), + MaxPhys = lists:max(AllPrecoloured) + 1, + + DSets0 = initial_dsets(CFG, TargetMod, TargetCtx), + PartBBList = part_cfg(ScanBBs, SpillMap, MaxPhys), + DSets1 = join_whole_blocks(PartBBList, DSets0), + {PartBBsRLList, DSets2} = merge_small_parts(DSets1), + {PartBBs, DSets3} = merge_pointless_splits(PartBBList, ScanBBs, DSets2), + SeenMap = collect_seenmap(PartBBsRLList, PartBBs), + {RPostMap, _DSets4} = part_order(TargetMod:reverse_postorder(CFG, TargetCtx), + DSets3), + + {Allocations, SpillIndex} = + lists:mapfoldl( + fun({Root, Elems}, SpillIndex1) -> + #{Root := Seen} = SeenMap, + #{Root := RPost} = RPostMap, + LivePseudos = live_pseudos(Seen, SpillMap, MaxPhys), + {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit} = + number_and_map(AllPrecoloured, LivePseudos, SpillLimit), + BBs = transform_cfg(Elems, PartBBs, SubMap), + SubMod = #cfg{cfg=CFG, bbs=BBs, max_reg=MaxR, rpostorder=RPost}, + SubContext = #prepass_ctx{target_mod=TargetMod, target_ctx=TargetCtx, + max_phys=MaxPhys, inv=InvMap, sub=SubMap}, + {SubColoring, SpillIndex2} = + RegAllocMod:regalloc(SubMod, SubMod, SpillIndex1, SubSpillLimit, + ?MODULE, SubContext, Options), + ?ASSERT(check_coloring(SubColoring, SubMod, ?MODULE, SubContext)), + {{translate_coloring(SubColoring, InvMap), Elems}, SpillIndex2} + end, SpillIndex0, PartBBsRLList), + {Coloring, NewCFG} = + combine_allocations(Allocations, MaxPhys, PartBBs, TargetMod, TargetCtx, + CFG), + {Coloring, SpillIndex, NewCFG}. + +-spec number_and_map([target_reg()], target_liveset(), target_reg()) + -> {sub_map(), inv_map(), temp(), temp(), temp()}. +number_and_map(Phys, Pseud, SpillLimit) -> + MaxPhys = lists:max(Phys) + 1, + ?ASSERT(Pseud =:= [] orelse lists:min(Pseud) >= MaxPhys), + NrPseuds = length(Pseud), + MaxR = MaxPhys+NrPseuds, + PseudNrs = lists:zip(Pseud, lists:seq(MaxPhys, MaxR-1)), + MapList = lists:zip(Phys, Phys) % Physicals are identity-mapped + ++ PseudNrs, + ?ASSERT(MapList =:= lists:ukeysort(1, MapList)), + SubMap = {s,maps:from_list(MapList)}, + InvMap = {i,maps:from_list([{Fake, Real} || {Real, Fake} <- MapList])}, + SubSpillLimit = translate_spill_limit(MapList, SpillLimit), + {SubMap, InvMap, MaxPhys, MaxR, SubSpillLimit}. + +-spec translate_spill_limit([{target_reg(), temp()}], target_reg()) -> temp(). +translate_spill_limit([{Real,Fake}], SpillLimit) when Real < SpillLimit -> + Fake + 1; +translate_spill_limit([{Real,_}|Ps], SpillLimit) when Real < SpillLimit -> + translate_spill_limit(Ps, SpillLimit); +translate_spill_limit([{Real,Fake}|_], SpillLimit) when Real >= SpillLimit -> + Fake. + +-spec live_pseudos(seen(), spill_map(), target_reg()) -> target_liveset(). +live_pseudos(Seen, SpillMap, MaxPhys) -> + %% When SpillMap is much larger than Seen (which is typical in the partitioned + %% case), it is much more efficient doing it like this than making an ordset + %% of the spills and subtracting. + ordsets:from_list( + lists:filter(fun(R) -> R >= MaxPhys andalso not maps:is_key(R, SpillMap) + end, maps:keys(Seen))). + +-spec translate_coloring(hipe_map(), inv_map()) -> hipe_map(). +translate_coloring(SubColoring, InvMap) -> + lists:map(fun({T, P}) -> {imap_get(T, InvMap), P} end, SubColoring). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% First pass +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Spill trivially unallocatable temps, create internal target-independent +%% program representation, and collect a set of all used temps. +-record(spill_state, + {map :: spill_map() + ,ix :: spillno() + }). +-type spill_state() :: #spill_state{}. +-type spill_map() :: #{target_reg() => spillno()}. + +-spec scan_cfg(target_cfg(), target_liveness(), spillno(), module(), + target_context()) + -> {scan_bbs() + ,seen() + ,spill_map() + ,spillno() + }. +scan_cfg(CFG, Liveness, SpillIndex0, TgtMod, TgtCtx) -> + State0 = #spill_state{map=#{}, ix=SpillIndex0}, + {BBs, Seen, #spill_state{map=Spill, ix=SpillIndex}} = + scan_bbs(TgtMod:labels(CFG,TgtCtx), CFG, Liveness, #{}, State0, #{}, TgtMod, + TgtCtx), + {BBs, Seen, Spill, SpillIndex}. + +-type seen() :: #{target_reg() => []}. % set +-type scan_bb() :: {[instr()], target_liveset(), target_liveset()}. +-type scan_bbs() :: #{label() => scan_bb()}. + +-spec scan_bbs([label()], target_cfg(), target_liveness(), seen(), + spill_state(), scan_bbs(), module(), target_context()) + -> {scan_bbs(), seen(), spill_state()}. +scan_bbs([], _CFG, _Liveness, Seen, State, BBs, _TgtMod, _TgtCtx) -> + {BBs, Seen, State}; +scan_bbs([L|Ls], CFG, Liveness, Seen0, State0, BBs, TgtMod, TgtCtx) -> + Liveout = t_liveout(Liveness, L, TgtMod, TgtCtx), + {Code, Livein, Seen, State} = + scan_bb(lists:reverse(hipe_bb:code(TgtMod:bb(CFG, L, TgtCtx))), Liveout, + Seen0, State0, [], TgtMod, TgtCtx), + BB = {Code, Livein, Liveout}, + scan_bbs(Ls, CFG, Liveness, Seen, State, BBs#{L=>BB}, TgtMod, TgtCtx). + +-spec scan_bb([target_instr()], target_liveset(), seen(), spill_state(), + [instr()], module(), target_context()) + -> {[instr()] + ,target_liveset() + ,seen() + ,spill_state() + }. +scan_bb([], Live, Seen, State, IAcc, _TgtMod, _TgtCtx) -> + {IAcc, Live, Seen, State}; +scan_bb([I|Is], Live0, Seen0, State0, IAcc0, TgtMod, TgtCtx) -> + {TDef, TUse} = TgtMod:def_use(I,TgtCtx), + ?ASSERT(TDef =:= TgtMod:defines(I,TgtCtx)), + ?ASSERT(TUse =:= TgtMod:uses(I,TgtCtx)), + Def = ordsets:from_list(reg_names(TDef, TgtMod, TgtCtx)), + Use = ordsets:from_list(reg_names(TUse, TgtMod, TgtCtx)), + Live = ordsets:union(Use, ToSpill = ordsets:subtract(Live0, Def)), + Seen = add_seen(Def, add_seen(Use, Seen0)), + NewI = #instr{defuse={Def, Use}, is_move=TgtMod:is_move(I,TgtCtx)}, + IAcc = [NewI|IAcc0], + State = + case TgtMod:defines_all_alloc(I,TgtCtx) of + false -> State0; + true -> spill_all(ToSpill, TgtMod, TgtCtx, State0) + end, + %% We can drop "no-ops" here; where (if anywhere) is it worth it? + scan_bb(Is, Live, Seen, State, IAcc, TgtMod, TgtCtx). + +-spec t_liveout(target_liveness(), label(), module(), target_context()) -> + target_liveset(). +t_liveout(Liveness, L, TgtMod, TgtCtx) -> + %% FIXME: unnecessary sort; liveout is sorted, reg_names(...) should be sorted + %% or consist of a few sorted subsequences (per type) + ordsets:from_list(reg_names(TgtMod:liveout(Liveness, L, TgtCtx), TgtMod, + TgtCtx)). + +-spec reg_names([target_temp()], module(), target_context()) -> [target_reg()]. +reg_names(Regs, TgtMod, TgtCtx) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. + +-spec add_seen([target_reg()], seen()) -> seen(). +add_seen([], Seen) -> Seen; +add_seen([R|Rs], Seen) -> add_seen(Rs, Seen#{R=>[]}). + +-spec spill_all([target_reg()], module(), target_context(), spill_state()) -> + spill_state(). +spill_all([], _TgtMod, _TgtCtx, State) -> State; +spill_all([R|Rs], TgtMod, TgtCtx, State=#spill_state{map=Map, ix=Ix}) -> + case TgtMod:is_precoloured(R,TgtCtx) or maps:is_key(R, Map) of + true -> spill_all(Rs, TgtMod, TgtCtx, State); + false -> spill_all(Rs, TgtMod, TgtCtx, + State#spill_state{map=Map#{R=>Ix}, ix=Ix+1}) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Second pass (without split) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Rewrite CFG to the new temp names. +-spec transform_whole_cfg(scan_bbs(), sub_map()) -> transformed_bbs(). +transform_whole_cfg(BBs0, SubMap) -> + maps:map(fun(_, BB) -> transform_whole_bb(BB, SubMap) end, BBs0). + +-spec transform_whole_bb(scan_bb(), sub_map()) -> transformed_bb(). +transform_whole_bb({Code, Livein, Liveout}, SubMap) -> + #transformed_bb{ + bb=hipe_bb:mk_bb([I#instr{defuse={smap_get_all_partial(Def, SubMap), + smap_get_all_partial(Use, SubMap)}} + || I = #instr{defuse={Def,Use}} <- Code]) + %% Assume mapping preserves monotonicity + ,livein=smap_get_all_partial(Livein, SubMap) + ,liveout=smap_get_all_partial(Liveout, SubMap) + }. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Second pass (with split) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Discover program partitioning +%% Regretfully, this needs to be a separate pass, as having the global live set +%% is crucial to get a useful partitioning. + +%% Single-block parts are merged if there are multiple in a single block, as it +%% is judged to not be beneficial to make them too small. + +-type part_bb_part() :: {[instr()], target_liveset(), target_liveset()}. +-type part_bb() :: {single, part_bb_part()} + | {split, part_bb_part(), part_bb_part()}. +-type part_bb_list() :: [{label(), part_bb()}]. +-type part_bbs() :: #{label() => part_bb()}. +-type part_bb_sofar() :: single + | {split, [instr()], target_liveset()}. % , target_liveset() + +-spec part_cfg(scan_bbs(), spill_map(), target_reg()) -> part_bb_list(). +part_cfg(ScanBBs, SpillMap, MaxPhys) -> + Liveset = mk_part_liveset(SpillMap, MaxPhys), + lists:map(fun(BB) -> part_bb(BB, Liveset) end, maps:to_list(ScanBBs)). + +-spec part_bb({label(), scan_bb()}, part_liveset()) -> {label(), part_bb()}. +part_bb({L, BB0={Code0, Livein, Liveout}}, Liveset) -> + {Sofar, NewCode} = part_bb_1(lists:reverse(Code0), Liveset, Liveout, []), + BB = case Sofar of + single -> + ?ASSERT(Code0 =:= NewCode), + {single, BB0}; + {split, ExitCode, ExitLivein = EntryLiveout} -> + {split, {NewCode, Livein, EntryLiveout}, + {ExitCode, ExitLivein, Liveout}} + end, + {L, BB}. + +-spec part_bb_1([instr()], part_liveset(), target_liveset(), [instr()]) + -> {part_bb_sofar(), [instr()]}. +part_bb_1([], _Liveset, _Livein, IAcc) -> {single, IAcc}; +part_bb_1([I=#instr{defuse={Def,Use}}|Is], Liveset, Live0, IAcc0) -> + Live = ordsets:union(Use, ordsets:subtract(Live0, Def)), + IAcc = [I|IAcc0], + case part_none_live(Live, Liveset) of + false -> part_bb_1(Is, Liveset, Live, IAcc); + %% One split point will suffice + true -> {{split, IAcc, Live}, lists:reverse(Is)} + end. + +-spec part_none_live(target_liveset(), part_liveset()) -> boolean(). +part_none_live(Live, Liveset) -> + not lists:any(fun(R) -> part_liveset_is_live(R, Liveset) end, Live). + +-type part_liveset() :: {spill_map(), target_reg()}. + +-spec mk_part_liveset(spill_map(), target_reg()) -> part_liveset(). +mk_part_liveset(SpillMap, MaxPhys) -> {SpillMap, MaxPhys}. + +-spec part_liveset_is_live(target_reg(), part_liveset()) -> boolean(). +part_liveset_is_live(R, {SpillMap, MaxPhys}) when is_integer(R) -> + R >= MaxPhys andalso not maps:is_key(R, SpillMap). + +%% @doc Merges split blocks where entry and exit belong to the same DSet. +%% Does not change DSets +-spec merge_pointless_splits(part_bb_list(), scan_bbs(), bb_dsets()) + -> {part_bbs(), bb_dsets()}. +merge_pointless_splits(PartBBList0, ScanBBs, DSets0) -> + {PartBBList, DSets} = + merge_pointless_splits_1(PartBBList0, ScanBBs, DSets0, []), + {maps:from_list(PartBBList), DSets}. + +-spec merge_pointless_splits_1( + part_bb_list(), scan_bbs(), bb_dsets(), part_bb_list()) + -> {part_bb_list(), bb_dsets()}. +merge_pointless_splits_1([], _ScanBBs, DSets, Acc) -> {Acc, DSets}; +merge_pointless_splits_1([P={_,{single,_}}|Ps], ScanBBs, DSets, Acc) -> + merge_pointless_splits_1(Ps, ScanBBs, DSets, [P|Acc]); +merge_pointless_splits_1([P0={L,{split,_,_}}|Ps], ScanBBs, DSets0, Acc) -> + {EntryRoot, DSets1} = hipe_dsets:find({entry,L}, DSets0), + {ExitRoot, DSets} = hipe_dsets:find({exit,L}, DSets1), + case EntryRoot =:= ExitRoot of + false -> merge_pointless_splits_1(Ps, ScanBBs, DSets, [P0|Acc]); + true -> + %% Reuse the code list from ScanBBs rather than concatenating the split + %% parts + #{L := BB} = ScanBBs, + ?ASSERT(begin + {L,{split,{_EntryCode,_,_},{_ExitCode,_,_}}}=P0, % [_| + {_Code,_,_}=BB, + _Code =:= (_EntryCode ++ _ExitCode) + end), + merge_pointless_splits_1(Ps, ScanBBs, DSets, [{L,{single, BB}}|Acc]) + end. + +-spec merge_small_parts(bb_dsets()) -> {bb_dsets_rllist(), bb_dsets()}. +merge_small_parts(DSets0) -> + {RLList, DSets1} = hipe_dsets:to_rllist(DSets0), + RLLList = [{R, length(Elems), Elems} || {R, Elems} <- RLList], + merge_small_parts_1(RLLList, DSets1, []). + +-spec merge_small_parts_1( + [{bb_dset_key(), non_neg_integer(), [bb_dset_key()]}], + bb_dsets(), bb_dsets_rllist() + ) -> {bb_dsets_rllist(), bb_dsets()}. +merge_small_parts_1([], DSets, Acc) -> {Acc, DSets}; +merge_small_parts_1([{R, _, Es}], DSets, Acc) -> {[{R, Es}|Acc], DSets}; +merge_small_parts_1([{R, L, Es}|Ps], DSets, Acc) when L >= ?TUNE_TOO_FEW_BBS -> + merge_small_parts_1(Ps, DSets, [{R,Es}|Acc]); +merge_small_parts_1([Fst,{R, L, Es}|Ps], DSets, Acc) + when L >= ?TUNE_TOO_FEW_BBS -> + merge_small_parts_1([Fst|Ps], DSets, [{R,Es}|Acc]); +merge_small_parts_1([{R1,L1,Es1},{R2,L2,Es2}|Ps], DSets0, Acc) -> + ?ASSERT(L1 < ?TUNE_TOO_FEW_BBS andalso L2 < ?TUNE_TOO_FEW_BBS), + DSets1 = hipe_dsets:union(R1, R2, DSets0), + {R, DSets} = hipe_dsets:find(R1, DSets1), + merge_small_parts_1([{R,L2+L1,Es2++Es1}|Ps], DSets, Acc). + +%% @doc Partition an ordering over BBs into subsequences for the dsets that +%% contain them. +%% Does not change dsets. +-spec part_order([label()], bb_dsets()) + -> {#{bb_dset_key() => [label()]}, bb_dsets()}. +part_order(Lbs, DSets) -> part_order(Lbs, DSets, #{}). + +part_order([], DSets, Acc) -> {Acc, DSets}; +part_order([L|Ls], DSets0, Acc0) -> + {EntryRoot, DSets1} = hipe_dsets:find({entry,L}, DSets0), + {ExitRoot, DSets2} = hipe_dsets:find({exit,L}, DSets1), + Acc1 = map_append(EntryRoot, L, Acc0), + %% Only include the label once if both entry and exit is in same partition + Acc2 = case EntryRoot =:= ExitRoot of + true -> Acc1; + false -> map_append(ExitRoot, L, Acc1) + end, + part_order(Ls, DSets2, Acc2). + +map_append(Key, Elem, Map) -> + case Map of + #{Key := List} -> Map#{Key := [Elem|List]}; + #{} -> Map#{Key => [Elem]} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Interference graph partitioning +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% We partition the program + +%% The algorithm considers two kinds of components; those that are local to a +%% basic block, and those that are not. The key is that any basic block belongs +%% to at most two non-local components; one from the beginning to the first +%% split point, and one from the end to the last split point. + +-type bb_dset_key() :: {entry | exit, label()}. +-type bb_dsets() :: hipe_dsets:dsets(bb_dset_key()). +-type bb_dsets_rllist() :: [{bb_dset_key(), [bb_dset_key()]}]. + +-spec initial_dsets(target_cfg(), module(), target_context()) -> bb_dsets(). +initial_dsets(CFG, TgtMod, TgtCtx) -> + Labels = TgtMod:labels(CFG, TgtCtx), + DSets0 = hipe_dsets:new(lists:append([[{entry,L},{exit,L}] || L <- Labels])), + Edges = lists:append([[{L, S} || S <- hipe_gen_cfg:succ(CFG, L)] + || L <- Labels]), + lists:foldl(fun({X, Y}, DS) -> hipe_dsets:union({exit,X}, {entry,Y}, DS) end, + DSets0, Edges). + +-spec join_whole_blocks(part_bb_list(), bb_dsets()) -> bb_dsets(). +join_whole_blocks(PartBBList, DSets0) -> + lists:foldl(fun({L, {single, _}}, DS) -> + hipe_dsets:union({entry,L}, {exit,L}, DS); + ({_, {split, _, _}}, DS) -> DS + end, DSets0, PartBBList). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Third pass +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Collect all referenced temps in each partition. + +%% Note: The temps could be collected during the partition pass for each +%% half-bb, and then combined here. Would that be beneficial? + +collect_seenmap(PartBBsRLList, PartBBs) -> + collect_seenmap(PartBBsRLList, #{}, PartBBs). + +collect_seenmap([], Acc, _PartBBs) -> Acc; +collect_seenmap([{R,Elems}|Ps], Acc, PartBBs) -> + Seen = collect_seen_part(Elems, #{}, PartBBs), + collect_seenmap(Ps, Acc#{R => Seen}, PartBBs). + +collect_seen_part([], Acc, _PartBBs) -> Acc; +collect_seen_part([{Half,L}|Es], Acc0, PartBBs) -> + BB = maps:get(L, PartBBs), + Code = case {Half, BB} of + {entry, {single, {C,_,_}}} -> C; + {entry, {split, {C,_,_}, _}} -> C; + {exit, {split, _, {C,_,_}}} -> C; + {exit, {single, _}} -> [] % Ignore; was collected by its entry half + end, + Acc = collect_seen_code(Code, Acc0), + collect_seen_part(Es, Acc, PartBBs). + +collect_seen_code([], Acc) -> Acc; +collect_seen_code([#instr{defuse={Def,Use}}|Is], Acc) -> + collect_seen_code(Is, add_seen(Def, add_seen(Use, Acc))). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fourth pass +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Rewrite CFG to the new temp names. +-spec transform_cfg([bb_dset_key()], part_bbs(), sub_map()) -> transformed_bbs(). + +transform_cfg(Elems, PartBBs, SubMap) -> + transform_cfg(Elems, PartBBs, SubMap, #{}). + +transform_cfg([], _PartBBs, _SubMap, Acc) -> Acc; +transform_cfg([{Half,L}|Es], PartBBs, SubMap, Acc0) -> + #{L := PBB} = PartBBs, + Acc = case {Half, PBB} of + {entry, {single,BB}} -> Acc0#{L=>transform_bb(BB, SubMap)}; + {entry, {split,BB,_}} -> Acc0#{L=>transform_bb(BB, SubMap)}; + {exit, {split,_,BB}} -> Acc0#{L=>transform_bb(BB, SubMap)}; + {exit, {single, _}} -> Acc0 % Was included by the entry half + end, + transform_cfg(Es, PartBBs, SubMap, Acc). + +-spec transform_bb(part_bb_part(), sub_map()) -> transformed_bb(). +transform_bb(BB, SubMap) -> + %% For now, part_bb_part() and split_bb() share representation + transform_whole_bb(BB, SubMap). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Fifth pass +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Combine colorings and substitute temps in actual cfg if there were +%% collisions. + +%% A temp can sometimes appear in more than one partition. For example, defining +%% an unused value. If these are found by combine_allocations, we have to +%% rename this temp in one of the partitions on the real cfg. +%% +%% We optimistically assume that there will be no such collisions, and when +%% there are, we fix them up as they're found. + +-spec combine_allocations([{hipe_map(), [bb_dset_key()]}], target_reg(), + part_bbs(), module(), target_context(), target_cfg()) + -> {hipe_map(), same | {rewritten, target_cfg()}}. +combine_allocations([{A,_}|As], MaxPhys, PartBBs, TgtMod, TgtCtx, CFG) -> + {Phys, Pseuds} = lists:partition(fun({R,_}) -> R < MaxPhys end, A), + {Seen, _, []} = partition_by_seen(Pseuds, #{}, [], []), + combine_allocations(As, MaxPhys, PartBBs, TgtMod, TgtCtx, Phys, Seen, Pseuds, + {same, CFG}). + +-spec combine_allocations([{hipe_map(), [bb_dset_key()]}], target_reg(), + part_bbs(), module(), target_context(), hipe_map(), + seen(), hipe_map(), {same|rewritten, target_cfg()}) + -> {hipe_map(), same | {rewritten, target_cfg()}}. +combine_allocations([], _MaxPhys, _PartBBs, _TgtMod, _TgtCtx, Phys, _Seen, + Pseuds, CFGT) -> + {Phys ++ Pseuds, case CFGT of + {same, _} -> same; + {rewritten, _} -> CFGT + end}; +combine_allocations([{A,PartElems}|As], MaxPhys, PartBBs, TgtMod, TgtCtx, Phys, + Seen0, Acc, CFGT={_,CFG0}) -> + {Phys, Pseuds0} = lists:partition(fun({R,_}) -> R < MaxPhys end, A), + {Seen, Pseuds, Collisions} = partition_by_seen(Pseuds0, Seen0, [], []), + case Collisions of + [] -> combine_allocations(As, MaxPhys, PartBBs, TgtMod, TgtCtx, Phys, Seen, + Pseuds++Acc, CFGT); + _ -> + %% There were collisions; rename all the temp numbers in Collisions + {CFG, Renamed} = rename(Collisions, PartElems, PartBBs, TgtMod, TgtCtx, + CFG0), + combine_allocations(As, MaxPhys, PartBBs, TgtMod, TgtCtx, Phys, Seen, + Pseuds++Renamed++Acc, {rewritten,CFG}) + end. + +%% @doc Partitions a coloring on whether the registers are in the Seen set, +%% adding any new registers to the set. +-spec partition_by_seen(hipe_map(), seen(), hipe_map(), hipe_map()) + -> {seen(), hipe_map(), hipe_map()}. +partition_by_seen([], Seen, Acc, Collisions) -> {Seen, Acc, Collisions}; +partition_by_seen([C={R,_}|Cs], Seen, Acc, Colls) -> + case Seen of + #{R := _} -> partition_by_seen(Cs, Seen, Acc, [C|Colls]); + #{} -> partition_by_seen(Cs, Seen#{R => []}, [C|Acc], Colls) + end. + +-spec rename(hipe_map(), [bb_dset_key()], part_bbs(), module(), + target_context(), target_cfg()) + -> {target_cfg(), hipe_map()}. +rename(CollisionList, PartElems, PartBBs, TgtMod, TgtCtx, CFG0) -> + {Map, Renamed} = new_names(CollisionList, TgtMod, TgtCtx, #{}, []), + Fun = fun(I) -> + TgtMod:subst_temps( + fun(Temp) -> + N = TgtMod:reg_nr(Temp, TgtCtx), + case Map of + #{N := Subst} -> TgtMod:update_reg_nr(Subst, Temp, TgtCtx); + #{} -> Temp + end + end, I, TgtCtx) + end, + {rename_1(PartElems, PartBBs, TgtMod, TgtCtx, Fun, CFG0), Renamed}. + +-type rename_map() :: #{target_reg() => target_reg()}. +-type rename_fun() :: fun((target_instr()) -> target_instr()). + +-spec new_names(hipe_map(), module(), target_context(), rename_map(), + hipe_map()) + -> {rename_map(), hipe_map()}. +new_names([], _TgtMod, _TgtCtx, Map, Renamed) -> {Map, Renamed}; +new_names([{R,C}|As], TgtMod, TgtCtx, Map, Renamed) -> + Subst = TgtMod:new_reg_nr(TgtCtx), + new_names(As, TgtMod, TgtCtx, Map#{R => Subst}, [{Subst, C} | Renamed]). + +%% @doc Maps over all instructions in a partition on the original CFG. +-spec rename_1([bb_dset_key()], part_bbs(), module(), target_context(), + rename_fun(), target_cfg()) -> target_cfg(). +rename_1([], _PartBBs, _TgtMod, _TgtCtx, _Fun, CFG) -> CFG; +rename_1([{Half,L}|Es], PartBBs, TgtMod, TgtCtx, Fun, CFG0) -> + Code0 = hipe_bb:code(BB = TgtMod:bb(CFG0, L, TgtCtx)), + Code = case {Half, maps:get(L, PartBBs)} of + {entry, {single,_}} -> lists:map(Fun, Code0); + {entry, {split,PBBP,_}} -> + map_start(Fun, part_bb_part_len(PBBP), Code0); + {exit, {split,_,PBBP}} -> + map_end(Fun, part_bb_part_len(PBBP), Code0); + {exit, {single, _}} -> Code0 + end, + CFG = TgtMod:update_bb(CFG0, L, hipe_bb:code_update(BB, Code), TgtCtx), + rename_1(Es, PartBBs, TgtMod, TgtCtx, Fun, CFG). + +-spec part_bb_part_len(part_bb_part()) -> non_neg_integer(). +part_bb_part_len({Code, _Livein, _Liveout}) -> length(Code). + +%% @doc Map the first N elements of a list +-spec map_start(fun((X) -> Y), non_neg_integer(), [X]) -> [X|Y]. +map_start(_Fun, 0, List) -> List; +map_start(Fun, N, [E|Es]) -> + [Fun(E)|map_start(Fun, N-1, Es)]. + +%% @doc Map the last N elements of a list +-spec map_end(fun((X) -> Y), non_neg_integer(), [X]) -> [X|Y]. +map_end(Fun, N, List) -> + map_end(Fun, N, length(List), List). + +map_end(Fun, N, Len, [E|Es]) when Len > N -> [E|map_end(Fun, N, Len-1, Es)]; +map_end(Fun, N, Len, List) when Len =:= N -> lists:map(Fun, List). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Temp map ADT +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-type sub_map() :: {s,#{target_reg() => temp()}}. +-type inv_map() :: {i,#{temp() => target_reg()}}. + +-spec smap_get(target_reg(), sub_map()) -> temp(). +smap_get(Temp, {s,Map}) when is_integer(Temp) -> maps:get(Temp, Map). + +-spec imap_get(temp(), inv_map()) -> target_reg(). +imap_get(Temp, {i,Map}) when is_integer(Temp) -> maps:get(Temp, Map). + +-spec smap_get_all_partial([target_reg()], sub_map()) -> [temp()]. +smap_get_all_partial([], _) -> []; +smap_get_all_partial([T|Ts], SMap={s,Map}) when is_integer(T) -> + case Map of + #{T := R} -> [R|smap_get_all_partial(Ts, SMap)]; + #{} -> smap_get_all_partial(Ts, SMap) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Validation +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-ifdef(DO_ASSERT). +%%%%%%%%%%%%%%%%%%%% +%% Check that the coloring is correct (if the IG is correct): +%% + +%% Define these as 'ok' or 'report(X,Y)' depending on how much output you want. +-define(report0(X,Y), ?IF_DEBUG_LEVEL(0,?msg(X, Y),ok)). +-define(report(X,Y), ?IF_DEBUG_LEVEL(1,?msg(X, Y),ok)). +-define(report2(X,Y), ?IF_DEBUG_LEVEL(2,?msg(X, Y),ok)). +-define(report3(X,Y), ?IF_DEBUG_LEVEL(3,?msg(X, Y),ok)). + +check_coloring(Coloring, CFG, TgtMod, TgtCtx) -> + ?report0("checking coloring ~p~n",[Coloring]), + IG = hipe_ig:build(CFG, TgtMod:analyze(CFG,TgtCtx), TgtMod, TgtCtx), + check_cols(hipe_vectors:list(hipe_ig:adj_list(IG)), + init_coloring(Coloring, TgtMod, TgtCtx)). + +init_coloring(Xs, TgtMod, TgtCtx) -> + hipe_temp_map:cols2tuple(Xs, TgtMod, TgtCtx). + +check_color_of(X, Cols) -> + case hipe_temp_map:find(X, Cols) of + unknown -> + uncolored; + C -> + C + end. + +check_cols([], _Cols) -> + ?report("coloring valid~n",[]), + true; +check_cols([{X,Neighbours}|Xs], Cols) -> + Cs = [{N, check_color_of(N, Cols)} || N <- Neighbours], + C = check_color_of(X, Cols), + case valid_coloring(X, C, Cs) of + yes -> + check_cols(Xs, Cols); + {no,Invalids} -> + ?msg("node ~p has same color (~p) as ~p~n", [X,C,Invalids]), + check_cols(Xs, Cols) andalso false + end. + +valid_coloring(_X, _C, []) -> + yes; +valid_coloring(X, C, [{Y,C}|Ys]) -> + case valid_coloring(X, C, Ys) of + yes -> {no, [Y]}; + {no,Zs} -> {no, [Y|Zs]} + end; +valid_coloring(X, C, [_|Ys]) -> + valid_coloring(X, C, Ys). + +unused_unused(Unused, CFG, TgtMod, TgtCtx) -> + IG = hipe_ig:build(CFG, TgtMod:analyze(CFG,TgtCtx), TgtMod, TgtCtx), + lists:all(fun(R) -> case hipe_ig:get_node_degree(R, IG) of + 0 -> true; + Deg -> + ?msg("Temp ~w is in unused but has degree ~w~n", + [R, Deg]), + false + end end, Unused). + +%%%%%%%%%%%%%%%%%%%% +%% Check that no register allocation opportunities were missed due to ?MODULE +%% +just_as_good_as(RegAllocMod, CFG, Liveness, SpillIndex0, SpillLimit, TgtMod, + TgtCtx, Options, SpillMap, Coloring, Unused) -> + {CheckColoring, _} = + RegAllocMod:regalloc(CFG, Liveness, SpillIndex0, SpillLimit, TgtMod, TgtCtx, + Options), + Now = lists:sort([{R,Kind} || {R,{Kind,_}} <- Coloring, + not ordsets:is_element(R, Unused)]), + Check = lists:sort([{R,Kind} || {R,{Kind,_}} <- CheckColoring, + not ordsets:is_element(R, Unused)]), + CheckMap = maps:from_list(Check), + SaneSpills = all_spills_sane_1(CheckColoring, SpillMap), + case SaneSpills + andalso lists:all(fun({R, spill}) -> maps:get(R, CheckMap) =:= spill; + ({_,reg}) -> true + end, Now) + of + true -> true; + false -> + {NowRegs, _} = _NowCount = count(Now), + {CheckRegs, _} = _CheckCount = count(Check), + {M,F,A} = element(2, element(3, CFG)), + io:fwrite(standard_error, "Colorings differ (~w, ~w)!~n" + "MFA: ~w:~w/~w~n" + "Unused: ~w~n" + "Now:~w~nCorrect:~w~n", + [TgtMod, RegAllocMod, + M,F,A, + Unused, + Now -- Check, Check -- Now]), + SaneSpills andalso NowRegs >= CheckRegs + end. + +count(C) -> {length([[] || {_, reg} <- C]), + length([[] || {_, spill} <- C])}. + +unused(LivePseudos, SpillMap, CFG, TgtMod, TgtCtx) -> + {TMin, TMax} = TgtMod:var_range(CFG,TgtCtx), + SpillOSet = ordsets:from_list(maps:keys(SpillMap)), + PhysOSet = ordsets:from_list(TgtMod:all_precoloured(TgtCtx)), + Used = ordsets:union(LivePseudos, ordsets:union(PhysOSet, SpillOSet)), + ordsets:subtract(lists:seq(TMin, TMax), Used). + +%% Check that no temp that we wrote off was actually allocatable. +all_spills_sane_1(_, Empty) when map_size(Empty) =:= 0 -> true; +all_spills_sane_1([], _Nonempty) -> false; +all_spills_sane_1([{T, {reg, _}}|Cs], SpillMap) -> + not maps:is_key(T, SpillMap) andalso all_spills_sane_1(Cs, SpillMap); +all_spills_sane_1([{T, {spill, _}}|Cs], SpillMap) -> + all_spills_sane_1(Cs, maps:remove(T, SpillMap)). + +-endif. % DO_ASSERT + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Pseudo-target interface +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +analyze(Cfg, _ModRec) -> Cfg. +bb(Cfg=#cfg{bbs=BBs}, Ix, _ModRec) -> + case BBs of + #{Ix := #transformed_bb{bb=BB}} -> BB; + _ -> error(badarg, [Cfg, Ix]) + end. +args(Arity, #prepass_ctx{target_mod=TgtMod, target_ctx=TgtCtx, sub=SubM}) -> + smap_get(TgtMod:args(Arity,TgtCtx), SubM). +labels(#cfg{bbs=BBs}, _ModRec) -> maps:keys(BBs). +livein(#cfg{bbs=BBs}, Lb, _SubMod) -> + #{Lb := #transformed_bb{livein=Livein}} = BBs, + Livein. +liveout(#cfg{bbs=BBs}, Lb, _SubMod) -> + #{Lb := #transformed_bb{liveout=Liveout}} = BBs, + Liveout. +uses(I, MR) -> element(2, def_use(I, MR)). +defines(I, MR) -> element(1, def_use(I, MR)). +def_use(#instr{defuse=DefUse}, _ModRec) -> DefUse. +is_move(#instr{is_move=IM}, _ModRec) -> IM. +is_fixed(Reg, #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx,inv=InvM}) -> + TgtMod:is_fixed(imap_get(Reg, InvM),TgtCtx). % XXX: Is this hot? +is_global(Reg, #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx, + max_phys=MaxPhys}) when Reg < MaxPhys -> + TgtMod:is_global(Reg,TgtCtx). % assume id-map +is_precoloured(Reg, #prepass_ctx{max_phys=MaxPhys}) -> Reg < MaxPhys. +reg_nr(Reg, _ModRec) -> Reg. % After mapping (naturally) +non_alloc(#cfg{cfg=CFG}, #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx, + sub=SubM}) -> + smap_get_all_partial(reg_names(TgtMod:non_alloc(CFG,TgtCtx), TgtMod, TgtCtx), + SubM). +number_of_temporaries(#cfg{max_reg=MaxR}, _ModRec) -> MaxR. +allocatable(#prepass_ctx{target_mod=TgtMod, target_ctx=TgtCtx}) -> + TgtMod:allocatable(TgtCtx). % assume id-map +physical_name(Reg, _ModRec) -> Reg. +all_precoloured(#prepass_ctx{target_mod=TgtMod, target_ctx=TgtCtx}) -> + TgtMod:all_precoloured(TgtCtx). % dito +var_range(#cfg{cfg=_CFG, max_reg=MaxReg}, + #prepass_ctx{target_mod=_TgtMod, target_ctx=_TgtCtx}) -> + ?ASSERT(begin {TgtMin, _} = _TgtMod:var_range(_CFG,_TgtCtx), + TgtMin =:= 0 + end), + {0, MaxReg-1}. + +postorder(#cfg{cfg=CFG,rpostorder=undefined}, + #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx}) -> + TgtMod:postorder(CFG,TgtCtx); +postorder(#cfg{rpostorder=Labels}, _ModRec) when is_list(Labels) -> + lists:reverse(Labels). + +reverse_postorder(#cfg{cfg=CFG,rpostorder=undefined}, + #prepass_ctx{target_mod=TgtMod,target_ctx=TgtCtx}) -> + TgtMod:reverse_postorder(CFG,TgtCtx); +reverse_postorder(#cfg{rpostorder=Labels}, _ModRec) when is_list(Labels) -> + Labels. diff --git a/lib/hipe/regalloc/hipe_restore_reuse.erl b/lib/hipe/regalloc/hipe_restore_reuse.erl new file mode 100644 index 0000000000..2158bd185e --- /dev/null +++ b/lib/hipe/regalloc/hipe_restore_reuse.erl @@ -0,0 +1,516 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% RESTORE REUSE LIVE RANGE SPLITTING PASS +%% +%% This is a simple live range splitter that tries to avoid sequences where a +%% temporary is accessed on stack multiple times by keeping a copy of that temp +%% around in a register. +%% +%% At any point where a temporary that is expected to be spilled (see uses of +%% spills_add_list/2) is defined or used, this pass considers that temporary +%% "available". +%% +%% Limitations: +%% * If a live range part starts with several different restores, this module +%% will introduce a new temp number for each of them, and later be forced to +%% generate phi blocks. It would be more efficient to introduce just a +%% single temp number. That would also remove the need for the phi blocks. +%% * If a live range part ends in a definition, that definition should just +%% define the base temp rather than the substitution, since some CISC +%% targets might be able to inline the memory access in the instruction. +-module(hipe_restore_reuse). + +-export([split/4]). + +%% Exports for hipe_range_split, which uses restore_reuse as one possible spill +%% "mode" +-export([analyse/3 + ,renamed_in_block/2 + ,split_in_block/2 + ]). +-export_type([avail/0]). + +-compile(inline). + +%% -define(DO_ASSERT, 1). +-include("../main/hipe.hrl"). + +-type target_cfg() :: any(). +-type liveness() :: any(). +-type target_module() :: module(). +-type target_context() :: any(). +-type target() :: {target_module(), target_context()}. +-type label() :: non_neg_integer(). +-type reg() :: non_neg_integer(). +-type instr() :: any(). +-type temp() :: any(). + +-spec split(target_cfg(), liveness(), target_module(), target_context()) + -> target_cfg(). +split(CFG, Liveness, TargetMod, TargetContext) -> + Target = {TargetMod, TargetContext}, + Avail = analyse(CFG, Liveness, Target), + rewrite(CFG, Target, Avail). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-opaque avail() :: #{label() => avail_bb()}. + +-record(avail_bb, { + %% Blocks where HasCall is true are considered to have too high + %% register pressure to support a register copy of a temp + has_call :: boolean(), + %% AvailOut: Temps that can be split (are available) + out :: availset(), + %% Gen: AvailOut generated locally + gen :: availset(), + %% WantIn: Temps that are split + want :: regset(), + %% Self: Temps with avail-want pairs locally + self :: regset(), + %% DefIn: Temps shadowed by later def in same live range part + defin :: regset(), + pred :: [label()], + succ :: [label()] + }). +-type avail_bb() :: #avail_bb{}. + +avail_get(L, Avail) -> maps:get(L, Avail). +avail_set(L, Val, Avail) -> maps:put(L, Val, Avail). +avail_has_call(L, Avail) -> (avail_get(L, Avail))#avail_bb.has_call. +avail_out(L, Avail) -> (avail_get(L, Avail))#avail_bb.out. +avail_self(L, Avail) -> (avail_get(L, Avail))#avail_bb.self. +avail_pred(L, Avail) -> (avail_get(L, Avail))#avail_bb.pred. +avail_succ(L, Avail) -> (avail_get(L, Avail))#avail_bb.succ. + +avail_in(L, Avail) -> + case avail_pred(L, Avail) of + [] -> availset_empty(); % entry + Pred -> + lists:foldl(fun(P, ASet) -> + availset_intersect(avail_out(P, Avail), ASet) + end, availset_top(), Pred) + end. + +want_in(L, Avail) -> (avail_get(L, Avail))#avail_bb.want. +want_out(L, Avail) -> + lists:foldl(fun(S, Set) -> + ordsets:union(want_in(S, Avail), Set) + end, ordsets:new(), avail_succ(L, Avail)). + +def_in(L, Avail) -> (avail_get(L, Avail))#avail_bb.defin. +def_out(L, Avail) -> + case avail_succ(L, Avail) of + [] -> ordsets:new(); % entry + Succ -> + ordsets:intersection([def_in(S, Avail) || S <- Succ]) + end. + +-type regset() :: ordsets:ordset(reg()). +-type availset() :: top | regset(). +availset_empty() -> []. +availset_top() -> top. +availset_intersect(top, B) -> B; +availset_intersect(A, top) -> A; +availset_intersect(A, B) -> ordsets:intersection(A, B). +availset_union(top, _) -> top; +availset_union(_, top) -> top; +availset_union(A, B) -> ordsets:union(A, B). +ordset_intersect_availset(OS, top) -> OS; +ordset_intersect_availset(OS, AS) -> ordsets:intersection(OS, AS). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Analysis pass +%% +%% The analysis pass collects the set of temps we're interested in splitting +%% (Spills), and computes three dataflow analyses for this subset of temps. +%% +%% Avail, which is the set of temps which are available in register from a +%% previous (potential) spill or restore without going through a HasCall +%% block. +%% Want, which is a liveness analysis for the subset of temps used by an +%% instruction that are also in Avail at that point. In other words, Want is +%% the set of temps that are split (has a register copy) at a particular +%% point. +%% Def, which are the temps that are already going to be spilled later, and so +%% need not be spilled when they're defined. +%% +%% Lastly, it computes the set Self for each block, which is the temps that have +%% avail-want pairs in the same block, and so should be split in that block even +%% if they're not in WantIn for the block. + +-spec analyse(target_cfg(), liveness(), target()) -> avail(). +analyse(CFG, Liveness, Target) -> + Avail0 = analyse_init(CFG, Liveness, Target), + RPO = reverse_postorder(CFG, Target), + AvailLs = [L || L <- RPO, not avail_has_call(L, Avail0)], + Avail1 = avail_dataf(AvailLs, Avail0), + Avail2 = analyse_filter_want(maps:keys(Avail1), Avail1), + PO = lists:reverse(RPO), + want_dataf(PO, Avail2). + +-spec analyse_init(target_cfg(), liveness(), target()) -> avail(). +analyse_init(CFG, Liveness, Target) -> + analyse_init(labels(CFG, Target), CFG, Liveness, Target, #{}, []). + +-spec analyse_init([label()], target_cfg(), liveness(), target(), spillset(), + [{label(), avail_bb()}]) + -> avail(). +analyse_init([], _CFG, _Liveness, Target, Spills0, Acc) -> + %% Precoloured temps can't be spilled + Spills = spills_filter(fun(R) -> not is_precoloured(R, Target) end, Spills0), + analyse_init_1(Acc, Spills, []); +analyse_init([L|Ls], CFG, Liveness, Target, Spills0, Acc) -> + {DefIn, Gen, Self, Want, HasCall0} = + analyse_scan(hipe_bb:code(bb(CFG, L, Target)), Target, + ordsets:new(), ordsets:new(), ordsets:new(), + ordsets:new()), + {Spills, Out, HasCall} = + case HasCall0 of + false -> {Spills0, availset_top(), false}; + {true, CallDefs} -> + Spill = ordsets:subtract(liveout(Liveness, L, Target), CallDefs), + {spills_add_list(Spill, Spills0), Gen, true} + end, + Pred = hipe_gen_cfg:pred(CFG, L), + Succ = hipe_gen_cfg:succ(CFG, L), + Val = #avail_bb{gen=Gen, want=Want, self=Self, out=Out, has_call=HasCall, + pred=Pred, succ=Succ, defin=DefIn}, + analyse_init(Ls, CFG, Liveness, Target, Spills, [{L, Val} | Acc]). + +-spec analyse_init_1([{label(), avail_bb()}], spillset(), + [{label(), avail_bb()}]) + -> avail(). +analyse_init_1([], _Spills, Acc) -> maps:from_list(Acc); +analyse_init_1([{L, Val0}|Vs], Spills, Acc) -> + #avail_bb{out=Out,gen=Gen,want=Want,self=Self} = Val0, + Val = Val0#avail_bb{ + out = spills_filter_availset(Out, Spills), + gen = spills_filter_availset(Gen, Spills), + want = spills_filter_availset(Want, Spills), + self = spills_filter_availset(Self, Spills)}, + analyse_init_1(Vs, Spills, [{L, Val} | Acc]). + +-type spillset() :: #{reg() => []}. +-spec spills_add_list([reg()], spillset()) -> spillset(). +spills_add_list([], Spills) -> Spills; +spills_add_list([R|Rs], Spills) -> spills_add_list(Rs, Spills#{R => []}). + +-spec spills_filter_availset(availset(), spillset()) -> availset(). +spills_filter_availset([E|Es], Spills) -> + case Spills of + #{E := _} -> [E|spills_filter_availset(Es, Spills)]; + #{} -> spills_filter_availset(Es, Spills) + end; +spills_filter_availset([], _) -> []; +spills_filter_availset(top, _) -> top. + +spills_filter(Fun, Spills) -> maps:filter(fun(K, _) -> Fun(K) end, Spills). + +-spec analyse_scan([instr()], target(), Defset, Gen, Self, Want) + -> {Defset, Gen, Self, Want, HasCall} when + HasCall :: false | {true, regset()}, + Defset :: regset(), + Gen :: availset(), + Self :: regset(), + Want :: regset(). +analyse_scan([], _Target, Defs, Gen, Self, Want) -> + {Defs, Gen, Self, Want, false}; +analyse_scan([I|Is], Target, Defs0, Gen0, Self0, Want0) -> + {DefL, UseL} = reg_def_use(I, Target), + Use = ordsets:from_list(UseL), + Def = ordsets:from_list(DefL), + Self = ordsets:union(ordsets:intersection(Use, Gen0), Self0), + Want = ordsets:union(ordsets:subtract(Use, Defs0), Want0), + Defs = ordsets:union(Def, Defs0), + case defines_all_alloc(I, Target) of + true -> + [] = Is, %assertion + {Defs, ordsets:new(), Self, Want, {true, Def}}; + false -> + Gen = ordsets:union(ordsets:union(Def, Use), Gen0), + analyse_scan(Is, Target, Defs, Gen, Self, Want) + end. + +-spec avail_dataf([label()], avail()) -> avail(). +avail_dataf(RPO, Avail0) -> + case avail_dataf_once(RPO, Avail0, 0) of + {Avail, 0} -> Avail; + {Avail, _Changed} -> + avail_dataf(RPO, Avail) + end. + +-spec avail_dataf_once([label()], avail(), non_neg_integer()) + -> {avail(), non_neg_integer()}. +avail_dataf_once([], Avail, Changed) -> {Avail, Changed}; +avail_dataf_once([L|Ls], Avail0, Changed0) -> + ABB = #avail_bb{out=OldOut, gen=Gen} = avail_get(L, Avail0), + In = avail_in(L, Avail0), + {Changed, Avail} = + case availset_union(In, Gen) of + OldOut -> {Changed0, Avail0}; + Out -> {Changed0+1, avail_set(L, ABB#avail_bb{out=Out}, Avail0)} + end, + avail_dataf_once(Ls, Avail, Changed). + +-spec analyse_filter_want([label()], avail()) -> avail(). +analyse_filter_want([], Avail) -> Avail; +analyse_filter_want([L|Ls], Avail0) -> + ABB = #avail_bb{want=Want0, defin=DefIn0} = avail_get(L, Avail0), + In = avail_in(L, Avail0), + Want = ordset_intersect_availset(Want0, In), + DefIn = ordset_intersect_availset(DefIn0, In), + Avail = avail_set(L, ABB#avail_bb{want=Want, defin=DefIn}, Avail0), + analyse_filter_want(Ls, Avail). + +-spec want_dataf([label()], avail()) -> avail(). +want_dataf(PO, Avail0) -> + case want_dataf_once(PO, Avail0, 0) of + {Avail, 0} -> Avail; + {Avail, _Changed} -> + want_dataf(PO, Avail) + end. + +-spec want_dataf_once([label()], avail(), non_neg_integer()) + -> {avail(), non_neg_integer()}. +want_dataf_once([], Avail, Changed) -> {Avail, Changed}; +want_dataf_once([L|Ls], Avail0, Changed0) -> + ABB0 = #avail_bb{want=OldIn,defin=OldDef} = avail_get(L, Avail0), + AvailIn = avail_in(L, Avail0), + Out = want_out(L, Avail0), + DefOut = def_out(L, Avail0), + {Changed, Avail} = + case {ordsets:union(ordset_intersect_availset(Out, AvailIn), OldIn), + ordsets:union(ordset_intersect_availset(DefOut, AvailIn), OldDef)} + of + {OldIn, OldDef} -> {Changed0, Avail0}; + {In, DefIn} -> + ABB = ABB0#avail_bb{want=In,defin=DefIn}, + {Changed0+1, avail_set(L, ABB, Avail0)} + end, + want_dataf_once(Ls, Avail, Changed). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Rewrite pass +-type subst_dict() :: orddict:orddict(reg(), reg()). +-type input() :: #{label() => subst_dict()}. + +-spec rewrite(target_cfg(), target(), avail()) -> target_cfg(). +rewrite(CFG, Target, Avail) -> + RPO = reverse_postorder(CFG, Target), + rewrite(RPO, Target, Avail, #{}, CFG). + +-spec rewrite([label()], target(), avail(), input(), target_cfg()) + -> target_cfg(). +rewrite([], _Target, _Avail, _Input, CFG) -> CFG; +rewrite([L|Ls], Target, Avail, Input0, CFG0) -> + SplitHere = split_in_block(L, Avail), + {Input1, LInput} = + case Input0 of + #{L := LInput0} -> {Input0, LInput0}; + #{} -> {Input0#{L => []}, []} % entry block + end, + ?ASSERT([] =:= [X || X <- SplitHere, orddict:is_key(X, LInput)]), + ?ASSERT(want_in(L, Avail) =:= orddict:fetch_keys(LInput)), + {CFG1, LOutput} = + case {SplitHere, LInput} of + {[], []} -> % optimisation (rewrite will do nothing, so skip it) + {CFG0, LInput}; + _ -> + Code0 = hipe_bb:code(BB=bb(CFG0, L, Target)), + DefOut = def_out(L, Avail), + {Code, LOutput0, _DefIn} = + rewrite_instrs(Code0, Target, LInput, DefOut, SplitHere), + {update_bb(CFG0, L, hipe_bb:code_update(BB, Code), Target), LOutput0} + end, + {Input, CFG} = rewrite_succs(avail_succ(L, Avail), Target, L, LOutput, Avail, + Input1, CFG1), + rewrite(Ls, Target, Avail, Input, CFG). + +-spec renamed_in_block(label(), avail()) -> ordsets:ordset(reg()). +renamed_in_block(L, Avail) -> + ordsets:union([avail_self(L, Avail), want_in(L, Avail), + want_out(L, Avail)]). + +-spec split_in_block(label(), avail()) -> ordsets:ordset(reg()). +split_in_block(L, Avail) -> + ordsets:subtract(ordsets:union(avail_self(L, Avail), want_out(L, Avail)), + want_in(L, Avail)). + +-spec rewrite_instrs([instr()], target(), subst_dict(), regset(), [reg()]) + -> {[instr()], subst_dict(), regset()}. +rewrite_instrs([], _Target, Output, DefOut, []) -> + {[], Output, DefOut}; +rewrite_instrs([I|Is], Target, Input0, BBDefOut, SplitHere0) -> + {TDef, TUse} = def_use(I, Target), + {Def, Use} = {reg_names(TDef, Target), reg_names(TUse, Target)}, + %% Restores are generated in forward order by picking temps from SplitHere as + %% they're used or defined. After the last instruction, all temps have been + %% picked. + {ISplits, SplitHere} = + lists:partition(fun(R) -> + lists:member(R, Def) orelse lists:member(R, Use) + end, SplitHere0), + {Input, Restores} = + case ISplits of + [] -> {Input0, []}; + _ -> + make_splits(ISplits, Target, TDef, TUse, Input0, []) + end, + %% Here's the recursive call + {Acc0, Output, DefOut} = + rewrite_instrs(Is, Target, Input, BBDefOut, SplitHere), + %% From here we're processing instructions in reverse order, because to avoid + %% redundant spills we need to walk the 'def' dataflow, which is in reverse. + SubstFun = fun(Temp) -> + case orddict:find(reg_nr(Temp, Target), Input) of + {ok, NewTemp} -> NewTemp; + error -> Temp + end + end, + Acc1 = insert_spills(TDef, Target, Input, DefOut, Acc0), + Acc = Restores ++ [subst_temps(SubstFun, I, Target) | Acc1], + DefIn = ordsets:union(DefOut, ordsets:from_list(Def)), + {Acc, Output, DefIn}. + +-spec make_splits([reg()], target(), [temp()], [temp()], subst_dict(), + [instr()]) + -> {subst_dict(), [instr()]}. +make_splits([], _Target, _TDef, _TUse, Input, Acc) -> + {Input, Acc}; +make_splits([S|Ss], Target, TDef, TUse, Input0, Acc0) -> + SubstReg = new_reg_nr(Target), + {Acc, Subst} = + case find_reg_temp(S, TUse, Target) of + error -> + {ok, Temp} = find_reg_temp(S, TDef, Target), + {Acc0, update_reg_nr(SubstReg, Temp, Target)}; + {ok, Temp} -> + Subst0 = update_reg_nr(SubstReg, Temp, Target), + Acc1 = [mk_move(Temp, Subst0, Target) | Acc0], + {Acc1, Subst0} + end, + Input = orddict:store(S, Subst, Input0), + make_splits(Ss, Target, TDef, TUse, Input, Acc). + +-spec find_reg_temp(reg(), [temp()], target()) -> error | {ok, temp()}. +find_reg_temp(_Reg, [], _Target) -> error; +find_reg_temp(Reg, [T|Ts], Target) -> + case reg_nr(T, Target) of + Reg -> {ok, T}; + _ -> find_reg_temp(Reg, Ts, Target) + end. + +-spec insert_spills([temp()], target(), subst_dict(), regset(), [instr()]) + -> [instr()]. +insert_spills([], _Target, _Input, _DefOut, Acc) -> Acc; +insert_spills([T|Ts], Target, Input, DefOut, Acc0) -> + R = reg_nr(T, Target), + Acc = + case orddict:find(R, Input) of + error -> Acc0; + {ok, Subst} -> + case lists:member(R, DefOut) of + true -> Acc0; + false -> [mk_move(Subst, T, Target) | Acc0] + end + end, + insert_spills(Ts, Target, Input, DefOut, Acc). + +-spec rewrite_succs([label()], target(), label(), subst_dict(), avail(), + input(), target_cfg()) -> {input(), target_cfg()}. +rewrite_succs([], _Target, _P, _POutput, _Avail, Input, CFG) -> {Input, CFG}; +rewrite_succs([L|Ls], Target, P, POutput, Avail, Input0, CFG0) -> + NewLInput = orddict_with_ordset(want_in(L, Avail), POutput), + {Input, CFG} = + case Input0 of + #{L := LInput} -> + CFG2 = + case required_phi_moves(LInput, NewLInput) of + [] -> CFG0; + ReqMovs -> + PhiLb = new_label(Target), + Code = [mk_move(S,D,Target) || {S,D} <- ReqMovs] + ++ [mk_goto(L, Target)], + PhiBB = hipe_bb:mk_bb(Code), + CFG1 = update_bb(CFG0, PhiLb, PhiBB, Target), + bb_redirect_jmp(L, PhiLb, P, CFG1, Target) + end, + {Input0, CFG2}; + #{} -> + {Input0#{L => NewLInput}, CFG0} + end, + rewrite_succs(Ls, Target, P, POutput, Avail, Input, CFG). + +-spec bb_redirect_jmp(label(), label(), label(), target_cfg(), target()) + -> target_cfg(). +bb_redirect_jmp(From, To, Lb, CFG, Target) -> + BB0 = bb(CFG, Lb, Target), + Last = redirect_jmp(hipe_bb:last(BB0), From, To, Target), + BB = hipe_bb:code_update(BB0, hipe_bb:butlast(BB0) ++ [Last]), + update_bb(CFG, Lb, BB, Target). + +-spec required_phi_moves(subst_dict(), subst_dict()) -> [{reg(), reg()}]. +required_phi_moves([], []) -> []; +required_phi_moves([P|Is], [P|Os]) -> required_phi_moves(Is, Os); +required_phi_moves([{K, In}|Is], [{K, Out}|Os]) -> + [{Out, In}|required_phi_moves(Is, Os)]. + +%% @doc Returns a new orddict with the keys in Set and their associated values. +-spec orddict_with_ordset(ordsets:ordset(K), orddict:orddict(K, V)) + -> orddict:orddict(K, V). +orddict_with_ordset([S|Ss], [{K, _}|_]=Dict) when S < K -> + orddict_with_ordset(Ss, Dict); +orddict_with_ordset([S|_]=Set, [{K, _}|Ds]) when S > K -> + orddict_with_ordset(Set, Ds); +orddict_with_ordset([_S|Ss], [{_K, _}=P|Ds]) -> % _S == _K + [P|orddict_with_ordset(Ss, Ds)]; +orddict_with_ordset([], _) -> []; +orddict_with_ordset(_, []) -> []. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Target module interface functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +-define(TGT_IFACE_0(N), N( {M,C}) -> M:N( C)). +-define(TGT_IFACE_1(N), N(A1, {M,C}) -> M:N(A1, C)). +-define(TGT_IFACE_2(N), N(A1,A2, {M,C}) -> M:N(A1,A2, C)). +-define(TGT_IFACE_3(N), N(A1,A2,A3,{M,C}) -> M:N(A1,A2,A3,C)). + +?TGT_IFACE_2(bb). +?TGT_IFACE_1(def_use). +?TGT_IFACE_1(defines_all_alloc). +?TGT_IFACE_1(is_precoloured). +?TGT_IFACE_1(labels). +?TGT_IFACE_1(mk_goto). +?TGT_IFACE_2(mk_move). +?TGT_IFACE_0(new_label). +?TGT_IFACE_0(new_reg_nr). +?TGT_IFACE_3(redirect_jmp). +?TGT_IFACE_1(reg_nr). +?TGT_IFACE_1(reverse_postorder). +?TGT_IFACE_2(subst_temps). +?TGT_IFACE_3(update_bb). +?TGT_IFACE_2(update_reg_nr). + +liveout(Liveness, L, Target={TgtMod,TgtCtx}) -> + ordsets:from_list(reg_names(TgtMod:liveout(Liveness, L, TgtCtx), Target)). + +reg_names(Regs, {TgtMod,TgtCtx}) -> + [TgtMod:reg_nr(X,TgtCtx) || X <- Regs]. + +reg_def_use(I, Target) -> + {TDef, TUse} = def_use(I, Target), + {reg_names(TDef, Target), reg_names(TUse, Target)}. diff --git a/lib/hipe/regalloc/hipe_sparc_specific.erl b/lib/hipe/regalloc/hipe_sparc_specific.erl index 8d34604f84..78b6379eba 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,121 +11,138 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_specific). %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 - ,analyze/1 - ,labels/1 - ,all_precoloured/0 - ,bb/2 - ,liveout/2 - ,reg_nr/1 - ,def_use/1 - ,is_move/1 - ,is_precoloured/1 - ,var_range/1 - ,allocatable/0 - ,non_alloc/1 - ,physical_name/1 - ,reverse_postorder/1 - ,livein/2 - ,uses/1 - ,defines/1 +-export([number_of_temporaries/2 + ,analyze/2 + ,labels/2 + ,all_precoloured/1 + ,bb/3 + ,liveout/3 + ,reg_nr/2 + ,def_use/2 + ,is_move/2 + ,is_spill_move/2 + ,is_precoloured/2 + ,var_range/2 + ,allocatable/1 + ,non_alloc/2 + ,physical_name/2 + ,reverse_postorder/2 + ,livein/3 + ,uses/2 + ,defines/2 + ,defines_all_alloc/2 ]). %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]). %% for hipe_ls_regalloc: --export([args/1, is_arg/1, is_global/1, new_spill_index/1]). --export([breadthorder/1, postorder/1]). +-export([args/2, is_arg/2, is_global/2, new_spill_index/2]). +-export([breadthorder/2, postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3]). + +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). -defun_to_cfg(Defun) -> - hipe_sparc_cfg:init(Defun). +%% callbacks for hipe_bb_weights, hipe_range_split +-export([branch_preds/2]). -check_and_rewrite(Defun, Coloring) -> - hipe_sparc_ra_postconditions:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring, no_context) -> + hipe_sparc_ra_postconditions:check_and_rewrite(CFG, Coloring, 'normal'). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_sparc_cfg:reverse_postorder(CFG). -non_alloc(CFG) -> - non_alloc(hipe_sparc_registers:nr_args(), hipe_sparc_cfg:params(CFG)). +non_alloc(CFG, no_context) -> + non_alloc_1(hipe_sparc_registers:nr_args(), hipe_sparc_cfg:params(CFG)). %% same as hipe_sparc_frame:fix_formals/2 -non_alloc(0, Rest) -> Rest; -non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); -non_alloc(_, []) -> []. +non_alloc_1(0, Rest) -> Rest; +non_alloc_1(N, [_|Rest]) -> non_alloc_1(N-1, Rest); +non_alloc_1(_, []) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> hipe_sparc_liveness_gpr:analyse(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) -> [X || X <- hipe_sparc_liveness_gpr:livein(Liveness,L), hipe_sparc:temp_is_allocatable(X)]. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) -> [X || X <- hipe_sparc_liveness_gpr:liveout(BB_in_out_liveness,Label), hipe_sparc:temp_is_allocatable(X)]. %% Registers stuff -allocatable() -> +allocatable(no_context) -> hipe_sparc_registers:allocatable_gpr(). -all_precoloured() -> +all_precoloured(no_context) -> hipe_sparc_registers:all_precoloured(). -is_precoloured(Reg) -> +is_precoloured(Reg, _) -> hipe_sparc_registers:is_precoloured_gpr(Reg). -is_fixed(R) -> +is_fixed(R, _) -> hipe_sparc_registers:is_fixed(R). -physical_name(Reg) -> +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_sparc_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) -> hipe_gensym:var_range(sparc). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(sparc), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) -> hipe_sparc_cfg:bb(CFG,L). +update_bb(CFG,L,BB,_) -> + hipe_sparc_cfg:bb_add(CFG,L,BB). + +branch_preds(Branch,_) -> + hipe_sparc_cfg:branch_preds(Branch). + %% SPARC stuff -def_use(Instruction) -> - {defines(Instruction), uses(Instruction)}. +def_use(Instruction, Ctx) -> + {defines(Instruction, Ctx), uses(Instruction, Ctx)}. -uses(I) -> +uses(I, _) -> [X || X <- hipe_sparc_defuse:insn_use_gpr(I), hipe_sparc:temp_is_allocatable(X)]. -defines(I) -> +defines(I, _) -> [X || X <- hipe_sparc_defuse:insn_def_gpr(I), hipe_sparc:temp_is_allocatable(X)]. -is_move(Instruction) -> +defines_all_alloc(I, _) -> + hipe_sparc_defuse:insn_defs_all_gpr(I). + +is_move(Instruction, _) -> case hipe_sparc:is_pseudo_move(Instruction) of true -> Dst = hipe_sparc:pseudo_move_dst(Instruction), @@ -142,28 +155,60 @@ is_move(Instruction) -> false -> false end. -reg_nr(Reg) -> +is_spill_move(Instruction, _) -> + hipe_sparc:is_pseudo_spill_move(Instruction). + +reg_nr(Reg, _) -> hipe_sparc:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_sparc:mk_pseudo_move(Src, Dst). + +mk_goto(Label, _) -> + hipe_sparc:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + hipe_sparc_cfg:redirect_jmp(Jmp, ToOld, ToNew). + +new_label(_) -> + hipe_gensym:get_next_label(sparc). + +new_reg_nr(_) -> + hipe_gensym:get_next_var(sparc). + +update_reg_nr(Nr, Temp, _) -> + hipe_sparc:mk_temp(Nr, hipe_sparc:temp_type(Temp)). + +subst_temps(SubstFun, Instr, _) -> + hipe_sparc_subst:insn_temps( + fun(Op) -> + case hipe_sparc:temp_is_allocatable(Op) + andalso hipe_sparc:temp_type(Op) =/= 'double' + of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + %%% Linear Scan stuff -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) -> SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_sparc_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_sparc_cfg:postorder(CFG). -is_global(R) -> +is_global(R, _) -> R =:= hipe_sparc_registers:temp1() orelse R =:= hipe_sparc_registers:temp2() orelse R =:= hipe_sparc_registers:temp3() orelse hipe_sparc_registers:is_fixed(R). -is_arg(R) -> +is_arg(R, _) -> hipe_sparc_registers:is_arg(R). -args(CFG) -> +args(CFG, _) -> hipe_sparc_registers:args(hipe_sparc_cfg:arity(CFG)). diff --git a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl index 2edd3cb47e..485fdc212a 100644 --- a/lib/hipe/regalloc/hipe_sparc_specific_fp.erl +++ b/lib/hipe/regalloc/hipe_sparc_specific_fp.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2002-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,133 +11,182 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_specific_fp). %% for hipe_coalescing_regalloc: --export([number_of_temporaries/1 - ,analyze/1 - ,labels/1 - ,all_precoloured/0 - ,bb/2 - ,liveout/2 - ,reg_nr/1 - ,def_use/1 - ,is_move/1 - ,is_precoloured/1 - ,var_range/1 - ,allocatable/0 - ,non_alloc/1 - ,physical_name/1 - ,reverse_postorder/1 - ,livein/2 - ,uses/1 - ,defines/1 +-export([number_of_temporaries/2 + ,analyze/2 + ,labels/2 + ,all_precoloured/1 + ,bb/3 + ,liveout/3 + ,reg_nr/2 + ,def_use/2 + ,is_move/2 + ,is_spill_move/2 + ,is_precoloured/2 + ,var_range/2 + ,allocatable/1 + ,non_alloc/2 + ,physical_name/2 + ,reverse_postorder/2 + ,livein/3 + ,uses/2 + ,defines/2 + ,defines_all_alloc/2 ]). %% for hipe_graph_coloring_regalloc: --export([is_fixed/1]). +-export([is_fixed/2]). %% for hipe_ls_regalloc: -%%-export([args/1, is_arg/1, is_global, new_spill_index/1]). -%%-export([breadthorder/1, postorder/1]). +%%-export([args/2, is_arg/2, is_global, new_spill_index/2]). +%%-export([breadthorder/2, postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3]). + +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). -defun_to_cfg(Defun) -> - hipe_sparc_cfg:init(Defun). +%% callbacks for hipe_bb_weights, hipe_range_split +-export([branch_preds/2]). -check_and_rewrite(Defun, Coloring) -> - hipe_sparc_ra_postconditions_fp:check_and_rewrite(Defun, Coloring). +check_and_rewrite(CFG, Coloring, no_context) -> + hipe_sparc_ra_postconditions_fp:check_and_rewrite(CFG, Coloring). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_sparc_cfg:reverse_postorder(CFG). -non_alloc(_CFG) -> +non_alloc(_CFG, _) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> hipe_sparc_liveness_fpr:analyse(CFG). -livein(Liveness, L) -> +livein(Liveness, L, _) -> hipe_sparc_liveness_fpr:livein(Liveness, L). -liveout(BB_in_out_liveness, Label) -> +liveout(BB_in_out_liveness, Label, _) -> hipe_sparc_liveness_fpr:liveout(BB_in_out_liveness, Label). %% Registers stuff -allocatable() -> +allocatable(no_context) -> hipe_sparc_registers:allocatable_fpr(). -all_precoloured() -> - allocatable(). +all_precoloured(Ctx) -> + allocatable(Ctx). -is_precoloured(Reg) -> +is_precoloured(Reg, _) -> hipe_sparc_registers:is_precoloured_fpr(Reg). -is_fixed(_Reg) -> +is_fixed(_Reg, _) -> false. -physical_name(Reg) -> +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_sparc_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG, _) -> hipe_gensym:var_range(sparc). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(sparc), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG, L) -> +bb(CFG, L, _) -> hipe_sparc_cfg:bb(CFG, L). +update_bb(CFG,L,BB,_) -> + hipe_sparc_cfg:bb_add(CFG,L,BB). + +branch_preds(Branch,_) -> + hipe_sparc_cfg:branch_preds(Branch). + %% SPARC stuff -def_use(I) -> - {defines(I), uses(I)}. +def_use(I, Ctx) -> + {defines(I,Ctx), uses(I,Ctx)}. -uses(I) -> +uses(I, _) -> hipe_sparc_defuse:insn_use_fpr(I). -defines(I) -> +defines(I, _) -> hipe_sparc_defuse:insn_def_fpr(I). -is_move(I) -> +defines_all_alloc(I, _) -> + hipe_sparc_defuse:insn_defs_all_fpr(I). + +is_move(I, _) -> hipe_sparc:is_pseudo_fmove(I). -reg_nr(Reg) -> +is_spill_move(I, _) -> + hipe_sparc:is_pseudo_spill_fmove(I). + +reg_nr(Reg, _) -> hipe_sparc:temp_reg(Reg). +mk_move(Src, Dst, _) -> + hipe_sparc:mk_pseudo_fmove(Src, Dst). + +mk_goto(Label, _) -> + hipe_sparc:mk_b_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + hipe_sparc_cfg:redirect_jmp(Jmp, ToOld, ToNew). + +new_label(_) -> + hipe_gensym:get_next_label(sparc). + +new_reg_nr(_) -> + hipe_gensym:get_next_var(sparc). + +update_reg_nr(Nr, _Temp, _) -> + hipe_sparc:mk_temp(Nr, 'double'). + +subst_temps(SubstFun, Instr, _) -> + hipe_sparc_subst:insn_temps( + fun(Op) -> + case hipe_sparc:temp_is_allocatable(Op) + andalso hipe_sparc:temp_type(Op) =:= 'double' + of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + -ifdef(notdef). -new_spill_index(SpillIndex)-> +new_spill_index(SpillIndex, _)-> SpillIndex+1. -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_sparc_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_sparc_cfg:postorder(CFG). -is_global(_R) -> +is_global(_R, _) -> false. -is_arg(_R) -> +is_arg(_R, _) -> false. -args(_CFG) -> +args(_CFG, _) -> []. -endif. diff --git a/lib/hipe/regalloc/hipe_spillcost.erl b/lib/hipe/regalloc/hipe_spillcost.erl index b241e637d9..906cdac1aa 100644 --- a/lib/hipe/regalloc/hipe_spillcost.erl +++ b/lib/hipe/regalloc/hipe_spillcost.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_spillcost). diff --git a/lib/hipe/regalloc/hipe_spillcost.hrl b/lib/hipe/regalloc/hipe_spillcost.hrl index 3cadcbe432..b1e84cee16 100644 --- a/lib/hipe/regalloc/hipe_spillcost.hrl +++ b/lib/hipe/regalloc/hipe_spillcost.hrl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2009-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -type hipe_array() :: integer(). @@ -25,4 +18,3 @@ {uses :: hipe_array(), % number of uses of each temp bb_uses :: hipe_array() % number of basic blocks each temp occurs in }). - diff --git a/lib/hipe/regalloc/hipe_temp_map.erl b/lib/hipe/regalloc/hipe_temp_map.erl index 4085a0e1a7..58145efb3e 100644 --- a/lib/hipe/regalloc/hipe_temp_map.erl +++ b/lib/hipe/regalloc/hipe_temp_map.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,12 +11,9 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %% =========================================================================== %% Copyright (c) 2001 by Erik Johansson. All Rights Reserved -%% Time-stamp: <2008-04-20 14:54:00 richard> %% =========================================================================== %% Module : hipe_temp_map %% Purpose : @@ -33,10 +26,12 @@ -module(hipe_temp_map). --export([cols2tuple/2, is_spilled/2, to_substlist/1]). +-export([cols2tuple/3, find/2, is_spilled/2, to_substlist/1]). -include("../main/hipe.hrl"). +-type target_context() :: any(). + %%---------------------------------------------------------------------------- %% Convert a list of [{R0, C1}, {R1, C2}, ...] to a temp_map %% (Currently implemented as a tuple) tuple {C1, C2, ...}. @@ -47,34 +42,32 @@ %% element 1 %%---------------------------------------------------------------------------- --spec cols2tuple(hipe_map(), atom()) -> hipe_temp_map(). +-spec cols2tuple(hipe_map(), module(), target_context()) -> hipe_temp_map(). -cols2tuple(Map, Target) -> - ?ASSERT(check_list(Map)), - SortedMap = lists:keysort(1, Map), - cols2tuple(0, SortedMap, [], Target). +cols2tuple(Map, TgtMod, TgtCtx) -> + SortedMap = lists:keysort(1, Map), + cols2tuple(0, SortedMap, [], TgtMod, TgtCtx). -%% sorted_cols2tuple(Map, Target) -> -%% ?ASSERT(check_list(Map)), +%% sorted_cols2tuple(Map, TgtMod, TgtCtx) -> %% ?ASSERT(Map =:= lists:keysort(1, Map)), -%% cols2tuple(0, Map, [], Target). +%% cols2tuple(0, Map, [], TgtMod, TgtCtx). %% Build a dense mapping -cols2tuple(_, [], Vs, _) -> +cols2tuple(_, [], Vs, _, _) -> %% Done reverse the list and convert to tuple. list_to_tuple(lists:reverse(Vs)); -cols2tuple(N, [{R, C}|Ms], Vs, Target) when N =:= R -> +cols2tuple(N, [{R, C}|Ms], Vs, TgtMod, TgtCtx) when N =:= R -> %% N makes sure the mapping is dense. N is he next key. - cols2tuple(N+1, Ms, [C|Vs], Target); -cols2tuple(N, SourceMapping, Vs, Target) -> + cols2tuple(N+1, Ms, [C|Vs], TgtMod, TgtCtx); +cols2tuple(N, SourceMapping=[{R,_}|_], Vs, TgtMod, TgtCtx) when N < R -> %% The source was sparse, make up some placeholders... Val = - case Target:is_precoloured(N) of + case TgtMod:is_precoloured(N, TgtCtx) of %% If it is precoloured, we know what to map it to. true -> {reg, N}; false -> unknown end, - cols2tuple(N+1, SourceMapping, [Val|Vs], Target). + cols2tuple(N+1, SourceMapping, [Val|Vs], TgtMod, TgtCtx). %% %% True if temp Temp is spilled. @@ -82,7 +75,7 @@ cols2tuple(N, SourceMapping, Vs, Target) -> -spec is_spilled(non_neg_integer(), hipe_temp_map()) -> boolean(). is_spilled(Temp, Map) -> - case element(Temp+1, Map) of + case find(Temp, Map) of {reg, _R} -> false; {fp_reg, _R}-> false; {spill, _N} -> true; @@ -106,9 +99,10 @@ is_spilled(Temp, Map) -> %% {spill, _N} -> false; %% unknown -> false %% end. -%% -%% %% Returns the inf temp Temp is mapped to. -%% find(Temp, Map) -> element(Temp+1, Map). + +%% Returns the inf temp Temp is mapped to. +find(Temp, Map) when Temp < tuple_size(Map) -> element(Temp+1, Map); +find(_, Map) when is_tuple(Map) -> unknown. % consistency with cols2tuple/3 %% diff --git a/lib/hipe/regalloc/hipe_x86_specific.erl b/lib/hipe/regalloc/hipe_x86_specific.erl index 4edf8674b7..dacfb71b00 100644 --- a/lib/hipe/regalloc/hipe_x86_specific.erl +++ b/lib/hipe/regalloc/hipe_x86_specific.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -ifdef(HIPE_AMD64). -define(HIPE_X86_SPECIFIC, hipe_amd64_specific). @@ -25,100 +18,113 @@ -define(HIPE_X86_REGISTERS, hipe_amd64_registers). -define(HIPE_X86_LIVENESS, hipe_amd64_liveness). -define(HIPE_X86_DEFUSE, hipe_amd64_defuse). +-define(HIPE_X86_SUBST, hipe_amd64_subst). -else. -define(HIPE_X86_SPECIFIC, hipe_x86_specific). -define(HIPE_X86_RA_POSTCONDITIONS, hipe_x86_ra_postconditions). -define(HIPE_X86_REGISTERS, hipe_x86_registers). -define(HIPE_X86_LIVENESS, hipe_x86_liveness). -define(HIPE_X86_DEFUSE, hipe_x86_defuse). +-define(HIPE_X86_SUBST, hipe_x86_subst). -endif. -module(?HIPE_X86_SPECIFIC). --export([number_of_temporaries/1]). +-export([number_of_temporaries/2]). %% The following exports are used as M:F(...) calls from other modules; %% e.g. hipe_x86_ra_ls. --export([analyze/1, - bb/2, - args/1, - labels/1, - livein/2, - liveout/2, - uses/1, - defines/1, - def_use/1, - is_arg/1, % used by hipe_ls_regalloc - is_move/1, - is_fixed/1, % used by hipe_graph_coloring_regalloc - is_global/1, - is_precoloured/1, - reg_nr/1, - non_alloc/1, - allocatable/0, - physical_name/1, - all_precoloured/0, - new_spill_index/1, % used by hipe_ls_regalloc - var_range/1, - breadthorder/1, - postorder/1, - reverse_postorder/1]). +-export([analyze/2, + bb/3, + args/2, + labels/2, + livein/3, + liveout/3, + uses/2, + defines/2, + defines_all_alloc/2, + def_use/2, + is_arg/2, % used by hipe_ls_regalloc + is_move/2, + is_spill_move/2, + is_fixed/2, % used by hipe_graph_coloring_regalloc + is_global/2, + is_precoloured/2, + reg_nr/2, + non_alloc/2, + allocatable/1, + physical_name/2, + all_precoloured/1, + new_spill_index/2, % used by hipe_ls_regalloc + var_range/2, + breadthorder/2, + postorder/2, + reverse_postorder/2]). %% callbacks for hipe_regalloc_loop --export([defun_to_cfg/1, - check_and_rewrite/2]). +-export([check_and_rewrite/3]). + +%% callbacks for hipe_regalloc_prepass, hipe_range_split +-export([mk_move/3, + mk_goto/2, + redirect_jmp/4, + new_label/1, + new_reg_nr/1, + update_reg_nr/3, + update_bb/4, + subst_temps/3]). -defun_to_cfg(Defun) -> - hipe_x86_cfg:init(Defun). +%% callbacks for hipe_bb_weights +-export([branch_preds/2]). -check_and_rewrite(Defun, Coloring) -> - ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(Defun, Coloring, 'normal'). +check_and_rewrite(CFG, Coloring, _) -> + ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(CFG, Coloring, 'normal'). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_x86_cfg:reverse_postorder(CFG). -breadthorder(CFG) -> +breadthorder(CFG, _) -> hipe_x86_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_x86_cfg:postorder(CFG). %% Globally defined registers for linear scan -is_global(R) -> +is_global(R, _) -> ?HIPE_X86_REGISTERS:temp1() =:= R orelse ?HIPE_X86_REGISTERS:temp0() =:= R orelse ?HIPE_X86_REGISTERS:is_fixed(R). -is_fixed(R) -> +is_fixed(R, _) -> ?HIPE_X86_REGISTERS:is_fixed(R). -is_arg(R) -> +is_arg(R, _) -> ?HIPE_X86_REGISTERS:is_arg(R). -args(CFG) -> +args(CFG, _) -> ?HIPE_X86_REGISTERS:args(hipe_x86_cfg:arity(CFG)). -non_alloc(CFG) -> - non_alloc(?HIPE_X86_REGISTERS:nr_args(), hipe_x86_cfg:params(CFG)). +non_alloc(CFG, _) -> + non_alloc_1(?HIPE_X86_REGISTERS:nr_args(), hipe_x86_cfg:params(CFG)). %% same as hipe_x86_frame:fix_formals/2 -non_alloc(0, Rest) -> Rest; -non_alloc(N, [_|Rest]) -> non_alloc(N-1, Rest); -non_alloc(_, []) -> []. +non_alloc_1(0, Rest) -> Rest; +non_alloc_1(N, [_|Rest]) -> non_alloc_1(N-1, Rest); +non_alloc_1(_, []) -> []. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> ?HIPE_X86_LIVENESS:analyze(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) -> [X || X <- ?HIPE_X86_LIVENESS:livein(Liveness,L), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_reg(X) =/= ?HIPE_X86_REGISTERS:fcalls(), hipe_x86:temp_reg(X) =/= ?HIPE_X86_REGISTERS:heap_limit(), hipe_x86:temp_type(X) =/= 'double']. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) -> [X || X <- ?HIPE_X86_LIVENESS:liveout(BB_in_out_liveness,Label), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_reg(X) =/= ?HIPE_X86_REGISTERS:fcalls(), @@ -127,37 +133,43 @@ liveout(BB_in_out_liveness,Label) -> %% Registers stuff -allocatable() -> +allocatable(_) -> ?HIPE_X86_REGISTERS:allocatable(). -all_precoloured() -> +all_precoloured(_) -> ?HIPE_X86_REGISTERS:all_precoloured(). -is_precoloured(Reg) -> +is_precoloured(Reg,_) -> ?HIPE_X86_REGISTERS:is_precoloured(Reg). -physical_name(Reg) -> +physical_name(Reg,_) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG,_) -> hipe_x86_cfg:labels(CFG). -var_range(_CFG) -> +var_range(_CFG,_) -> hipe_gensym:var_range(x86). -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG,_) -> Highest_temporary = hipe_gensym:get_var(x86), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) -> hipe_x86_cfg:bb(CFG,L). +update_bb(CFG,L,BB,_) -> + hipe_x86_cfg:bb_add(CFG,L,BB). + +branch_preds(Instr,_) -> + hipe_x86_cfg:branch_preds(Instr). + %% X86 stuff -def_use(Instruction) -> +def_use(Instruction,_) -> {[X || X <- ?HIPE_X86_DEFUSE:insn_def(Instruction), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =/= 'double'], @@ -166,17 +178,19 @@ def_use(Instruction) -> hipe_x86:temp_type(X) =/= 'double'] }. -uses(I) -> +uses(I,_) -> [X || X <- ?HIPE_X86_DEFUSE:insn_use(I), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =/= 'double']. -defines(I) -> +defines(I,_) -> [X || X <- ?HIPE_X86_DEFUSE:insn_def(I), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =/= 'double']. -is_move(Instruction) -> +defines_all_alloc(I,_) -> ?HIPE_X86_DEFUSE:insn_defs_all(I). + +is_move(Instruction,_) -> case hipe_x86:is_move(Instruction) of true -> Src = hipe_x86:move_src(Instruction), @@ -197,8 +211,49 @@ is_move(Instruction) -> false -> false end. -reg_nr(Reg) -> +is_spill_move(Instruction,_) -> + hipe_x86:is_pseudo_spill_move(Instruction). + +reg_nr(Reg,_) -> hipe_x86:temp_reg(Reg). -new_spill_index(SpillIndex) when is_integer(SpillIndex) -> +mk_move(Src, Dst, _) -> + hipe_x86:mk_move(Src, Dst). + +mk_goto(Label, _) -> + hipe_x86:mk_jmp_label(Label). + +redirect_jmp(Jmp, ToOld, ToNew, _) when is_integer(ToOld), is_integer(ToNew) -> + Ref = make_ref(), + put(Ref, false), + I = hipe_x86_subst:insn_lbls( + fun(Tgt) -> + if Tgt =:= ToOld -> put(Ref, true), ToNew; + is_integer(Tgt) -> Tgt + end + end, Jmp), + true = erase(Ref), % Assert that something was rewritten + I. + +new_label(_) -> + hipe_gensym:get_next_label(x86). + +new_reg_nr(_) -> + hipe_gensym:get_next_var(x86). + +update_reg_nr(Nr, Temp, _) -> + hipe_x86:mk_temp(Nr, hipe_x86:temp_type(Temp)). + +subst_temps(SubstFun, Instr, _) -> + ?HIPE_X86_SUBST:insn_temps( + fun(Op) -> + case hipe_x86:temp_is_allocatable(Op) + andalso hipe_x86:temp_type(Op) =/= 'double' + of + true -> SubstFun(Op); + false -> Op + end + end, Instr). + +new_spill_index(SpillIndex, _) when is_integer(SpillIndex) -> SpillIndex+1. diff --git a/lib/hipe/regalloc/hipe_x86_specific_x87.erl b/lib/hipe/regalloc/hipe_x86_specific_x87.erl index ece07cb2f9..3fe49e1f00 100644 --- a/lib/hipe/regalloc/hipe_x86_specific_x87.erl +++ b/lib/hipe/regalloc/hipe_x86_specific_x87.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2006-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -ifdef(HIPE_AMD64). -define(HIPE_X86_SPECIFIC_X87, hipe_amd64_specific_x87). @@ -32,110 +25,119 @@ -endif. -module(?HIPE_X86_SPECIFIC_X87). --export([allocatable/0, - is_precoloured/1, - %% var_range/1, - %% def_use/1, - %% is_fixed/1, - is_arg/1, - %% non_alloc/1, - new_spill_index/1, - number_of_temporaries/1 +-export([allocatable/2, + is_precoloured/2, + %% var_range/2, + %% def_use/2, + %% is_fixed/2, + is_arg/2, + %% non_alloc/2, + new_spill_index/2, + number_of_temporaries/2 ]). %% The following exports are used as M:F(...) calls from other modules; %% e.g. hipe_x86_ra_ls. --export([analyze/1, - bb/2, - args/1, - labels/1, - livein/2, - liveout/2, - uses/1, - defines/1, - is_global/1, - reg_nr/1, - physical_name/1, - breadthorder/1, - postorder/1, - reverse_postorder/1]). - -breadthorder(CFG) -> +-export([analyze/2, + bb/3, + args/2, + labels/2, + livein/3, + liveout/3, + uses/2, + defines/2, + defines_all_alloc/2, + is_spill_move/2, + is_global/2, + reg_nr/2, + physical_name/2, + breadthorder/2, + postorder/2, + reverse_postorder/2]). + +%% callbacks for hipe_x86_ra_ls +-export([check_and_rewrite/4]). + +%% Rewrite happens in hipe_x86_ra_finalise:finalise/4 +check_and_rewrite(CFG, _Coloring, 'linearscan', _) -> + {CFG, false}. + +breadthorder(CFG, _) -> hipe_x86_cfg:breadthorder(CFG). -postorder(CFG) -> +postorder(CFG, _) -> hipe_x86_cfg:postorder(CFG). -reverse_postorder(CFG) -> +reverse_postorder(CFG, _) -> hipe_x86_cfg:reverse_postorder(CFG). -is_global(_) -> +is_global(_, _) -> false. -ifdef(notdef). -is_fixed(_) -> +is_fixed(_, _) -> false. -endif. -is_arg(_) -> +is_arg(_, _) -> false. -args(_) -> +args(_, _) -> []. -ifdef(notdef). -non_alloc(_) -> +non_alloc(_, _) -> []. -endif. %% Liveness stuff -analyze(CFG) -> +analyze(CFG, _) -> ?HIPE_X86_LIVENESS:analyze(CFG). -livein(Liveness,L) -> +livein(Liveness,L,_) -> [X || X <- ?HIPE_X86_LIVENESS:livein(Liveness,L), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double']. -liveout(BB_in_out_liveness,Label) -> +liveout(BB_in_out_liveness,Label,_) -> [X || X <- ?HIPE_X86_LIVENESS:liveout(BB_in_out_liveness,Label), hipe_x86:temp_is_allocatable(X), hipe_x86:temp_type(X) =:= 'double']. %% Registers stuff -allocatable() -> +allocatable('linearscan', _) -> ?HIPE_X86_REGISTERS:allocatable_x87(). -is_precoloured(Reg) -> +is_precoloured(Reg, _) -> ?HIPE_X86_REGISTERS:is_precoloured_x87(Reg). -physical_name(Reg) -> +physical_name(Reg, _) -> Reg. %% CFG stuff -labels(CFG) -> +labels(CFG, _) -> hipe_x86_cfg:labels(CFG). -ifdef(notdef). -var_range(_CFG) -> +var_range(_CFG, _) -> {Min,Max} = hipe_gensym:var_range(x86), %% io:format("Var_range: ~w\n",[{Min,Max}]), {Min,Max}. -endif. -number_of_temporaries(_CFG) -> +number_of_temporaries(_CFG, _) -> Highest_temporary = hipe_gensym:get_var(x86), %% Since we can have temps from 0 to Max adjust by +1. Highest_temporary + 1. -bb(CFG,L) -> +bb(CFG,L,_) -> hipe_x86_cfg:bb(CFG,L). %% X86 stuff -ifdef(notdef). -def_use(Instruction) -> +def_use(Instruction, _) -> {[X || X <- ?HIPE_X86_DEFUSE:insn_def(Instruction), hipe_x86:temp_is_allocatable(X), temp_is_double(X)], @@ -145,21 +147,26 @@ def_use(Instruction) -> }. -endif. -uses(I) -> +uses(I, _) -> [X || X <- ?HIPE_X86_DEFUSE:insn_use(I), hipe_x86:temp_is_allocatable(X), temp_is_double(X)]. -defines(I) -> +defines(I, _) -> [X || X <- ?HIPE_X86_DEFUSE:insn_def(I), hipe_x86:temp_is_allocatable(X), temp_is_double(X)]. +defines_all_alloc(I, _) -> hipe_amd64_defuse:insn_defs_all(I). + +is_spill_move(I, _) -> + hipe_x86:is_pseudo_spill_fmove(I). + temp_is_double(Temp) -> hipe_x86:temp_type(Temp) =:= 'double'. -reg_nr(Reg) -> +reg_nr(Reg, _) -> hipe_x86:temp_reg(Reg). -new_spill_index(SpillIndex) -> +new_spill_index(SpillIndex, _) -> SpillIndex+1. diff --git a/lib/hipe/rtl/Makefile b/lib/hipe/rtl/Makefile index b4cdf8b1f2..5abc9ec049 100644 --- a/lib/hipe/rtl/Makefile +++ b/lib/hipe/rtl/Makefile @@ -1,7 +1,7 @@ # # %CopyrightBegin% # -# Copyright Ericsson AB 2001-2016. All Rights Reserved. +# Copyright Ericsson AB 2001-2017. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -118,10 +118,12 @@ else TYPE_STR= endif -ifeq ($(FLAVOR),smp) -FLAVOR_STR=.smp -else +FLAVOR=$(DEFAULT_FLAVOR) + +ifeq ($(FLAVOR),plain) FLAVOR_STR= +else +FLAVOR_STR=.smp endif ifeq ($(XCOMP),yes) diff --git a/lib/hipe/rtl/hipe_icode2rtl.erl b/lib/hipe/rtl/hipe_icode2rtl.erl index 22feca47cc..6da8a76d34 100644 --- a/lib/hipe/rtl/hipe_icode2rtl.erl +++ b/lib/hipe/rtl/hipe_icode2rtl.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,17 +11,12 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%======================================================================= %% File : hipe_icode2rtl.erl %% Author(s) : Erik Johansson %% Description : Translates Icode to RTL %%======================================================================= -%% -%% $Id$ -%% %% TODO: Better handling of switches... -module(hipe_icode2rtl). @@ -541,8 +532,12 @@ gen_cond(CondOp, Args, TrueLbl, FalseLbl, Pred) -> FalseLbl, Pred)]; '=:=' -> [Arg1, Arg2] = Args, + TypeTestLbl = hipe_rtl:mk_new_label(), [hipe_rtl:mk_branch(Arg1, eq, Arg2, TrueLbl, - hipe_rtl:label_name(GenLbl), Pred), + hipe_rtl:label_name(TypeTestLbl), Pred), + TypeTestLbl, + hipe_tagscheme:test_either_immed(Arg1, Arg2, FalseLbl, + hipe_rtl:label_name(GenLbl)), GenLbl, hipe_rtl:mk_call([Tmp], op_exact_eqeq_2, Args, TestRetName, [], not_remote), @@ -555,8 +550,12 @@ gen_cond(CondOp, Args, TrueLbl, FalseLbl, Pred) -> TrueLbl, 1-Pred)]; '=/=' -> [Arg1, Arg2] = Args, + TypeTestLbl = hipe_rtl:mk_new_label(), [hipe_rtl:mk_branch(Arg1, eq, Arg2, FalseLbl, - hipe_rtl:label_name(GenLbl), 1-Pred), + hipe_rtl:label_name(TypeTestLbl), 1-Pred), + TypeTestLbl, + hipe_tagscheme:test_either_immed(Arg1, Arg2, TrueLbl, + hipe_rtl:label_name(GenLbl)), GenLbl, hipe_rtl:mk_call([Tmp], op_exact_eqeq_2, Args, TestRetName, [], not_remote), diff --git a/lib/hipe/rtl/hipe_rtl.erl b/lib/hipe/rtl/hipe_rtl.erl index 0726827299..04c9728d5c 100644 --- a/lib/hipe/rtl/hipe_rtl.erl +++ b/lib/hipe/rtl/hipe_rtl.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @doc @@ -187,18 +181,14 @@ mk_branch/5, mk_branch/6, - branch_src1/1, - branch_src2/1, - branch_cond/1, - branch_true_label/1, - branch_false_label/1, - branch_pred/1, + mk_branch/7, %% is_branch/1, %% branch_true_label_update/2, %% branch_false_label_update/2, mk_alub/7, mk_alub/8, + alub_has_dst/1, alub_dst/1, alub_src1/1, alub_op/1, @@ -338,6 +328,7 @@ defines/1, redirect_jmp/3, is_safe/1, + reduce_unused/1, %% highest_var/1, pp/1, pp/2, @@ -588,37 +579,25 @@ is_label(#label{}) -> true; is_label(_) -> false. %% -%% branch -%% - -mk_branch(Src1, Op, Src2, True, False) -> - mk_branch(Src1, Op, Src2, True, False, 0.5). -mk_branch(Src1, Op, Src2, True, False, P) -> - #branch{src1=Src1, 'cond'=Op, src2=Src2, true_label=True, - false_label=False, p=P}. -branch_src1(#branch{src1=Src1}) -> Src1. -branch_src1_update(Br, NewSrc) -> Br#branch{src1=NewSrc}. -branch_src2(#branch{src2=Src2}) -> Src2. -branch_src2_update(Br, NewSrc) -> Br#branch{src2=NewSrc}. -branch_cond(#branch{'cond'=Cond}) -> Cond. -branch_true_label(#branch{true_label=TrueLbl}) -> TrueLbl. -branch_true_label_update(Br, NewTrue) -> Br#branch{true_label=NewTrue}. -branch_false_label(#branch{false_label=FalseLbl}) -> FalseLbl. -branch_false_label_update(Br, NewFalse) -> Br#branch{false_label=NewFalse}. -branch_pred(#branch{p=P}) -> P. - -%% %% alub %% -type alub_cond() :: 'eq' | 'ne' | 'ge' | 'geu' | 'gt' | 'gtu' | 'le' | 'leu' | 'lt' | 'ltu' | 'overflow' | 'not_overflow'. +mk_branch(Src1, Cond, Src2, True, False) -> + mk_branch(Src1, Cond, Src2, True, False, 0.5). +mk_branch(Src1, Cond, Src2, True, False, P) -> + mk_branch(Src1, 'sub', Src2, Cond, True, False, P). +mk_branch(Src1, Op, Src2, Cond, True, False, P) -> + mk_alub([], Src1, Op, Src2, Cond, True, False, P). + mk_alub(Dst, Src1, Op, Src2, Cond, True, False) -> mk_alub(Dst, Src1, Op, Src2, Cond, True, False, 0.5). mk_alub(Dst, Src1, Op, Src2, Cond, True, False, P) -> #alub{dst=Dst, src1=Src1, op=Op, src2=Src2, 'cond'=Cond, true_label=True, false_label=False, p=P}. +alub_has_dst(#alub{dst=Dst}) -> Dst =/= []. alub_dst(#alub{dst=Dst}) -> Dst. alub_dst_update(A, NewDst) -> A#alub{dst=NewDst}. alub_src1(#alub{src1=Src1}) -> Src1. @@ -943,8 +922,7 @@ args(I) -> case I of #alu{} -> [alu_src1(I), alu_src2(I)]; #alub{} -> [alub_src1(I), alub_src2(I)]; - #branch{} -> [branch_src1(I), branch_src2(I)]; - #call{} -> + #call{} -> Args = call_arglist(I) ++ hipe_rtl_arch:call_used(), case call_is_known(I) of false -> [call_fun(I) | Args]; @@ -987,8 +965,8 @@ args(I) -> defines(Instr) -> Defs = case Instr of #alu{} -> [alu_dst(Instr)]; + #alub{dst=[]} -> []; #alub{} -> [alub_dst(Instr)]; - #branch{} -> []; #call{} -> call_dstlist(Instr) ++ hipe_rtl_arch:call_defined(); #comment{} -> []; #enter{} -> []; @@ -1042,9 +1020,6 @@ subst_uses(Subst, I) -> #alub{} -> I0 = alub_src1_update(I, subst1(Subst, alub_src1(I))), alub_src2_update(I0, subst1(Subst, alub_src2(I))); - #branch{} -> - I0 = branch_src1_update(I, subst1(Subst, branch_src1(I))), - branch_src2_update(I0, subst1(Subst, branch_src2(I))); #call{} -> case call_is_known(I) of false -> @@ -1126,11 +1101,6 @@ subst_uses_llvm(Subst, I) -> {NewSrc1, _ } = subst1_llvm(Subst1, alub_src1(I)), I0 = alub_src1_update(I, NewSrc1), alub_src2_update(I0, NewSrc2); - #branch{} -> - {NewSrc2, Subst1} = subst1_llvm(Subst, branch_src2(I)), - {NewSrc1, _ } = subst1_llvm(Subst1, branch_src1(I)), - I0 = branch_src1_update(I, NewSrc1), - branch_src2_update(I0, NewSrc2); #call{} -> case call_is_known(I) of false -> @@ -1243,10 +1213,10 @@ subst_defines(Subst, I)-> case I of #alu{} -> alu_dst_update(I, subst1(Subst, alu_dst(I))); + #alub{dst=[]} -> + I; #alub{} -> alub_dst_update(I, subst1(Subst, alub_dst(I))); - #branch{} -> - I; #call{} -> call_dstlist_update(I, subst_list(Subst, call_dstlist(I))); #comment{} -> @@ -1313,7 +1283,6 @@ is_safe(Instr) -> case Instr of #alu{} -> true; #alub{} -> false; - #branch{} -> false; #call{} -> false; #comment{} -> false; #enter{} -> false; @@ -1340,6 +1309,24 @@ is_safe(Instr) -> #switch{} -> false %% Maybe this is safe... end. +%% @spec reduce_unused(rtl_instruction()) +%% -> false | [rtl_instruction()]. +%% +%% @doc Produces a simplified instruction sequence that is equivalent to [Instr] +%% under the assumption that all results of Instr are unused, or 'false' if +%% there is no such sequence (other than [Instr] itself). + +reduce_unused(Instr) -> + case Instr of + #alub{dst=Dst} when Dst =/= [] -> + [Instr#alub{dst=[]}]; + _ -> + case is_safe(Instr) of + true -> []; + false -> false + end + end. + %% %% True if argument is an alu-operator %% @@ -1386,17 +1373,6 @@ redirect_jmp(Jmp, ToOld, ToNew) -> %% OBS: In a jmp instruction more than one labels may be identical %% and thus need redirection! case Jmp of - #branch{} -> - TmpJmp = case branch_true_label(Jmp) of - ToOld -> branch_true_label_update(Jmp, ToNew); - _ -> Jmp - end, - case branch_false_label(TmpJmp) of - ToOld -> - branch_false_label_update(TmpJmp, ToNew); - _ -> - TmpJmp - end; #switch{} -> NewLbls = [case Lbl =:= ToOld of true -> ToNew; @@ -1591,13 +1567,6 @@ pp_instr(Dev, I) -> io:format(Dev, "~n", []); #label{} -> io:format(Dev, "L~w:~n", [label_name(I)]); - #branch{} -> - io:format(Dev, " if (", []), - pp_arg(Dev, branch_src1(I)), - io:format(Dev, " ~w ", [branch_cond(I)]), - pp_arg(Dev, branch_src2(I)), - io:format(Dev, ") then L~w (~.2f) else L~w~n", - [branch_true_label(I), branch_pred(I), branch_false_label(I)]); #switch{} -> io:format(Dev, " switch (", []), pp_arg(Dev, switch_src(I)), @@ -1606,7 +1575,10 @@ pp_instr(Dev, I) -> io:format(Dev, ">\n", []); #alub{} -> io:format(Dev, " ", []), - pp_arg(Dev, alub_dst(I)), + case alub_has_dst(I) of + true -> pp_arg(Dev, alub_dst(I)); + false -> io:format(Dev, "_", []) + end, io:format(Dev, " <- ", []), pp_arg(Dev, alub_src1(I)), io:format(Dev, " ~w ", [alub_op(I)]), diff --git a/lib/hipe/rtl/hipe_rtl.hrl b/lib/hipe/rtl/hipe_rtl.hrl index cc76e7e5c4..50059693aa 100644 --- a/lib/hipe/rtl/hipe_rtl.hrl +++ b/lib/hipe/rtl/hipe_rtl.hrl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -28,7 +22,6 @@ -record(alu, {dst, src1, op, src2}). -record(alub, {dst, src1, op, src2, 'cond', true_label, false_label, p}). --record(branch, {src1, src2, 'cond', true_label, false_label, p}). -record(call, {dstlist, 'fun', arglist, type, continuation, failcontinuation, normalcontinuation = []}). -record(comment, {text}). diff --git a/lib/hipe/rtl/hipe_rtl_arch.erl b/lib/hipe/rtl/hipe_rtl_arch.erl index 397b96120e..65149ea7db 100644 --- a/lib/hipe/rtl/hipe_rtl_arch.erl +++ b/lib/hipe/rtl/hipe_rtl_arch.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Copyright (c) 2001 by Erik Johansson. %%===================================================================== diff --git a/lib/hipe/rtl/hipe_rtl_arith.inc b/lib/hipe/rtl/hipe_rtl_arith.inc index 0c396c8e76..c05b7aa160 100644 --- a/lib/hipe/rtl/hipe_rtl_arith.inc +++ b/lib/hipe/rtl/hipe_rtl_arith.inc @@ -1,10 +1,6 @@ %% -*- Erlang -*- %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%---------------------------------------------------------------------- %% File : hipe_rtl_arith.inc diff --git a/lib/hipe/rtl/hipe_rtl_arith_32.erl b/lib/hipe/rtl/hipe_rtl_arith_32.erl index 12075ed609..1f911642d5 100644 --- a/lib/hipe/rtl/hipe_rtl_arith_32.erl +++ b/lib/hipe/rtl/hipe_rtl_arith_32.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Copyright (c) 2002 by Erik Johansson. diff --git a/lib/hipe/rtl/hipe_rtl_arith_64.erl b/lib/hipe/rtl/hipe_rtl_arith_64.erl index 6dac8fb145..5fa067b98e 100644 --- a/lib/hipe/rtl/hipe_rtl_arith_64.erl +++ b/lib/hipe/rtl/hipe_rtl_arith_64.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%---------------------------------------------------------------------- %% File : hipe_rtl_arith_64.erl diff --git a/lib/hipe/rtl/hipe_rtl_binary.erl b/lib/hipe/rtl/hipe_rtl_binary.erl index 9b400f4c93..c11f61d567 100644 --- a/lib/hipe/rtl/hipe_rtl_binary.erl +++ b/lib/hipe/rtl/hipe_rtl_binary.erl @@ -1,9 +1,5 @@ -%% -*- erlang-indent-level: 2 -*- +%%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2006-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%%------------------------------------------------------------------- %%% File : hipe_rtl_binary.erl @@ -106,10 +100,20 @@ create_lbls(0) -> %%------------------------------------------------------------------------------ get_word_integer(Var, Register, SystemLimitLblName, FalseLblName) -> - [EndLbl] = create_lbls(1), - EndName = hipe_rtl:label_name(EndLbl), - get_word_integer(Var, Register,SystemLimitLblName, FalseLblName, EndName, EndName, - [EndLbl]). + case hipe_rtl:is_imm(Var) of + true -> + TaggedVal = hipe_rtl:imm_value(Var), + true = hipe_tagscheme:is_fixnum(TaggedVal), + Val = hipe_tagscheme:fixnum_val(TaggedVal), + if Val < 0 -> [hipe_rtl:mk_goto(FalseLblName)]; + true -> [hipe_rtl:mk_move(Register, hipe_rtl:mk_imm(Val))] + end; + false -> + [EndLbl] = create_lbls(1), + EndName = hipe_rtl:label_name(EndLbl), + get_word_integer(Var, Register,SystemLimitLblName, FalseLblName, + EndName, EndName, [EndLbl]) + end. get_word_integer(Var, Register, SystemLimitLblName, FalseLblName, TrueLblName, BigLblName, Tail) -> diff --git a/lib/hipe/rtl/hipe_rtl_binary_construct.erl b/lib/hipe/rtl/hipe_rtl_binary_construct.erl index 367d76b24d..52ea5db382 100644 --- a/lib/hipe/rtl/hipe_rtl_binary_construct.erl +++ b/lib/hipe/rtl/hipe_rtl_binary_construct.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %% ==================================================================== %% Module : hipe_rtl_binary_construct @@ -143,43 +137,6 @@ gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SystemLimitLblName, ConstTab end end; - {bs_put_integer, Size, Flags, ConstInfo} -> - Aligned = aligned(Flags), - LittleEndian = littleendian(Flags), - [NewOffset] = get_real(Dst), - case is_illegal_const(Size) of - true -> - [hipe_rtl:mk_goto(FalseLblName)]; - false -> - case ConstInfo of - fail -> - [hipe_rtl:mk_goto(FalseLblName)]; - _ -> - case Args of - [Src, Base, Offset] -> - CCode = static_int_c_code(NewOffset, Src, - Base, Offset, Size, - Flags, TrueLblName, - FalseLblName), - put_static_int(NewOffset, Src, Base, Offset, Size, - CCode, Aligned, LittleEndian, TrueLblName); - [Src, Bits, Base, Offset] -> - {SizeCode, SizeReg} = - hipe_rtl_binary:make_size(Size, Bits, - SystemLimitLblName, - FalseLblName), - CCode = int_c_code(NewOffset, Src, Base, - Offset, SizeReg, Flags, - TrueLblName, FalseLblName), - InCode = - put_dynamic_int(NewOffset, Src, Base, Offset, - SizeReg, CCode, Aligned, - LittleEndian, TrueLblName), - SizeCode ++ InCode - end - end - end; - {unsafe_bs_put_integer, 0, _Flags, _ConstInfo} -> [NewOffset] = get_real(Dst), case Args of @@ -192,44 +149,12 @@ gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SystemLimitLblName, ConstTab end; {unsafe_bs_put_integer, Size, Flags, ConstInfo} -> - case is_illegal_const(Size) of - true -> - [hipe_rtl:mk_goto(FalseLblName)]; - false -> - Aligned = aligned(Flags), - LittleEndian = littleendian(Flags), - [NewOffset] = get_real(Dst), - case ConstInfo of - fail -> - [hipe_rtl:mk_goto(FalseLblName)]; - _ -> - case Args of - [Src, Base, Offset] -> - CCode = static_int_c_code(NewOffset, Src, - Base, Offset, Size, - Flags, TrueLblName, - FalseLblName), - put_unsafe_static_int(NewOffset, Src, Base, - Offset, Size, - CCode, Aligned, LittleEndian, - TrueLblName); - [Src, Bits, Base, Offset] -> - {SizeCode, SizeReg} = - hipe_rtl_binary:make_size(Size, Bits, - SystemLimitLblName, - FalseLblName), - CCode = int_c_code(NewOffset, Src, Base, - Offset, SizeReg, Flags, - TrueLblName, FalseLblName), - InCode = - put_unsafe_dynamic_int(NewOffset, Src, Base, - Offset, SizeReg, CCode, - Aligned, LittleEndian, - TrueLblName), - SizeCode ++ InCode - end - end - end; + do_bs_put_integer(Dst, Args, Size, Flags, ConstInfo, true, + TrueLblName, FalseLblName, SystemLimitLblName); + + {bs_put_integer, Size, Flags, ConstInfo} -> + do_bs_put_integer(Dst, Args, Size, Flags, ConstInfo, false, + TrueLblName, FalseLblName, SystemLimitLblName); bs_utf8_size -> case Dst of @@ -366,6 +291,40 @@ gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SystemLimitLblName, ConstTab {Code, ConstTab} end. +%% Common implementation of bs_put_integer and unsafe_bs_put_integer +do_bs_put_integer(Dst, Args, Size, Flags, ConstInfo, SrcUnsafe, + TrueLblName, FalseLblName, SystemLimitLblName) -> + case is_illegal_const(Size) of + true -> + [hipe_rtl:mk_goto(FalseLblName)]; + false -> + Aligned = aligned(Flags), + LittleEndian = littleendian(Flags), + [NewOffset] = get_real(Dst), + case ConstInfo of + fail -> + [hipe_rtl:mk_goto(FalseLblName)]; + _ -> + case Args of + [Src, Base, Offset] -> + CCode = static_int_c_code(NewOffset, Src, Base, Offset, Size, + Flags, TrueLblName, FalseLblName), + put_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned, + LittleEndian, SrcUnsafe, TrueLblName); + [Src, Bits, Base, Offset] -> + {SizeCode, SizeReg} = + hipe_rtl_binary:make_size(Size, Bits, SystemLimitLblName, + FalseLblName), + CCode = int_c_code(NewOffset, Src, Base, Offset, SizeReg, Flags, + TrueLblName, FalseLblName), + InCode = put_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, + CCode, Aligned, LittleEndian, SrcUnsafe, + TrueLblName), + SizeCode ++ InCode + end + end + end. + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% Code that is used in the append and init writeable functions @@ -429,8 +388,8 @@ realloc_binary(SizeReg, ProcBin, Base) -> hipe_tagscheme:set_field_from_term(ProcBinFlagsTag, ProcBin, Flags), hipe_tagscheme:get_field_from_term(ProcBinValTag, ProcBin, BinPointer), hipe_tagscheme:get_field_from_pointer(BinOrigSizeTag, BinPointer, OrigSize), - hipe_rtl:mk_branch(OrigSize, 'ltu', ResultingSize, - ReallocLblName, NoReallocLblName), + hipe_rtl:mk_branch(OrigSize, 'geu', ResultingSize, NoReallocLblName, + ReallocLblName), NoReallocLbl, hipe_tagscheme:get_field_from_term(ProcBinBytesTag, ProcBin, Base), hipe_rtl:mk_goto(ContLblName), @@ -757,9 +716,9 @@ test_alignment(SrcOffset, NumBits, Offset, AlignedCode, CCode) -> [AlignedLbl, CLbl] = create_lbls(2), [hipe_rtl:mk_alu(Tmp, SrcOffset, 'or', NumBits), hipe_rtl:mk_alu(Tmp, Tmp, 'or', Offset), - hipe_rtl:mk_alub(Tmp, Tmp, 'and', ?LOW_BITS, 'eq', - hipe_rtl:label_name(AlignedLbl), - hipe_rtl:label_name(CLbl)), + hipe_rtl:mk_branch(Tmp, 'and', ?LOW_BITS, 'eq', + hipe_rtl:label_name(AlignedLbl), + hipe_rtl:label_name(CLbl), 0.5), AlignedLbl, AlignedCode, CLbl, @@ -813,28 +772,8 @@ put_float(_NewOffset, _Src, _Base, _Offset, _Size, CCode, _Aligned, CCode. put_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned, - LittleEndian, TrueLblName) -> - {Init, End, UntaggedSrc} = make_init_end(Src, CCode, TrueLblName), - case {Aligned, LittleEndian} of - {true, true} -> - Init ++ - copy_int_little(Base, Offset, NewOffset, Size, UntaggedSrc) ++ - End; - {true, false} -> - Init ++ - copy_int_big(Base, Offset, NewOffset, Size, UntaggedSrc) ++ - End; - {false, true} -> - CCode; - {false, false} -> - Init ++ - copy_offset_int_big(Base, Offset, NewOffset, Size, UntaggedSrc) ++ - End - end. - -put_unsafe_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned, - LittleEndian, TrueLblName) -> - {Init, End, UntaggedSrc} = make_init_end(Src, TrueLblName), + LittleEndian, SrcUnsafe, TrueLblName) -> + {Init, End, UntaggedSrc} = make_init_end(Src, CCode, SrcUnsafe, TrueLblName), case {Aligned, LittleEndian} of {true, true} -> Init ++ @@ -853,27 +792,8 @@ put_unsafe_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned, end. put_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned, - LittleEndian, TrueLblName) -> - {Init, End, UntaggedSrc} = make_init_end(Src, CCode, TrueLblName), - case Aligned of - true -> - case LittleEndian of - true -> - Init ++ - copy_int_little(Base, Offset, NewOffset, SizeReg, UntaggedSrc) ++ - End; - false -> - Init ++ - copy_int_big(Base, Offset, NewOffset, SizeReg, UntaggedSrc) ++ - End - end; - false -> - CCode - end. - -put_unsafe_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned, - LittleEndian, TrueLblName) -> - {Init, End, UntaggedSrc} = make_init_end(Src, TrueLblName), + LittleEndian, SrcUnsafe, TrueLblName) -> + {Init, End, UntaggedSrc} = make_init_end(Src, CCode, SrcUnsafe, TrueLblName), case Aligned of true -> case LittleEndian of @@ -890,14 +810,13 @@ put_unsafe_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned, CCode end. - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% Help functions used by the above %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -make_init_end(Src, CCode, TrueLblName) -> +make_init_end(Src, CCode, false, TrueLblName) -> [CLbl, SuccessLbl] = create_lbls(2), [UntaggedSrc] = create_regs(1), Init = [hipe_tagscheme:test_fixnum(Src, hipe_rtl:label_name(SuccessLbl), @@ -905,9 +824,8 @@ make_init_end(Src, CCode, TrueLblName) -> SuccessLbl, hipe_tagscheme:untag_fixnum(UntaggedSrc,Src)], End = [hipe_rtl:mk_goto(TrueLblName), CLbl| CCode], - {Init, End, UntaggedSrc}. - -make_init_end(Src, TrueLblName) -> + {Init, End, UntaggedSrc}; +make_init_end(Src, _CCode, true, TrueLblName) -> [UntaggedSrc] = create_regs(1), Init = [hipe_tagscheme:untag_fixnum(UntaggedSrc,Src)], End = [hipe_rtl:mk_goto(TrueLblName)], @@ -1284,8 +1202,7 @@ is_divisible(Dividend, Divisor, SuccLbl, FailLbl) -> true -> %% Divisor is a power of 2 %% Test that the Log2-1 lowest bits are clear Mask = hipe_rtl:mk_imm(Divisor - 1), - [Tmp] = create_regs(1), - [hipe_rtl:mk_alub(Tmp, Dividend, 'and', Mask, eq, SuccLbl, FailLbl, 0.99)]; + [hipe_rtl:mk_branch(Dividend, 'and', Mask, eq, SuccLbl, FailLbl, 0.99)]; false -> %% We need division, fall back to a primop [hipe_rtl:mk_call([], is_divisible, [Dividend, hipe_rtl:mk_imm(Divisor)], diff --git a/lib/hipe/rtl/hipe_rtl_binary_match.erl b/lib/hipe/rtl/hipe_rtl_binary_match.erl index d999cd2743..362a52f8fe 100644 --- a/lib/hipe/rtl/hipe_rtl_binary_match.erl +++ b/lib/hipe/rtl/hipe_rtl_binary_match.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%%------------------------------------------------------------------- %%% File : hipe_rtl_binary_match.erl @@ -659,9 +653,8 @@ test_alignment_code(Size, Unit, SLblName, FalseLblName) -> end. get_fast_test_code(Size, AndTest, SLblName, FalseLblName) -> - [Tmp] = create_gcsafe_regs(1), - [hipe_rtl:mk_alub(Tmp, Size, 'and', hipe_rtl:mk_imm(AndTest), - 'eq', SLblName, FalseLblName)]. + [hipe_rtl:mk_branch(Size, 'and', hipe_rtl:mk_imm(AndTest), 'eq', + SLblName, FalseLblName, 0.5)]. %% This is really slow get_slow_test_code(Size, Unit, SLblName, FalseLblName) -> diff --git a/lib/hipe/rtl/hipe_rtl_cfg.erl b/lib/hipe/rtl/hipe_rtl_cfg.erl index f49e8f815f..ce399498d6 100644 --- a/lib/hipe/rtl/hipe_rtl_cfg.erl +++ b/lib/hipe/rtl/hipe_rtl_cfg.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_rtl_cfg). @@ -83,9 +76,7 @@ mk_goto(Name) -> branch_successors(Instr) -> case Instr of - #branch{} -> [hipe_rtl:branch_true_label(Instr), - hipe_rtl:branch_false_label(Instr)]; - #alub{} -> [hipe_rtl:alub_true_label(Instr), + #alub{} -> [hipe_rtl:alub_true_label(Instr), hipe_rtl:alub_false_label(Instr)]; #switch{} -> hipe_rtl:switch_labels(Instr); #call{} -> @@ -106,7 +97,6 @@ fails_to(Instr) -> is_branch(Instr) -> case Instr of - #branch{} -> true; #alub{} -> true; #switch{} -> true; #goto{} -> true; @@ -127,7 +117,7 @@ is_branch(Instr) -> is_pure_branch(Instr) -> case Instr of - #branch{} -> true; + #alub{} -> not hipe_rtl:alub_has_dst(Instr); #switch{} -> true; #goto{} -> true; _ -> false diff --git a/lib/hipe/rtl/hipe_rtl_cleanup_const.erl b/lib/hipe/rtl/hipe_rtl_cleanup_const.erl index 0a1cdbacb8..bfa6b9682e 100644 --- a/lib/hipe/rtl/hipe_rtl_cleanup_const.erl +++ b/lib/hipe/rtl/hipe_rtl_cleanup_const.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%%------------------------------------------------------------------- %%% File : hipe_rtl_cleanup_const.erl diff --git a/lib/hipe/rtl/hipe_rtl_exceptions.erl b/lib/hipe/rtl/hipe_rtl_exceptions.erl index 331719f344..03dc959bcf 100644 --- a/lib/hipe/rtl/hipe_rtl_exceptions.erl +++ b/lib/hipe/rtl/hipe_rtl_exceptions.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Copyright (c) 2001 by Erik Johansson. All Rights Reserved @@ -27,8 +21,6 @@ %% Notes : %% History : * 2001-04-10 Erik Johansson ([email protected]): %% Created. -%% CVS : -%% $Id$ %% ==================================================================== %% Exports : %% diff --git a/lib/hipe/rtl/hipe_rtl_lcm.erl b/lib/hipe/rtl/hipe_rtl_lcm.erl index 71bd06c0df..9dcdd05fb1 100644 --- a/lib/hipe/rtl/hipe_rtl_lcm.erl +++ b/lib/hipe/rtl/hipe_rtl_lcm.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% File : hipe_rtl_lcm.erl @@ -378,7 +372,6 @@ is_expr(I) -> %% end; #alub{} -> false; %% TODO: Split instruction to consider alu expression? - #branch{} -> false; #call{} -> false; %% We cannot prove that a call has no side-effects #comment{} -> false; #enter{} -> false; diff --git a/lib/hipe/rtl/hipe_rtl_liveness.erl b/lib/hipe/rtl/hipe_rtl_liveness.erl index 674043ec3c..98376439f3 100644 --- a/lib/hipe/rtl/hipe_rtl_liveness.erl +++ b/lib/hipe/rtl/hipe_rtl_liveness.erl @@ -1,9 +1,3 @@ -%% $Id$ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +9,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% diff --git a/lib/hipe/rtl/hipe_rtl_mk_switch.erl b/lib/hipe/rtl/hipe_rtl_mk_switch.erl index 5f9dffa8cf..d5cc6bd5df 100644 --- a/lib/hipe/rtl/hipe_rtl_mk_switch.erl +++ b/lib/hipe/rtl/hipe_rtl_mk_switch.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Copyright (c) 2001 by Erik Johansson. All Rights Reserved diff --git a/lib/hipe/rtl/hipe_rtl_primops.erl b/lib/hipe/rtl/hipe_rtl_primops.erl index 062fab842f..850a75f71b 100644 --- a/lib/hipe/rtl/hipe_rtl_primops.erl +++ b/lib/hipe/rtl/hipe_rtl_primops.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Copyright (c) 2001 by Erik Johansson. All Rights Reserved @@ -26,9 +20,6 @@ %% Notes : %% History : * 2001-03-15 Erik Johansson ([email protected]): %% Created. -%% -%% $Id$ -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -module(hipe_rtl_primops). @@ -760,12 +751,9 @@ gen_fun_thing_skeleton(FunP, FunName={_Mod,_FunId,Arity}, NumFree, %% And creates a fe (at load time). FeVar = hipe_rtl:mk_new_reg(), PidVar = hipe_rtl:mk_new_reg_gcsafe(), - NativeVar = hipe_rtl:mk_new_reg(), [hipe_rtl:mk_load_address(FeVar, {FunName, MagicNr, Index}, closure), store_struct_field(FunP, ?EFT_FE, FeVar), - load_struct_field(NativeVar, FeVar, ?EFE_NATIVE_ADDRESS), - store_struct_field(FunP, ?EFT_NATIVE_ADDRESS, NativeVar), store_struct_field(FunP, ?EFT_ARITY, hipe_rtl:mk_imm(Arity-NumFree)), @@ -845,7 +833,7 @@ gen_free_vars([], _, _, _, AccCode) -> AccCode. %% call_fun (also handles enter_fun when Continuation = []) gen_call_fun(Dst, ArgsAndFun, Continuation, Fail) -> - NAddressReg = hipe_rtl:mk_new_reg(), + NCNAddressReg = hipe_rtl:mk_new_reg(), ArityReg = hipe_rtl:mk_new_reg_gcsafe(), [Fun|RevArgs] = lists:reverse(ArgsAndFun), @@ -856,7 +844,7 @@ gen_call_fun(Dst, ArgsAndFun, Continuation, Fail) -> BadFunLabName = hipe_rtl:label_name(NonClosureLabel), BadFunCode = [NonClosureLabel, - hipe_rtl:mk_call([NAddressReg], + hipe_rtl:mk_call([NCNAddressReg], 'nonclosure_address', [Fun, hipe_rtl:mk_imm(length(Args))], hipe_rtl:label_name(CallNonClosureLabel), @@ -865,25 +853,26 @@ gen_call_fun(Dst, ArgsAndFun, Continuation, Fail) -> CallNonClosureLabel, case Continuation of [] -> - hipe_rtl:mk_enter(NAddressReg, Args, not_remote); + hipe_rtl:mk_enter(NCNAddressReg, Args, not_remote); _ -> - hipe_rtl:mk_call(Dst, NAddressReg, Args, + hipe_rtl:mk_call(Dst, NCNAddressReg, Args, Continuation, Fail, not_remote) end], {BadArityLabName, BadArityCode} = gen_fail_code(Fail, {badarity, Fun}), - CheckGetCode = - hipe_tagscheme:if_fun_get_arity_and_address(ArityReg, NAddressReg, + CNAddressReg = hipe_rtl:mk_new_reg(), + CheckGetCode = + hipe_tagscheme:if_fun_get_arity_and_address(ArityReg, CNAddressReg, Fun, BadFunLabName, 0.9), CheckArityCode = check_arity(ArityReg, length(RevArgs), BadArityLabName), CallCode = case Continuation of [] -> %% This is a tailcall - [hipe_rtl:mk_enter(NAddressReg, ArgsAndFun, not_remote)]; + [hipe_rtl:mk_enter(CNAddressReg, ArgsAndFun, not_remote)]; _ -> %% Ordinary call - [hipe_rtl:mk_call(Dst, NAddressReg, ArgsAndFun, + [hipe_rtl:mk_call(Dst, CNAddressReg, ArgsAndFun, Continuation, Fail, not_remote)] end, [CheckGetCode, CheckArityCode, CallCode, BadFunCode, BadArityCode]. diff --git a/lib/hipe/rtl/hipe_rtl_ssa.erl b/lib/hipe/rtl/hipe_rtl_ssa.erl index 1e3b21d6be..70f9eeedc9 100644 --- a/lib/hipe/rtl/hipe_rtl_ssa.erl +++ b/lib/hipe/rtl/hipe_rtl_ssa.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%---------------------------------------------------------------------- %% File : hipe_rtl_ssa.erl @@ -37,7 +31,7 @@ -include("hipe_rtl.hrl"). -include("../ssa/hipe_ssa.inc"). - + %%---------------------------------------------------------------------- %% Auxiliary operations which seriously differ between Icode and RTL. %%---------------------------------------------------------------------- diff --git a/lib/hipe/rtl/hipe_rtl_ssa_avail_expr.erl b/lib/hipe/rtl/hipe_rtl_ssa_avail_expr.erl index f08ff22ed9..3fbbf6287f 100644 --- a/lib/hipe/rtl/hipe_rtl_ssa_avail_expr.erl +++ b/lib/hipe/rtl/hipe_rtl_ssa_avail_expr.erl @@ -1,8 +1,3 @@ -%%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%%------------------------------------------------------------------- %%% File : hipe_rtl_ssa_avail_expr.erl diff --git a/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl index 7158383010..cad43e2df5 100644 --- a/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl +++ b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -110,8 +104,6 @@ visit_expression(Instruction, Environment) -> visit_alu(Instruction, Environment); #alub{} -> visit_alub(Instruction, Environment); - #branch{} -> - visit_branch(Instruction, Environment); #call{} -> visit_call(Instruction, Environment); %% #comment{} -> @@ -184,42 +176,6 @@ set_to(Dst, Val, Env) -> {[], SSAWork, Env1}. %%----------------------------------------------------------------------------- -%% Procedure : visit_branch/2 -%% Purpose : do symbolic exection of branch instructions. -%% Arguments : Inst - The instruction -%% Env - The environment -%% Returns : { FlowWorkList, SSAWorkList, NewEnvironment} -%%----------------------------------------------------------------------------- - -visit_branch(Inst, Env) -> %% Titta också på exekverbarflagga - Val1 = lookup_lattice_value(hipe_rtl:branch_src1(Inst), Env), - Val2 = lookup_lattice_value(hipe_rtl:branch_src2(Inst), Env), - CFGWL = case evaluate_relop(Val1, hipe_rtl:branch_cond(Inst), Val2) of - true -> [hipe_rtl:branch_true_label(Inst)]; - false -> [hipe_rtl:branch_false_label(Inst)]; - bottom -> [hipe_rtl:branch_true_label(Inst), - hipe_rtl:branch_false_label(Inst)]; - top -> [] - end, - {CFGWL, [], Env}. - -%%----------------------------------------------------------------------------- -%% Procedure : evaluate_relop/3 -%% Purpose : evaluate the given relop. While taking care to handle top & -%% bottom in some sane way. -%% Arguments : Val1, Val2 - The operands Integers or top or bottom -%% RelOp - some relop atom from rtl. -%% Returns : bottom, top, true or false -%%----------------------------------------------------------------------------- - -evaluate_relop(Val1, RelOp, Val2) -> - if - (Val1==bottom) or (Val2==bottom) -> bottom ; - (Val1==top) or (Val2==top) -> top; - true -> hipe_rtl_arch:eval_cond(RelOp, Val1, Val2) - end. - -%%----------------------------------------------------------------------------- %% Procedure : evaluate_fixnumop/2 %% Purpose : try to evaluate a fixnumop. %% Arguments : Val1 - operand (an integer, 'top' or 'bottom') @@ -408,6 +364,7 @@ partial_eval_branch(Cond, N0, Z0, V0, C0) -> Cond =:= 'ne' -> {true, Z0, true, true}; Cond =:= 'gt'; Cond =:= 'le' -> {N0, Z0, V0, true}; + Cond =:= 'leu'; Cond =:= 'gtu' -> {true, Z0, true, C0 }; Cond =:= 'lt'; Cond =:= 'ge' -> {N0, true, V0, true}; @@ -450,7 +407,11 @@ visit_alub(Inst, Env) -> false -> [hipe_rtl:alub_false_label(Inst)] end end, - {[], NewSSA, NewEnv} = set_to(hipe_rtl:alub_dst(Inst), NewVal, Env), + {[], NewSSA, NewEnv} = + case hipe_rtl:alub_has_dst(Inst) of + false -> {[], [], Env}; + true -> set_to(hipe_rtl:alub_dst(Inst), NewVal, Env) + end, {Labels, NewSSA, NewEnv}. %%----------------------------------------------------------------------------- @@ -688,8 +649,6 @@ update_instruction(Inst, Env) -> update_alu(Inst, Env); #alub{} -> update_alub(Inst, Env); - #branch{} -> - update_branch(Inst, Env); #call{} -> subst_all_uses(Inst, Env); %% #comment{} -> @@ -902,33 +861,6 @@ update_alu(Inst, Env) -> {Val,_,_,_,_} = evaluate_alu(Val1, hipe_rtl:alu_op(Inst), Val2), [hipe_rtl:mk_move(hipe_rtl:alu_dst(Inst), hipe_rtl:mk_imm(Val))] end. - -%%----------------------------------------------------------------------------- -%% Procedure : update_branch/2 -%% Purpose : update an branch-instruction -%% Arguments : Inst - the instruction. -%% Env - in which everything happens. -%% Returns : list of new instruction -%%----------------------------------------------------------------------------- - -update_branch(Inst, Env) -> - Src1 = hipe_rtl:branch_src1(Inst), - Src2 = hipe_rtl:branch_src2(Inst), - Val1 = lookup_lattice_value(Src1, Env), - Val2 = lookup_lattice_value(Src2, Env), - if - (Val1 =:= bottom) and (Val2 =:= bottom) -> - [Inst]; - Val1 =:= bottom -> - [hipe_rtl:subst_uses([{Src2, hipe_rtl:mk_imm(Val2)}], Inst)]; - Val2 =:= bottom -> - [hipe_rtl:subst_uses([{Src1, hipe_rtl:mk_imm(Val1)}], Inst)]; - true -> - case hipe_rtl_arch:eval_cond(hipe_rtl:branch_cond(Inst), Val1, Val2) of - true -> [hipe_rtl:mk_goto(hipe_rtl:branch_true_label(Inst))]; - false -> [hipe_rtl:mk_goto(hipe_rtl:branch_false_label(Inst))] - end - end. %%----------------------------------------------------------------------------- %% Procedure : update_alub/2 @@ -943,8 +875,12 @@ update_branch(Inst, Env) -> %% some small helpers. alub_to_move(Inst, Res, Lab) -> - [hipe_rtl:mk_move(hipe_rtl:alub_dst(Inst), Res), - hipe_rtl:mk_goto(Lab)]. + Goto = [hipe_rtl:mk_goto(Lab)], + case hipe_rtl:alub_has_dst(Inst) of + false -> Goto; + true -> + [hipe_rtl:mk_move(hipe_rtl:alub_dst(Inst), Res) | Goto] + end. make_alub_subst_list(bottom, _, Tail) -> Tail; make_alub_subst_list(top, Src, _) -> diff --git a/lib/hipe/rtl/hipe_rtl_ssapre.erl b/lib/hipe/rtl/hipe_rtl_ssapre.erl index df1a4b9376..eacaa28196 100644 --- a/lib/hipe/rtl/hipe_rtl_ssapre.erl +++ b/lib/hipe/rtl/hipe_rtl_ssapre.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% File : hipe_rtl_ssapre.erl diff --git a/lib/hipe/rtl/hipe_rtl_symbolic.erl b/lib/hipe/rtl/hipe_rtl_symbolic.erl index 1d7e0ec55e..8ca307952b 100644 --- a/lib/hipe/rtl/hipe_rtl_symbolic.erl +++ b/lib/hipe/rtl/hipe_rtl_symbolic.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%------------------------------------------------------------------- %% File : hipe_rtl_symbolic.erl diff --git a/lib/hipe/rtl/hipe_rtl_varmap.erl b/lib/hipe/rtl/hipe_rtl_varmap.erl index 31165d91a4..375a8f85c0 100644 --- a/lib/hipe/rtl/hipe_rtl_varmap.erl +++ b/lib/hipe/rtl/hipe_rtl_varmap.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,12 +11,9 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Copyright (c) 2001 by Erik Johansson. All Rights Reserved -%% Time-stamp: <2008-04-20 14:55:35 richard> %% ==================================================================== %% Module : hipe_rtl_varmap %% Purpose : diff --git a/lib/hipe/rtl/hipe_tagscheme.erl b/lib/hipe/rtl/hipe_tagscheme.erl index 8825a3ade3..68cbe75e85 100644 --- a/lib/hipe/rtl/hipe_tagscheme.erl +++ b/lib/hipe/rtl/hipe_tagscheme.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2015. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%======================================================================== %% @@ -25,9 +19,6 @@ %% %% Modifications: %% 020904: Happi - added support for external pids and ports. -%% -%%======================================================================== -%% $Id$ %%======================================================================== -module(hipe_tagscheme). @@ -49,6 +40,7 @@ fixnum_gt/5, fixnum_lt/5, fixnum_ge/5, fixnum_le/5, fixnum_val/1, fixnum_mul/4, fixnum_addsub/5, fixnum_andorxor/4, fixnum_not/2, fixnum_bsr/3, fixnum_bsl/3]). +-export([test_either_immed/4]). -export([unsafe_car/2, unsafe_cdr/2, unsafe_constant_element/3, unsafe_update_element/3, element/6]). -export([unsafe_closure_element/3]). @@ -68,9 +60,7 @@ -include("hipe_rtl.hrl"). -include("hipe_literals.hrl"). --ifdef(EFT_NATIVE_ADDRESS). -export([if_fun_get_arity_and_address/5]). --endif. -undef(TAG_PRIMARY_BOXED). -undef(TAG_IMMED2_MASK). @@ -173,22 +163,21 @@ test_nil(X, TrueLab, FalseLab, Pred) -> hipe_rtl:mk_branch(X, eq, hipe_rtl:mk_imm(?NIL), TrueLab, FalseLab, Pred). test_cons(X, TrueLab, FalseLab, Pred) -> - Tmp = hipe_rtl:mk_new_reg_gcsafe(), Mask = hipe_rtl:mk_imm(?TAG_PRIMARY_MASK - ?TAG_PRIMARY_LIST), - hipe_rtl:mk_alub(Tmp, X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred). + hipe_rtl:mk_branch(X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred). test_is_boxed(X, TrueLab, FalseLab, Pred) -> - Tmp = hipe_rtl:mk_new_reg_gcsafe(), Mask = hipe_rtl:mk_imm(?TAG_PRIMARY_MASK - ?TAG_PRIMARY_BOXED), - hipe_rtl:mk_alub(Tmp, X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred). + hipe_rtl:mk_branch(X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred). get_header(Res, X) -> hipe_rtl:mk_load(Res, X, hipe_rtl:mk_imm(-(?TAG_PRIMARY_BOXED))). mask_and_compare(X, Mask, Value, TrueLab, FalseLab, Pred) -> Tmp = hipe_rtl:mk_new_reg_gcsafe(), - [hipe_rtl:mk_alu(Tmp, X, 'and', hipe_rtl:mk_imm(Mask)), - hipe_rtl:mk_branch(Tmp, 'eq', hipe_rtl:mk_imm(Value), TrueLab, FalseLab, Pred)]. + [hipe_rtl:mk_alu(Tmp, X, 'sub', hipe_rtl:mk_imm(Value)), + hipe_rtl:mk_branch(Tmp, 'and', hipe_rtl:mk_imm(Mask), + eq, TrueLab, FalseLab, Pred)]. test_immed1(X, Value, TrueLab, FalseLab, Pred) -> mask_and_compare(X, ?TAG_IMMED1_MASK, Value, TrueLab, FalseLab, Pred). @@ -240,13 +229,12 @@ test_atom(X, TrueLab, FalseLab, Pred) -> test_tuple(X, TrueLab, FalseLab, Pred) -> Tmp = hipe_rtl:mk_new_reg_gcsafe(), - Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), HalfTrueLab = hipe_rtl:mk_new_label(), [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), HalfTrueLab, get_header(Tmp, X), - hipe_rtl:mk_alub(Tmp2, Tmp, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', - TrueLab, FalseLab, Pred)]. + hipe_rtl:mk_branch(Tmp, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', + TrueLab, FalseLab, Pred)]. test_tuple_N(X, N, TrueLab, FalseLab, Pred) -> Tmp = hipe_rtl:mk_new_reg_gcsafe(), @@ -282,7 +270,6 @@ test_ref(X, TrueLab, FalseLab, Pred) -> TrueLab, FalseLab, Pred) ]. --ifdef(EFT_NATIVE_ADDRESS). test_closure(X, TrueLab, FalseLab, Pred) -> Tmp = hipe_rtl:mk_new_reg_gcsafe(), HalfTrueLab = hipe_rtl:mk_new_label(), @@ -291,7 +278,6 @@ test_closure(X, TrueLab, FalseLab, Pred) -> get_header(Tmp, X), mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_FUN, TrueLab, FalseLab, Pred)]. --endif. test_fun(X, TrueLab, FalseLab, Pred) -> Hdr = hipe_rtl:mk_new_reg_gcsafe(), @@ -378,14 +364,17 @@ test_matchstate(X, TrueLab, FalseLab, Pred) -> mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_BIN_MATCHSTATE, TrueLab, FalseLab, Pred)]. +test_bitstr_header(HdrTmp, TrueLab, FalseLab, Pred) -> + Mask = ?TAG_HEADER_MASK - ?BINARY_XXX_MASK, + mask_and_compare(HdrTmp, Mask, ?TAG_HEADER_REFC_BIN, TrueLab, FalseLab, Pred). + test_bitstr(X, TrueLab, FalseLab, Pred) -> Tmp = hipe_rtl:mk_new_reg_gcsafe(), HalfTrueLab = hipe_rtl:mk_new_label(), - Mask = ?TAG_HEADER_MASK - ?BINARY_XXX_MASK, [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), HalfTrueLab, get_header(Tmp, X), - mask_and_compare(Tmp, Mask, ?TAG_HEADER_REFC_BIN, TrueLab, FalseLab, Pred)]. + test_bitstr_header(Tmp, TrueLab, FalseLab, Pred)]. test_binary(X, TrueLab, FalseLab, Pred) -> Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), @@ -393,12 +382,10 @@ test_binary(X, TrueLab, FalseLab, Pred) -> IsBoxedLab = hipe_rtl:mk_new_label(), IsBitStrLab = hipe_rtl:mk_new_label(), IsSubBinLab = hipe_rtl:mk_new_label(), - Mask = ?TAG_HEADER_MASK - ?BINARY_XXX_MASK, [test_is_boxed(X, hipe_rtl:label_name(IsBoxedLab), FalseLab, Pred), IsBoxedLab, get_header(Tmp1, X), - mask_and_compare(Tmp1, Mask, ?TAG_HEADER_REFC_BIN, - hipe_rtl:label_name(IsBitStrLab), FalseLab, Pred), + test_bitstr_header(Tmp1, hipe_rtl:label_name(IsBitStrLab), FalseLab, Pred), IsBitStrLab, mask_and_compare(Tmp1, ?TAG_HEADER_MASK, ?TAG_HEADER_SUB_BIN, hipe_rtl:label_name(IsSubBinLab), TrueLab, 0.5), @@ -468,14 +455,23 @@ test_fixnums_1([Arg1, Arg2|Args], Acc) -> Tmp = hipe_rtl:mk_new_reg_gcsafe(), test_fixnums_1([Tmp|Args], [hipe_rtl:mk_alu(Tmp, Arg1, 'and', Arg2)|Acc]). +test_two_fixnums(Arg, Arg, FalseLab) -> + TrueLab = hipe_rtl:mk_new_label(), + [test_fixnum(Arg, hipe_rtl:label_name(TrueLab), FalseLab, 0.99), + TrueLab]; test_two_fixnums(Arg1, Arg2, FalseLab) -> TrueLab = hipe_rtl:mk_new_label(), - case hipe_rtl:is_imm(Arg2) of + case hipe_rtl:is_imm(Arg1) orelse hipe_rtl:is_imm(Arg2) of true -> - Value = hipe_rtl:imm_value(Arg2), + {Imm, Var} = + case hipe_rtl:is_imm(Arg1) of + true -> {Arg1, Arg2}; + false -> {Arg2, Arg1} + end, + Value = hipe_rtl:imm_value(Imm), case Value band ?TAG_IMMED1_MASK of ?TAG_IMMED1_SMALL -> - [test_fixnum(Arg1, hipe_rtl:label_name(TrueLab), FalseLab, 0.99), + [test_fixnum(Var, hipe_rtl:label_name(TrueLab), FalseLab, 0.99), TrueLab]; _ -> [hipe_rtl:mk_goto(FalseLab)] @@ -516,28 +512,48 @@ unsafe_fixnum_sub(Arg1, Arg2, Res) -> %%% (16X+tag)+((16Y+tag)-tag) = 16X+tag+16Y = 16(X+Y)+tag %%% (16X+tag)-((16Y+tag)-tag) = 16X+tag-16Y = 16(X-Y)+tag -fixnum_addsub(AluOp, Arg1, Arg2, Res, OtherLab) -> - Tmp = hipe_rtl:mk_new_reg_gcsafe(), +fixnum_addsub(AluOp, Arg1, Arg2, FinalRes, OtherLab) -> + NoOverflowLab = hipe_rtl:mk_new_label(), %% XXX: Consider moving this test to the users of fixnum_addsub. - case Arg1 =/= Res andalso Arg2 =/= Res of - true -> - %% Args differ from res. - NoOverflowLab = hipe_rtl:mk_new_label(), - [hipe_rtl:mk_alu(Tmp, Arg2, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), - hipe_rtl:mk_alub(Res, Arg1, AluOp, Tmp, not_overflow, - hipe_rtl:label_name(NoOverflowLab), - hipe_rtl:label_name(OtherLab), 0.99), - NoOverflowLab]; + {Res, Tail} = + case Arg1 =/= FinalRes andalso Arg2 =/= FinalRes of + true -> + %% Args differ from res. + {FinalRes, [NoOverflowLab]}; + false -> + %% At least one of the arguments is the same as Res. + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + {Tmp, [NoOverflowLab, hipe_rtl:mk_move(FinalRes, Tmp)]} + end, + case (hipe_rtl:is_imm(Arg1) andalso AluOp =:= 'add') + orelse hipe_rtl:is_imm(Arg2) + of + true -> + %% Pre-compute the untagged immediate. The optimisers won't do this for us + %% since they don't know that the untag never underflows. + {Var, Imm0} = + case hipe_rtl:is_imm(Arg2) of + true -> {Arg1, Arg2}; + false -> {Arg2, Arg1} + end, + Imm = hipe_rtl:mk_imm(hipe_rtl:imm_value(Imm0) - ?TAG_IMMED1_SMALL), + [hipe_rtl:mk_alub(Res, Var, AluOp, Imm, not_overflow, + hipe_rtl:label_name(NoOverflowLab), + hipe_rtl:label_name(OtherLab), 0.99) + |Tail]; false -> - %% At least one of the arguments is the same as Res. - Tmp2 = hipe_rtl:mk_new_var(), % XXX: shouldn't this var be a reg? - NoOverflowLab = hipe_rtl:mk_new_label(), - [hipe_rtl:mk_alu(Tmp, Arg2, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), - hipe_rtl:mk_alub(Tmp2, Arg1, AluOp, Tmp, not_overflow, + %% Commute add to save a move on x86 + {UntagFirst, Lhs, Rhs} = + case AluOp of + 'add' -> {Arg1, Res, Arg2}; + 'sub' -> {Arg2, Arg1, Res} + end, + [hipe_rtl:mk_alu(Res, UntagFirst, sub, + hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), + hipe_rtl:mk_alub(Res, Lhs, AluOp, Rhs, not_overflow, hipe_rtl:label_name(NoOverflowLab), - hipe_rtl:label_name(OtherLab), 0.99), - NoOverflowLab, - hipe_rtl:mk_move(Res, Tmp2)] + hipe_rtl:label_name(OtherLab), 0.99) + |Tail] end. %%% ((16X+tag) div 16) * ((16Y+tag)-tag) + tag = X*16Y+tag = 16(XY)+tag @@ -557,8 +573,8 @@ fixnum_andorxor(AluOp, Arg1, Arg2, Res) -> case AluOp of 'xor' -> Tmp = hipe_rtl:mk_new_reg_gcsafe(), - [hipe_rtl:mk_alu(Tmp, Arg1, 'xor', Arg2), % clears tag :-( - hipe_rtl:mk_alu(Res, Tmp, 'or', hipe_rtl:mk_imm(?TAG_IMMED1_SMALL))]; + [hipe_rtl:mk_alu(Tmp, Arg1, 'sub', hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), + hipe_rtl:mk_alu(Res, Tmp, 'xor', Arg2)]; _ -> hipe_rtl:mk_alu(Res, Arg1, AluOp, Arg2) end. @@ -585,6 +601,21 @@ fixnum_bsl(Arg1, Arg2, Res) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Test if either of two values are immediate (primary tag IMMED1, 0x3) +test_either_immed(Arg1, Arg2, TrueLab, FalseLab) -> + %% This test assumes primary tag 0x0 is reserved and immed has tag 0x3 + 16#0 = ?TAG_PRIMARY_HEADER, + 16#3 = ?TAG_PRIMARY_IMMED1, + Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), + Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), + [hipe_rtl:mk_alu(Tmp1, Arg1, 'sub', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Tmp2, Arg2, 'sub', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_alu(Tmp2, Tmp2, 'or', Tmp1), + hipe_rtl:mk_branch(Tmp2, 'and', hipe_rtl:mk_imm(2), eq, + FalseLab, TrueLab, 0.01)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + unsafe_car(Dst, Arg) -> hipe_rtl:mk_load(Dst, Arg, hipe_rtl:mk_imm(-(?TAG_PRIMARY_LIST))). @@ -621,14 +652,13 @@ unsafe_update_element(Tuple, Index, Value) -> % Index is an immediate element(Dst, Index, Tuple, FailLabName, {tuple, A}, IndexInfo) -> FixnumOkLab = hipe_rtl:mk_new_label(), IndexOkLab = hipe_rtl:mk_new_label(), - Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple UIndex = hipe_rtl:mk_new_reg_gcsafe(), Arity = hipe_rtl:mk_imm(A), - InvIndex = hipe_rtl:mk_new_reg_gcsafe(), - Offset = hipe_rtl:mk_new_reg_gcsafe(), case IndexInfo of valid -> %% This is no branch, 1 load and 3 alus = 4 instr + Offset = hipe_rtl:mk_new_reg_gcsafe(), + Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple [untag_fixnum(UIndex, Index), hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), hipe_rtl:mk_alu(Offset, UIndex, 'sll', @@ -637,97 +667,78 @@ element(Dst, Index, Tuple, FailLabName, {tuple, A}, IndexInfo) -> fixnums -> %% This is 1 branch, 1 load and 4 alus = 6 instr [untag_fixnum(UIndex, Index), - hipe_rtl:mk_alu(Ptr, Tuple, 'sub',hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, - FailLabName, IndexOkLab)]; + gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)]; _ -> %% This is 3 branches, 1 load and 5 alus = 9 instr [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab), FailLabName, 0.99), FixnumOkLab, untag_fixnum(UIndex, Index), - hipe_rtl:mk_alu(Ptr, Tuple, 'sub',hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, - FailLabName, IndexOkLab)] + gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)] end; element(Dst, Index, Tuple, FailLabName, tuple, IndexInfo) -> FixnumOkLab = hipe_rtl:mk_new_label(), IndexOkLab = hipe_rtl:mk_new_label(), - Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple Header = hipe_rtl:mk_new_reg_gcsafe(), UIndex = hipe_rtl:mk_new_reg_gcsafe(), Arity = hipe_rtl:mk_new_reg_gcsafe(), - InvIndex = hipe_rtl:mk_new_reg_gcsafe(), - Offset = hipe_rtl:mk_new_reg_gcsafe(), case IndexInfo of fixnums -> %% This is 1 branch, 2 loads and 5 alus = 8 instr - [hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), - hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), + [get_header(Header, Tuple), untag_fixnum(UIndex, Index), hipe_rtl:mk_alu(Arity,Header,'srl',hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, - FailLabName, IndexOkLab)]; + gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)]; Num when is_integer(Num) -> %% This is 1 branch, 1 load and 3 alus = 5 instr - [hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))| - gen_element_tail(Dst, Ptr, InvIndex, hipe_rtl:mk_imm(Num), - Offset, UIndex, FailLabName, IndexOkLab)]; + gen_element_tail(Dst, Tuple, hipe_rtl:mk_imm(Num), UIndex, FailLabName, + IndexOkLab); _ -> %% This is 2 branches, 2 loads and 6 alus = 10 instr [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab), FailLabName, 0.99), FixnumOkLab, - hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), - hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), + get_header(Header, Tuple), untag_fixnum(UIndex, Index), hipe_rtl:mk_alu(Arity,Header,'srl',hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex, - FailLabName, IndexOkLab)] + gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)] end; element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) -> FixnumOkLab = hipe_rtl:mk_new_label(), BoxedOkLab = hipe_rtl:mk_new_label(), TupleOkLab = hipe_rtl:mk_new_label(), IndexOkLab = hipe_rtl:mk_new_label(), - Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple Header = hipe_rtl:mk_new_reg_gcsafe(), - Tmp = hipe_rtl:mk_new_reg_gcsafe(), UIndex = hipe_rtl:mk_new_reg_gcsafe(), Arity = hipe_rtl:mk_new_reg_gcsafe(), - InvIndex = hipe_rtl:mk_new_reg_gcsafe(), - Offset = hipe_rtl:mk_new_reg_gcsafe(), case IndexInfo of fixnums -> %% This is 3 branches, 2 loads and 5 alus = 10 instr [test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab), FailLabName, 0.99), BoxedOkLab, - hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), - hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), - hipe_rtl:mk_alub(Tmp, Header, 'and', - hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', - hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), + get_header(Header, Tuple), + hipe_rtl:mk_branch(Header, 'and', + hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', + hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), TupleOkLab, untag_fixnum(UIndex, Index), hipe_rtl:mk_alu(Arity, Header, 'srl', hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, - UIndex, FailLabName, IndexOkLab)]; + gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)]; Num when is_integer(Num) -> %% This is 3 branches, 2 loads and 4 alus = 9 instr [test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab), FailLabName, 0.99), BoxedOkLab, - hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), - hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), - hipe_rtl:mk_alub(Tmp, Header, 'and', - hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', - hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), + get_header(Header, Tuple), + hipe_rtl:mk_branch(Header, 'and', + hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', + hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), TupleOkLab, hipe_rtl:mk_alu(Arity, Header, 'srl', hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, - hipe_rtl:mk_imm(Num), FailLabName, IndexOkLab)]; + gen_element_tail(Dst, Tuple, Arity, hipe_rtl:mk_imm(Num), FailLabName, + IndexOkLab)]; _ -> %% This is 4 branches, 2 loads, and 6 alus = 12 instr :( [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab), @@ -736,29 +747,29 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) -> test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab), FailLabName, 0.99), BoxedOkLab, - hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), - hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), - hipe_rtl:mk_alub(Tmp, Header, 'and', - hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', - hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), + get_header(Header, Tuple), + hipe_rtl:mk_branch(Header, 'and', + hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', + hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), TupleOkLab, untag_fixnum(UIndex, Index), hipe_rtl:mk_alu(Arity, Header, 'srl', hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| - gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, - UIndex, FailLabName, IndexOkLab)] + gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab)] end. -gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, - UIndex, FailLabName, IndexOkLab) -> +gen_element_tail(Dst, Tuple, Arity, UIndex, FailLabName, IndexOkLab) -> + ZeroIndex = hipe_rtl:mk_new_reg_gcsafe(), + Offset = hipe_rtl:mk_new_reg_gcsafe(), + Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple %% now check that 1 <= UIndex <= Arity - %% if UIndex < 1, then (Arity - UIndex) >= Arity - %% if UIndex > Arity, then (Arity - UIndex) < 0, which is >=u Arity - %% otherwise, 0 <= (Arity - UIndex) < Arity - [hipe_rtl:mk_alu(InvIndex, Arity, 'sub', UIndex), - hipe_rtl:mk_branch(InvIndex, 'geu', Arity, FailLabName, + %% by checking the equivalent (except for when Arity>=2^(WordSize-1)) + %% (UIndex - 1) <u Arity + [hipe_rtl:mk_alu(ZeroIndex, UIndex, 'sub', hipe_rtl:mk_imm(1)), + hipe_rtl:mk_branch(ZeroIndex, 'geu', Arity, FailLabName, hipe_rtl:label_name(IndexOkLab), 0.01), IndexOkLab, + hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), hipe_rtl:mk_alu(Offset, UIndex, 'sll', hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size())), hipe_rtl:mk_load(Dst, Ptr, Offset)]. @@ -781,10 +792,9 @@ tag_fun(Res, X) -> %% untag_fun(Res, X) -> %% hipe_rtl:mk_alu(Res, X, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)). --ifdef(EFT_NATIVE_ADDRESS). if_fun_get_arity_and_address(ArityReg, AddressReg, FunP, BadFunLab, Pred) -> %% EmuAddressPtrReg = hipe_rtl:mk_new_reg(), - %% FEPtrReg = hipe_rtl:mk_new_reg(), + FEPtrReg = hipe_rtl:mk_new_reg(), %% ArityReg = hipe_rtl:mk_new_reg(), %% NumFreeReg = hipe_rtl:mk_new_reg(), %% RealArityReg = hipe_rtl:mk_new_reg(), @@ -797,11 +807,12 @@ if_fun_get_arity_and_address(ArityReg, AddressReg, FunP, BadFunLab, Pred) -> hipe_rtl:mk_load(ArityReg, FunP, hipe_rtl:mk_imm(-(?TAG_PRIMARY_BOXED)+ ?EFT_ARITY)), - hipe_rtl:mk_load(AddressReg, FunP, + hipe_rtl:mk_load(FEPtrReg, FunP, hipe_rtl:mk_imm(-(?TAG_PRIMARY_BOXED)+ - ?EFT_NATIVE_ADDRESS))], + ?EFT_FE)), + hipe_rtl:mk_load(AddressReg, FEPtrReg, + hipe_rtl:mk_imm(?EFE_NATIVE_ADDRESS))], IsFunCode ++ GetArityCode. --endif. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -874,12 +885,10 @@ heap_arch_spec(HP) -> hipe_rtl_arch:pcb_store(?P_OFF_HEAP_FIRST, HP)]. test_heap_binary(Binary, TrueLblName, FalseLblName) -> - Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), - Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), - [get_header(Tmp1, Binary), - hipe_rtl:mk_alu(Tmp2, Tmp1, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK)), - hipe_rtl:mk_branch(Tmp2, eq, hipe_rtl:mk_imm(?TAG_HEADER_HEAP_BIN), - TrueLblName, FalseLblName)]. + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + [get_header(Tmp, Binary), + mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_HEAP_BIN, + TrueLblName, FalseLblName, 0.5)]. mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, Orig) -> mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, @@ -907,11 +916,10 @@ build_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, set_field_from_term({sub_binary, orig}, Dst, Orig)]. test_subbinary(Binary, TrueLblName, FalseLblName) -> - Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), - Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), - [get_header(Tmp1, Binary), - hipe_rtl:mk_alu(Tmp2, Tmp1, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK)), - hipe_rtl:mk_branch(Tmp2, eq, hipe_rtl:mk_imm(?TAG_HEADER_SUB_BIN), TrueLblName, FalseLblName)]. + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + [get_header(Tmp, Binary), + mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_SUB_BIN, + TrueLblName, FalseLblName, 0.5)]. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% diff --git a/lib/hipe/sparc/Makefile b/lib/hipe/sparc/Makefile index 0e36a43d8e..ac1230df7c 100644 --- a/lib/hipe/sparc/Makefile +++ b/lib/hipe/sparc/Makefile @@ -63,7 +63,8 @@ MODULES=hipe_rtl_to_sparc \ hipe_sparc_ra_naive \ hipe_sparc_ra_postconditions \ hipe_sparc_ra_postconditions_fp \ - hipe_sparc_registers + hipe_sparc_registers \ + hipe_sparc_subst HRL_FILES=hipe_sparc.hrl ERL_FILES=$(MODULES:%=%.erl) diff --git a/lib/hipe/sparc/hipe_rtl_to_sparc.erl b/lib/hipe/sparc/hipe_rtl_to_sparc.erl index f9c043eafe..d1a6b15508 100644 --- a/lib/hipe/sparc/hipe_rtl_to_sparc.erl +++ b/lib/hipe/sparc/hipe_rtl_to_sparc.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_rtl_to_sparc). @@ -63,7 +56,6 @@ conv_insn(I, Map, Data) -> case I of #alu{} -> conv_alu(I, Map, Data); #alub{} -> conv_alub(I, Map, Data); - #branch{} -> conv_branch(I, Map, Data); #call{} -> conv_call(I, Map, Data); #comment{} -> conv_comment(I, Map, Data); #enter{} -> conv_enter(I, Map, Data); @@ -281,7 +273,12 @@ mk_alu_rs(XAluOp, Src1, Src2, Dst) -> conv_alub(I, Map, Data) -> %% dst = src1 aluop src2; if COND goto label - {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), + HasDst = hipe_rtl:alub_has_dst(I), + {Dst, Map0} = + case HasDst of + false -> {hipe_sparc:mk_g0(), Map}; + true -> conv_dst(hipe_rtl:alub_dst(I), Map) + end, {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), Cond = conv_cond(hipe_rtl:alub_cond(I)), @@ -307,67 +304,33 @@ conv_alub(I, Map, Data) -> I1 ++ [hipe_sparc:mk_rdy(TmpHi), hipe_sparc:mk_alu('sra', Dst, hipe_sparc:mk_uimm5(31), TmpSign) | - conv_alub2(G0, TmpSign, 'sub', NewCond, TmpHi, I)]; + conv_alub2(G0, TmpSign, 'cmpcc', NewCond, TmpHi, I)]; _ -> - conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I) + XAluOp = + case (not HasDst) andalso RtlAlubOp =:= 'sub' of + true -> 'cmpcc'; % == a subcc that commutes + false -> conv_alubop_cc(RtlAlubOp) + end, + conv_alub2(Dst, Src1, XAluOp, Cond, Src2, I) end, {I2, Map2, Data}. --ifdef(notdef). % XXX: only for sparc64, alas -conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I) -> - case conv_cond_rcond(Cond) of - [] -> - conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I); - RCond -> - conv_alub_br(Dst, Src1, RtlAlubOp, RCond, Src2, I) - end. - -conv_alub_br(Dst, Src1, RtlAlubOp, RCond, Src2, I) -> - TrueLab = hipe_rtl:alub_true_label(I), - FalseLab = hipe_rtl:alub_false_label(I), - Pred = hipe_rtl:alub_pred(I), - %% "Dst = Src1 AluOp Src2; if COND" becomes - %% "Dst = Src1 AluOp Src2; if-COND(Dst)" - {I2, _DidCommute} = mk_alu(conv_alubop_nocc(RtlAlubOp), Src1, Src2, Dst), - I2 ++ mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred). - -conv_cond_rcond(Cond) -> - case Cond of - 'e' -> 'z'; - 'ne' -> 'nz'; - 'g' -> 'gz'; - 'ge' -> 'gez'; - 'l' -> 'lz'; - 'le' -> 'lez'; - _ -> [] % vs, vc, gu, geu, lu, leu - end. - -conv_alubop_nocc(RtlAlubOp) -> - case RtlAlubOp of - 'add' -> 'add'; - 'sub' -> 'sub'; - %% mul: handled elsewhere - 'or' -> 'or'; - 'and' -> 'and'; - 'xor' -> 'xor' - %% no shift ops - end. - -mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred) -> - [hipe_sparc:mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred)]. --else. -conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I) -> - conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I). --endif. +conv_alub2(Dst, Src1, XAluOp, Cond, Src2, I) -> + conv_alub_bp(Dst, Src1, XAluOp, Cond, Src2, I). -conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I) -> +conv_alub_bp(Dst, Src1, XAluOp, Cond, Src2, I) -> TrueLab = hipe_rtl:alub_true_label(I), FalseLab = hipe_rtl:alub_false_label(I), Pred = hipe_rtl:alub_pred(I), %% "Dst = Src1 AluOp Src2; if COND" becomes %% "Dst = Src1 AluOpCC Src22; if-COND(CC)" - {I2, _DidCommute} = mk_alu(conv_alubop_cc(RtlAlubOp), Src1, Src2, Dst), - I2 ++ mk_pseudo_bp(Cond, TrueLab, FalseLab, Pred). + {I2, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst), + NewCond = + case DidCommute andalso XAluOp =:= 'cmpcc' of + true -> commute_cond(Cond); % subcc does not commute; its conditions do + false -> Cond + end, + I2 ++ mk_pseudo_bp(NewCond, TrueLab, FalseLab, Pred). conv_alubop_cc(RtlAlubOp) -> case RtlAlubOp of @@ -380,69 +343,6 @@ conv_alubop_cc(RtlAlubOp) -> %% no shift ops end. -conv_branch(I, Map, Data) -> - %% <unused> = src1 - src2; if COND goto label - {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), - {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), - Cond = conv_cond(hipe_rtl:branch_cond(I)), - I2 = conv_branch2(Src1, Cond, Src2, I), - {I2, Map1, Data}. - --ifdef(notdef). % XXX: only for sparc64, alas -conv_branch2(Src1, Cond, Src2, I) -> - case conv_cond_rcond(Cond) of - [] -> - conv_branch_bp(Src1, Cond, Src2, I); - RCond -> - conv_branch_br(Src1, RCond, Src2, I) - end. - -conv_branch_br(Src1, RCond, Src2, I) -> - TrueLab = hipe_rtl:branch_true_label(I), - FalseLab = hipe_rtl:branch_false_label(I), - Pred = hipe_rtl:branch_pred(I), - %% "if src1-COND-src2" becomes - %% "sub src1,src2,tmp; if-COND(tmp)" - Dst = hipe_sparc:mk_new_temp('untagged'), - XAluOp = 'cmp', % == a sub that commutes - {I1, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst), - NewRCond = - case DidCommute of - true -> commute_rcond(RCond); - false -> RCond - end, - I1 ++ mk_pseudo_br(NewRCond, Dst, TrueLab, FalseLab, Pred). - -commute_rcond(RCond) -> % if x RCond y, then y commute_rcond(RCond) x - case RCond of - 'z' -> 'z'; % ==, == - 'nz' -> 'nz'; % !=, != - 'gz' -> 'lz'; % >, < - 'gez' -> 'lez'; % >=, <= - 'lz' -> 'gz'; % <, > - 'lez' -> 'gez' % <=, >= - end. --else. -conv_branch2(Src1, Cond, Src2, I) -> - conv_branch_bp(Src1, Cond, Src2, I). --endif. - -conv_branch_bp(Src1, Cond, Src2, I) -> - TrueLab = hipe_rtl:branch_true_label(I), - FalseLab = hipe_rtl:branch_false_label(I), - Pred = hipe_rtl:branch_pred(I), - %% "if src1-COND-src2" becomes - %% "subcc src1,src2,%g0; if-COND(CC)" - Dst = hipe_sparc:mk_g0(), - XAluOp = 'cmpcc', % == a subcc that commutes - {I1, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst), - NewCond = - case DidCommute of - true -> commute_cond(Cond); - false -> Cond - end, - I1 ++ mk_pseudo_bp(NewCond, TrueLab, FalseLab, Pred). - conv_call(I, Map, Data) -> {Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map), {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0), @@ -625,7 +525,7 @@ conv_return(I, Map, Data) -> {I2, Map0, Data}. conv_store(I, Map, Data) -> - {Base1, Map0} = conv_dst(hipe_rtl:store_base(I), Map), % no immediates allowed + {Base1, Map0} = conv_src(hipe_rtl:store_base(I), Map), {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {Base2, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), StOp = conv_stop(hipe_rtl:store_size(I)), @@ -649,13 +549,27 @@ mk_store(StOp, Src, Base1, Base2) -> end. mk_store2(StOp, Src, Base1, Base2) -> - case hipe_sparc:is_temp(Base2) of + case hipe_sparc:is_temp(Base1) of true -> - mk_store_rr(StOp, Src, Base1, Base2); + case hipe_sparc:is_temp(Base2) of + true -> + mk_store_rr(StOp, Src, Base1, Base2); + _ -> + mk_store_ri(StOp, Src, Base1, Base2) + end; _ -> - mk_store_ri(StOp, Src, Base1, Base2) + case hipe_sparc:is_temp(Base2) of + true -> + mk_store_ri(StOp, Src, Base2, Base1); + _ -> + mk_store_ii(StOp, Src, Base1, Base2) + end end. +mk_store_ii(StOp, Src, Base, Disp) -> + Tmp = new_untagged_temp(), + mk_set(Base, Tmp, mk_store_ri(StOp, Src, Tmp, Disp)). + mk_store_ri(StOp, Src, Base, Disp) -> hipe_sparc:mk_store(StOp, Src, Base, Disp, 'new', []). diff --git a/lib/hipe/sparc/hipe_sparc.erl b/lib/hipe/sparc/hipe_sparc.erl index 5ecb6aa8b9..22e0761b69 100644 --- a/lib/hipe/sparc/hipe_sparc.erl +++ b/lib/hipe/sparc/hipe_sparc.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc). -export([ @@ -94,6 +87,9 @@ mk_pseudo_set/2, + mk_pseudo_spill_move/3, + is_pseudo_spill_move/1, + mk_pseudo_tailcall/4, pseudo_tailcall_funv/1, pseudo_tailcall_linkage/1, @@ -124,6 +120,9 @@ pseudo_fmove_src/1, pseudo_fmove_dst/1, + mk_pseudo_spill_fmove/3, + is_pseudo_spill_fmove/1, + mk_pseudo_fstore/3, mk_fstore/4, @@ -276,6 +275,10 @@ mk_pseudo_ret() -> #pseudo_ret{}. mk_pseudo_set(Imm, Dst) -> #pseudo_set{imm=Imm, dst=Dst}. +mk_pseudo_spill_move(Src, Temp, Dst) -> + #pseudo_spill_move{src=Src, temp=Temp, dst=Dst}. +is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move). + mk_pseudo_tailcall(FunV, Arity, StkArgs, Linkage) -> #pseudo_tailcall{funv=FunV, arity=Arity, stkargs=StkArgs, linkage=Linkage}. pseudo_tailcall_funv(#pseudo_tailcall{funv=FunV}) -> FunV. @@ -382,6 +385,10 @@ is_pseudo_fmove(I) -> case I of #pseudo_fmove{} -> true; _ -> false end. pseudo_fmove_src(#pseudo_fmove{src=Src}) -> Src. pseudo_fmove_dst(#pseudo_fmove{dst=Dst}) -> Dst. +mk_pseudo_spill_fmove(Src, Temp, Dst) -> + #pseudo_spill_fmove{src=Src, temp=Temp, dst=Dst}. +is_pseudo_spill_fmove(I) -> is_record(I, pseudo_spill_fmove). + mk_pseudo_fstore(Src, Base, Disp) -> #pseudo_fstore{src=Src, base=Base, disp=Disp}. diff --git a/lib/hipe/sparc/hipe_sparc.hrl b/lib/hipe/sparc/hipe_sparc.hrl index 01c8d07f22..f60e516e59 100644 --- a/lib/hipe/sparc/hipe_sparc.hrl +++ b/lib/hipe/sparc/hipe_sparc.hrl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% %%%-------------------------------------------------------------------- %%% Basic Values: @@ -95,6 +88,8 @@ -record(pseudo_move, {src, dst}). -record(pseudo_ret, {}). -record(pseudo_set, {imm, dst}). +-record(pseudo_spill_fmove, {src, temp, dst}). +-record(pseudo_spill_move, {src, temp, dst}). -record(pseudo_tailcall, {funv, arity, stkargs, linkage}). -record(pseudo_tailcall_prepare, {}). -record(rdy, {dst}). diff --git a/lib/hipe/sparc/hipe_sparc_assemble.erl b/lib/hipe/sparc/hipe_sparc_assemble.erl index e92f024968..2b82f41d23 100644 --- a/lib/hipe/sparc/hipe_sparc_assemble.erl +++ b/lib/hipe/sparc/hipe_sparc_assemble.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_assemble). -export([assemble/4]). @@ -39,7 +32,7 @@ assemble(CompiledCode, Closures, Exports, Options) -> || {MFA, Defun} <- CompiledCode], %% {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = - hipe_pack_constants:pack_constants(Code, 4), + hipe_pack_constants:pack_constants(Code), %% {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = encode(translate(Code, ConstMap), Options), diff --git a/lib/hipe/sparc/hipe_sparc_cfg.erl b/lib/hipe/sparc/hipe_sparc_cfg.erl index 0b2c77f27b..45c8e887b5 100644 --- a/lib/hipe/sparc/hipe_sparc_cfg.erl +++ b/lib/hipe/sparc/hipe_sparc_cfg.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,20 +11,19 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_cfg). -export([init/1, labels/1, start_label/1, succ/2, + map_bbs/2, fold_bbs/3, bb/2, bb_add/3]). -export([postorder/1, reverse_postorder/1]). -export([linearise/1]). -export([params/1]). -export([arity/1]). % for linear scan +-export([redirect_jmp/3, branch_preds/1]). -define(SPARC_CFG, true). % needed for cfg.inc @@ -83,28 +78,53 @@ branch_successors(Branch) -> #pseudo_tailcall{} -> [] end. +branch_preds(Branch) -> + case Branch of + #jmp{labels=Labels} -> + Prob = 1.0/length(Labels), + [{L, Prob} || L <- Labels]; + #pseudo_bp{true_label=TrueLab,false_label=FalseLab,pred=Pred} -> + [{FalseLab, 1.0-Pred}, {TrueLab, Pred}]; + #pseudo_call{contlab=ContLab, sdesc=#sparc_sdesc{exnlab=[]}} -> + %% A function can still cause an exception, even if we won't catch it + [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}]; + #pseudo_call{contlab=ContLab, sdesc=#sparc_sdesc{exnlab=ExnLab}} -> + CallExnPred = hipe_bb_weights:call_exn_pred(), + [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}]; + _ -> + case branch_successors(Branch) of + [] -> []; + [Single] -> [{Single, 1.0}] + end + end. + -ifdef(REMOVE_TRIVIAL_BBS_NEEDED). fails_to(_Instr) -> []. -endif. --ifdef(notdef). redirect_jmp(I, Old, New) -> case I of - #b_label{label=Label} -> - if Old =:= Label -> I#b_label{label=New}; + #bp{'cond'='a',label=Label} -> + if Old =:= Label -> I#bp{label=New}; true -> I end; - #pseudo_bc{true_label=TrueLab, false_label=FalseLab} -> - I1 = if Old =:= TrueLab -> I#pseudo_bc{true_label=New}; + #pseudo_bp{true_label=TrueLab, false_label=FalseLab} -> + I1 = if Old =:= TrueLab -> I#pseudo_bp{true_label=New}; true -> I end, - if Old =:= FalseLab -> I1#pseudo_bc{false_label=New}; + if Old =:= FalseLab -> I1#pseudo_bp{false_label=New}; true -> I1 end; - %% handle pseudo_call too? - _ -> I + #pseudo_call{contlab=ContLab0, sdesc=SDesc0} -> + SDesc = case SDesc0 of + #sparc_sdesc{exnlab=Old} -> SDesc0#sparc_sdesc{exnlab=New}; + #sparc_sdesc{exnlab=_} -> SDesc0 + end, + ContLab = if Old =:= ContLab0 -> New; + true -> ContLab0 + end, + I#pseudo_call{sdesc=SDesc, contlab=ContLab} end. --endif. mk_goto(Label) -> hipe_sparc:mk_b_label(Label). diff --git a/lib/hipe/sparc/hipe_sparc_defuse.erl b/lib/hipe/sparc/hipe_sparc_defuse.erl index 4f66299f1d..4d4b11e301 100644 --- a/lib/hipe/sparc/hipe_sparc_defuse.erl +++ b/lib/hipe/sparc/hipe_sparc_defuse.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,14 +11,12 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_defuse). -export([insn_def_all/1, insn_use_all/1]). -export([insn_def_gpr/1, insn_use_gpr/1]). -export([insn_def_fpr/1, insn_use_fpr/1]). +-export([insn_defs_all_gpr/1, insn_defs_all_fpr/1]). -include("hipe_sparc.hrl"). %%% @@ -45,12 +39,19 @@ insn_def_gpr(I) -> #pseudo_call{} -> call_clobbered_gpr(); #pseudo_move{dst=Dst} -> [Dst]; #pseudo_set{dst=Dst} -> [Dst]; + #pseudo_spill_move{temp=Temp, dst=Dst} -> [Temp, Dst]; #pseudo_tailcall_prepare{} -> tailcall_clobbered_gpr(); #rdy{dst=Dst} -> [Dst]; #sethi{dst=Dst} -> [Dst]; _ -> [] end. +insn_defs_all_gpr(I) -> + case I of + #pseudo_call{} -> true; + _ -> false + end. + call_clobbered_gpr() -> [hipe_sparc:mk_temp(R, T) || {R,T} <- hipe_sparc_registers:call_clobbered() ++ all_fp_pseudos()]. @@ -72,6 +73,7 @@ insn_use_gpr(I) -> funv_use(FunV, arity_use_gpr(Arity)); #pseudo_move{src=Src} -> [Src]; #pseudo_ret{} -> [hipe_sparc:mk_rv()]; + #pseudo_spill_move{src=Src} -> [Src]; #pseudo_tailcall{funv=FunV,arity=Arity,stkargs=StkArgs} -> addsrcs(StkArgs, addtemps(tailcall_clobbered_gpr(), funv_use(FunV, arity_use_gpr(Arity)))); #store{src=Src,base=Base,disp=Disp} -> @@ -112,9 +114,16 @@ insn_def_fpr(I) -> #fp_unary{dst=Dst} -> [Dst]; #pseudo_fload{dst=Dst} -> [Dst]; #pseudo_fmove{dst=Dst} -> [Dst]; + #pseudo_spill_fmove{temp=Temp, dst=Dst} -> [Temp, Dst]; _ -> [] end. +insn_defs_all_fpr(I) -> + case I of + #pseudo_call{} -> true; + _ -> false + end. + call_clobbered_fpr() -> [hipe_sparc:mk_temp(R, 'double') || R <- hipe_sparc_registers:allocatable_fpr()]. @@ -124,6 +133,7 @@ insn_use_fpr(I) -> #fp_unary{src=Src} -> [Src]; #pseudo_fmove{src=Src} -> [Src]; #pseudo_fstore{src=Src} -> [Src]; + #pseudo_spill_fmove{src=Src} -> [Src]; _ -> [] end. diff --git a/lib/hipe/sparc/hipe_sparc_encode.erl b/lib/hipe/sparc/hipe_sparc_encode.erl index 6a7d502fd3..f0ee2d1647 100644 --- a/lib/hipe/sparc/hipe_sparc_encode.erl +++ b/lib/hipe/sparc/hipe_sparc_encode.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% Encode symbolic SPARC instructions to binary form. %%% Copyright (C) 2007-2008 Mikael Pettersson diff --git a/lib/hipe/sparc/hipe_sparc_finalise.erl b/lib/hipe/sparc/hipe_sparc_finalise.erl index 2b7125fb73..3634a4d14c 100644 --- a/lib/hipe/sparc/hipe_sparc_finalise.erl +++ b/lib/hipe/sparc/hipe_sparc_finalise.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_finalise). -export([finalise/1]). diff --git a/lib/hipe/sparc/hipe_sparc_frame.erl b/lib/hipe/sparc/hipe_sparc_frame.erl index a42c1983f4..1f2a259ca1 100644 --- a/lib/hipe/sparc/hipe_sparc_frame.erl +++ b/lib/hipe/sparc/hipe_sparc_frame.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2002-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_frame). -export([frame/1]). @@ -25,16 +18,14 @@ -include("hipe_sparc.hrl"). -include("../rtl/hipe_literals.hrl"). -frame(Defun) -> - Formals = fix_formals(hipe_sparc:defun_formals(Defun)), - Temps0 = all_temps(hipe_sparc:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG) -> + Formals = fix_formals(hipe_sparc_cfg:params(CFG)), + Temps0 = all_temps(CFG, Formals), + MinFrame = defun_minframe(CFG), Temps = ensure_minframe(MinFrame, Temps0), - ClobbersRA = clobbers_ra(hipe_sparc:defun_code(Defun)), - CFG0 = hipe_sparc_cfg:init(Defun), - Liveness = hipe_sparc_liveness_all:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps, ClobbersRA), - hipe_sparc_cfg:linearise(CFG1). + ClobbersRA = clobbers_ra(CFG), + Liveness = hipe_sparc_liveness_all:analyse(CFG), + do_body(CFG, Liveness, Formals, Temps, ClobbersRA). fix_formals(Formals) -> fix_formals(hipe_sparc_registers:nr_args(), Formals). @@ -91,6 +82,10 @@ do_insn(I, LiveOut, Context, FPoff) -> {do_pseudo_tailcall(I, Context), context_framesize(Context)}; #pseudo_fmove{} -> {do_pseudo_fmove(I, Context, FPoff), FPoff}; + #pseudo_spill_move{} -> + {do_pseudo_spill_move(I, Context, FPoff), FPoff}; + #pseudo_spill_fmove{} -> + {do_pseudo_spill_fmove(I, Context, FPoff), FPoff}; _ -> {[I], FPoff} end. @@ -112,7 +107,26 @@ do_pseudo_move(I, Context, FPoff) -> Offset = pseudo_offset(Src, FPoff, Context), mk_load(hipe_sparc:mk_sp(), Offset, Dst, []); _ -> - [hipe_sparc:mk_mov(Src, Dst)] + case hipe_sparc:temp_reg(Dst) =:= hipe_sparc:temp_reg(Src) of + true -> []; + false -> [hipe_sparc:mk_mov(Src, Dst)] + end + end + end. + +do_pseudo_spill_move(I, Context, FPoff) -> + #pseudo_spill_move{src=Src,temp=Temp,dst=Dst} = I, + case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of + false -> % Register allocator changed its mind, turn back to move + do_pseudo_move(hipe_sparc:mk_pseudo_move(Src, Dst), Context, FPoff); + true -> + SrcOffset = pseudo_offset(Src, FPoff, Context), + DstOffset = pseudo_offset(Dst, FPoff, Context), + case SrcOffset =:= DstOffset of + true -> []; % omit move-to-self + false -> + mk_load(hipe_sparc:mk_sp(), SrcOffset, Temp, + mk_store(Temp, hipe_sparc:mk_sp(), DstOffset, [])) end end. @@ -133,6 +147,22 @@ do_pseudo_fmove(I, Context, FPoff) -> end end. +do_pseudo_spill_fmove(I, Context, FPoff) -> + #pseudo_spill_fmove{src=Src,temp=Temp,dst=Dst} = I, + case temp_is_pseudo(Src) andalso temp_is_pseudo(Dst) of + false -> % Register allocator changed its mind, turn back to fmove + do_pseudo_fmove(hipe_sparc:mk_pseudo_fmove(Src, Dst), Context, FPoff); + true -> + SrcOffset = pseudo_offset(Src, FPoff, Context), + DstOffset = pseudo_offset(Dst, FPoff, Context), + case SrcOffset =:= DstOffset of + true -> []; % omit move-to-self + false -> + mk_fload(hipe_sparc:mk_sp(), SrcOffset, Temp) + ++ mk_fstore(Temp, hipe_sparc:mk_sp(), DstOffset) + end + end. + pseudo_offset(Temp, FPoff, Context) -> FPoff + context_offset(Context, Temp). @@ -550,29 +580,41 @@ temp_is_pseudo(Temp) -> %%% Detect if a Defun's body clobbers RA. %%% -clobbers_ra(Insns) -> - case Insns of - [#pseudo_call{}|_] -> true; - %% moves to RA cannot occur yet - [_|Rest] -> clobbers_ra(Rest); - [] -> false +clobbers_ra(CFG) -> + any_insn(fun(#pseudo_call{}) -> true; + (_) -> false + end, CFG). + +any_insn(Pred, CFG) -> + %% Abuse fold to do an efficient "any"-operation using nonlocal control flow + FoundSatisfying = make_ref(), + try fold_insns(fun (I, _) -> + case Pred(I) of + true -> throw(FoundSatisfying); + false -> false + end + end, false, CFG) + of _ -> false + catch FoundSatisfying -> true end. %%% %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), tset_filter(S1, fun(T) -> temp_is_pseudo(T) end). -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_sparc_defuse:insn_def_all(I)), - S2 = tset_add_list(S1, hipe_sparc_defuse:insn_use_all(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_sparc_defuse:insn_use_all(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_sparc_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). tset_empty() -> gb_sets:new(). @@ -601,16 +643,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_sparc:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_sparc:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_sparc_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/sparc/hipe_sparc_liveness_all.erl b/lib/hipe/sparc/hipe_sparc_liveness_all.erl index b7c3e1962a..f67b6c0fca 100644 --- a/lib/hipe/sparc/hipe_sparc_liveness_all.erl +++ b/lib/hipe/sparc/hipe_sparc_liveness_all.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_liveness_all). -export([analyse/1]). diff --git a/lib/hipe/sparc/hipe_sparc_liveness_fpr.erl b/lib/hipe/sparc/hipe_sparc_liveness_fpr.erl index 497c554c3e..bd2c0c75ee 100644 --- a/lib/hipe/sparc/hipe_sparc_liveness_fpr.erl +++ b/lib/hipe/sparc/hipe_sparc_liveness_fpr.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_liveness_fpr). -export([analyse/1]). diff --git a/lib/hipe/sparc/hipe_sparc_liveness_gpr.erl b/lib/hipe/sparc/hipe_sparc_liveness_gpr.erl index 55d639e3a2..848148e301 100644 --- a/lib/hipe/sparc/hipe_sparc_liveness_gpr.erl +++ b/lib/hipe/sparc/hipe_sparc_liveness_gpr.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_liveness_gpr). -export([analyse/1]). diff --git a/lib/hipe/sparc/hipe_sparc_main.erl b/lib/hipe/sparc/hipe_sparc_main.erl index c16751c7bd..ea05721f60 100644 --- a/lib/hipe/sparc/hipe_sparc_main.erl +++ b/lib/hipe/sparc/hipe_sparc_main.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,21 +11,20 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_main). -export([rtl_to_sparc/3]). rtl_to_sparc(MFA, RTL, Options) -> Defun1 = hipe_rtl_to_sparc:translate(RTL), + CFG1 = hipe_sparc_cfg:init(Defun1), %% io:format("~w: after translate\n", [?MODULE]), %% hipe_sparc_pp:pp(Defun1), - Defun2 = hipe_sparc_ra:ra(Defun1, Options), + CFG2 = hipe_sparc_ra:ra(CFG1, Options), %% io:format("~w: after regalloc\n", [?MODULE]), - %% hipe_sparc_pp:pp(Defun2), - Defun3 = hipe_sparc_frame:frame(Defun2), + %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG2)), + CFG3 = hipe_sparc_frame:frame(CFG2), + Defun3 = hipe_sparc_cfg:linearise(CFG3), %% io:format("~w: after frame\n", [?MODULE]), %% hipe_sparc_pp:pp(Defun3), Defun4 = hipe_sparc_finalise:finalise(Defun3), diff --git a/lib/hipe/sparc/hipe_sparc_pp.erl b/lib/hipe/sparc/hipe_sparc_pp.erl index 8fa0a9c788..d893094eb7 100644 --- a/lib/hipe/sparc/hipe_sparc_pp.erl +++ b/lib/hipe/sparc/hipe_sparc_pp.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2002-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_pp). -export([pp/1, pp/2, pp_insn/1]). diff --git a/lib/hipe/sparc/hipe_sparc_ra.erl b/lib/hipe/sparc/hipe_sparc_ra.erl index afea8c9b4c..ba1a9aa3d8 100644 --- a/lib/hipe/sparc/hipe_sparc_ra.erl +++ b/lib/hipe/sparc/hipe_sparc_ra.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,43 +11,43 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_ra). -export([ra/2]). -ra(Defun0, Options) -> - %% hipe_sparc_pp:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} +ra(CFG0, Options) -> + %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG0)), + {CFG1, _FPLiveness1, Coloring_fp, SpillIndex} = case proplists:get_bool(inline_fp, Options) of true -> - hipe_regalloc_loop:ra_fp(Defun0, Options, + FPLiveness0 = hipe_sparc_specific_fp:analyze(CFG0, no_context), + hipe_regalloc_loop:ra_fp(CFG0, FPLiveness0, Options, hipe_coalescing_regalloc, - hipe_sparc_specific_fp); + hipe_sparc_specific_fp, no_context); false -> - {Defun0,[],0} + {CFG0,undefined,[],0} end, - %% hipe_sparc_pp:pp(Defun1), - {Defun2, Coloring} + %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG1)), + GPLiveness1 = hipe_sparc_specific:analyze(CFG1, no_context), + {CFG2, _GPLiveness2, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, GPLiveness1, SpillIndex, Options, hipe_graph_coloring_regalloc); linear_scan -> - hipe_sparc_ra_ls:ra(Defun1, SpillIndex, Options); + hipe_sparc_ra_ls:ra(CFG1, GPLiveness1, SpillIndex, Options); naive -> - hipe_sparc_ra_naive:ra(Defun1, Coloring_fp, Options); + hipe_sparc_ra_naive:ra(CFG1, GPLiveness1, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, - %% hipe_sparc_pp:pp(Defun2), - hipe_sparc_ra_finalise:finalise(Defun2, Coloring, Coloring_fp). + %% hipe_sparc_pp:pp(hipe_sparc_cfg:linearise(CFG2)), + hipe_sparc_ra_finalise:finalise(CFG2, Coloring, Coloring_fp). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, hipe_sparc_specific). +ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, + hipe_sparc_specific, no_context). diff --git a/lib/hipe/sparc/hipe_sparc_ra_finalise.erl b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl index dc1e69c101..a724821992 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_finalise.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_finalise.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,21 +11,19 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_ra_finalise). -export([finalise/3]). -include("hipe_sparc.hrl"). -finalise(Defun, TempMap, FPMap0) -> - Code = hipe_sparc:defun_code(Defun), - {_, SpillLimit} = hipe_sparc:defun_var_range(Defun), +finalise(CFG, TempMap, FPMap0) -> + {_, SpillLimit} = hipe_gensym:var_range(sparc), Map = mk_ra_map(TempMap, SpillLimit), FPMap1 = mk_ra_map_fp(FPMap0, SpillLimit), - NewCode = ra_code(Code, Map, FPMap1, []), - Defun#defun{code=NewCode}. + hipe_sparc_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FPMap1) end, CFG). + +ra_bb(BB, Map, FpMap) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap, [])). ra_code([I|Insns], Map, FPMap, Accum) -> ra_code(Insns, Map, FPMap, [ra_insn(I, Map, FPMap) | Accum]); @@ -44,6 +38,7 @@ ra_insn(I, Map, FPMap) -> #pseudo_call{} -> ra_pseudo_call(I, Map); #pseudo_move{} -> ra_pseudo_move(I, Map); #pseudo_set{} -> ra_pseudo_set(I, Map); + #pseudo_spill_move{} -> ra_pseudo_spill_move(I, Map); #pseudo_tailcall{} -> ra_pseudo_tailcall(I, Map); #rdy{} -> ra_rdy(I, Map); #sethi{} -> ra_sethi(I, Map); @@ -53,6 +48,7 @@ ra_insn(I, Map, FPMap) -> #pseudo_fload{} -> ra_pseudo_fload(I, Map, FPMap); #pseudo_fmove{} -> ra_pseudo_fmove(I, FPMap); #pseudo_fstore{} -> ra_pseudo_fstore(I, Map, FPMap); + #pseudo_spill_fmove{} -> ra_pseudo_spill_fmove(I, FPMap); _ -> I end. @@ -86,6 +82,12 @@ ra_pseudo_set(I=#pseudo_set{dst=Dst}, Map) -> NewDst = ra_temp(Dst, Map), I#pseudo_set{dst=NewDst}. +ra_pseudo_spill_move(I=#pseudo_spill_move{src=Src,temp=Temp,dst=Dst}, Map) -> + NewSrc = ra_temp(Src, Map), + NewTemp = ra_temp(Temp, Map), + NewDst = ra_temp(Dst, Map), + I#pseudo_spill_move{src=NewSrc,temp=NewTemp,dst=NewDst}. + ra_pseudo_tailcall(I=#pseudo_tailcall{funv=FunV,stkargs=StkArgs}, Map) -> NewFunV = ra_funv(FunV, Map), NewStkArgs = ra_args(StkArgs, Map), @@ -126,6 +128,13 @@ ra_pseudo_fmove(I=#pseudo_fmove{src=Src,dst=Dst}, FPMap) -> NewDst = ra_temp_fp(Dst, FPMap), I#pseudo_fmove{src=NewSrc,dst=NewDst}. +ra_pseudo_spill_fmove(I=#pseudo_spill_fmove{src=Src,temp=Temp,dst=Dst}, + FPMap) -> + NewSrc = ra_temp_fp(Src, FPMap), + NewTemp = ra_temp_fp(Temp, FPMap), + NewDst = ra_temp_fp(Dst, FPMap), + I#pseudo_spill_fmove{src=NewSrc,temp=NewTemp,dst=NewDst}. + ra_pseudo_fstore(I=#pseudo_fstore{src=Src,base=Base}, Map, FPMap) -> NewSrc = ra_temp_fp(Src, FPMap), NewBase = ra_temp(Base, Map), diff --git a/lib/hipe/sparc/hipe_sparc_ra_ls.erl b/lib/hipe/sparc/hipe_sparc_ra_ls.erl index 19e7c92d2f..1b8659ab70 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_ls.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_ls.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,43 +11,39 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %% Linear Scan register allocator for SPARC -module(hipe_sparc_ra_ls). --export([ra/3]). +-export([ra/4]). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_sparc_cfg:init(NewDefun), - SpillLimit = hipe_sparc_specific:number_of_temporaries(CFG), - alloc(NewDefun, SpillIndex, SpillLimit, Options). +ra(CFG, Liveness, SpillIndex, Options) -> + SpillLimit = hipe_sparc_specific:number_of_temporaries(CFG, no_context), + alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). -alloc(Defun, SpillIndex, SpillLimit, Options) -> - CFG = hipe_sparc_cfg:init(Defun), +alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) -> {Coloring, _NewSpillIndex} = regalloc( - CFG, + CFG, Liveness, hipe_sparc_registers:allocatable_gpr()-- [hipe_sparc_registers:temp3(), hipe_sparc_registers:temp2(), hipe_sparc_registers:temp1()], [hipe_sparc_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, - hipe_sparc_specific), - {NewDefun, _DidSpill} = + hipe_sparc_specific, no_context), + {NewCFG, _DidSpill} = hipe_sparc_ra_postconditions:check_and_rewrite( - Defun, Coloring, 'linearscan'), - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific), + CFG, Coloring, 'linearscan'), + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific, no_context), {TempMap2,_NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, - hipe_sparc_specific, TempMap), + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + hipe_sparc_specific, no_context, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), - {NewDefun, Coloring2}. + {NewCFG, Liveness, Coloring2}. -regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) -> - hipe_ls_regalloc:regalloc( - CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target). +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, + TgtMod, TgtCtx) -> + hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, + DontSpill, Options, TgtMod, TgtCtx). diff --git a/lib/hipe/sparc/hipe_sparc_ra_naive.erl b/lib/hipe/sparc/hipe_sparc_ra_naive.erl index b6c33dec6c..5ea9ead6bc 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_naive.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_naive.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,16 +11,13 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_ra_naive). --export([ra/3]). +-export([ra/4]). -include("hipe_sparc.hrl"). -ra(Defun, _Coloring_fp, _Options) -> % -> {Defun, Coloring} - {NewDefun,_DidSpill} = - hipe_sparc_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'), - {NewDefun, []}. +ra(CFG, Liveness, _Coloring_fp, _Options) -> % -> {CFG, Liveness, Coloring} + {NewCFG,_DidSpill} = + hipe_sparc_ra_postconditions:check_and_rewrite2(CFG, [], 'naive'), + {NewCFG, Liveness, []}. diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl index ab31b3c8d9..d3ecb43ec6 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2002-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_ra_postconditions). @@ -25,17 +18,13 @@ -include("hipe_sparc.hrl"). -check_and_rewrite(Defun, Coloring, Allocator) -> - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific), - check_and_rewrite2(Defun, TempMap, Allocator). +check_and_rewrite(CFG, Coloring, Allocator) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific, no_context), + check_and_rewrite2(CFG, TempMap, Allocator). -check_and_rewrite2(Defun, TempMap, Allocator) -> +check_and_rewrite2(CFG, TempMap, Allocator) -> Strategy = strategy(Allocator), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - VarRange = {0, hipe_gensym:get_var(sparc)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. + do_bbs(hipe_sparc_cfg:labels(CFG), TempMap, Strategy, CFG, false). strategy(Allocator) -> case Allocator of @@ -44,6 +33,13 @@ strategy(Allocator) -> 'naive' -> 'fixed' end. +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_sparc_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_sparc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). + do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); @@ -58,6 +54,7 @@ do_insn(I, TempMap, Strategy) -> #pseudo_call{} -> do_pseudo_call(I, TempMap, Strategy); #pseudo_move{} -> do_pseudo_move(I, TempMap, Strategy); #pseudo_set{} -> do_pseudo_set(I, TempMap, Strategy); + #pseudo_spill_move{} -> do_pseudo_spill_move(I, TempMap, Strategy); #pseudo_tailcall{} -> do_pseudo_tailcall(I, TempMap, Strategy); #rdy{} -> do_rdy(I, TempMap, Strategy); #sethi{} -> do_sethi(I, TempMap, Strategy); @@ -96,14 +93,16 @@ do_pseudo_call(I=#pseudo_call{funv=FunV}, TempMap, Strategy) -> do_pseudo_move(I=#pseudo_move{src=Src,dst=Dst}, TempMap, Strategy) -> %% Either Dst or Src (but not both) may be a pseudo temp. - %% pseudo_move is a special case: in [XXX: not pseudo_tailcall] - %% all other instructions, all temps must be non-pseudos - %% after register allocation. - case temp_is_spilled(Dst, TempMap) of - true -> % Src must not be a pseudo - {FixSrc,NewSrc,DidSpill} = fix_src1(Src, TempMap, Strategy), - NewI = I#pseudo_move{src=NewSrc}, - {FixSrc ++ [NewI], DidSpill}; + %% pseudo_move and pseudo_spill_move [XXX: not pseudo_tailcall] + %% are special cases: in all other instructions, all temps must + %% be non-pseudos after register allocation. + case temp_is_spilled(Src, TempMap) + andalso temp_is_spilled(Dst, TempMap) + of + true -> % Turn into pseudo_spill_move + Temp = clone(Src, temp1(Strategy)), + NewI = #pseudo_spill_move{src=Src,temp=Temp,dst=Dst}, + {[NewI], true}; _ -> {[I], false} end. @@ -113,6 +112,11 @@ do_pseudo_set(I=#pseudo_set{dst=Dst}, TempMap, Strategy) -> NewI = I#pseudo_set{dst=NewDst}, {[NewI | FixDst], DidSpill}. +do_pseudo_spill_move(I=#pseudo_spill_move{temp=Temp}, TempMap, _Strategy) -> + %% Temp is above the low water mark and must not have been spilled + false = temp_is_spilled(Temp, TempMap), + {[I], false}. + do_pseudo_tailcall(I=#pseudo_tailcall{funv=FunV}, TempMap, Strategy) -> {FixFunV,NewFunV,DidSpill} = fix_funv(FunV, TempMap, Strategy), NewI = I#pseudo_tailcall{funv=NewFunV}, diff --git a/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl index d893ac26e9..5fa3a5fc59 100644 --- a/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl +++ b/lib/hipe/sparc/hipe_sparc_ra_postconditions_fp.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_ra_postconditions_fp). @@ -25,13 +18,17 @@ -include("hipe_sparc.hrl"). -check_and_rewrite(Defun, Coloring) -> - TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific_fp), - #defun{code=Code0} = Defun, - {Code1,DidSpill} = do_insns(Code0, TempMap, [], false), - VarRange = {0, hipe_gensym:get_var(sparc)}, - {Defun#defun{code=Code1, var_range=VarRange}, - DidSpill}. +check_and_rewrite(CFG, Coloring) -> + TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_sparc_specific_fp, + no_context), + do_bbs(hipe_sparc_cfg:labels(CFG), TempMap, CFG, false). + +do_bbs([], _TempMap, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_sparc_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, [], DidSpill0), + CFG = hipe_sparc_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, CFG, DidSpill). do_insns([I|Insns], TempMap, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap), @@ -46,6 +43,7 @@ do_insn(I, TempMap) -> #pseudo_fload{} -> do_pseudo_fload(I, TempMap); #pseudo_fmove{} -> do_pseudo_fmove(I, TempMap); #pseudo_fstore{} -> do_pseudo_fstore(I, TempMap); + #pseudo_spill_fmove{} -> do_pseudo_spill_fmove(I, TempMap); _ -> {[I], false} end. @@ -70,11 +68,13 @@ do_pseudo_fload(I=#pseudo_fload{dst=Dst}, TempMap) -> {[NewI | FixDst], DidSpill}. do_pseudo_fmove(I=#pseudo_fmove{src=Src,dst=Dst}, TempMap) -> - case temp_is_spilled(Dst, TempMap) of - true -> - {FixSrc,NewSrc,DidSpill} = fix_src(Src, TempMap), - NewI = I#pseudo_fmove{src=NewSrc}, - {FixSrc ++ [NewI], DidSpill}; + case temp_is_spilled(Src, TempMap) + andalso temp_is_spilled(Dst, TempMap) + of + true -> % Turn into pseudo_spill_fmove + Temp = clone(Src), + NewI = #pseudo_spill_fmove{src=Src,temp=Temp,dst=Dst}, + {[NewI], true}; _ -> {[I], false} end. @@ -84,6 +84,11 @@ do_pseudo_fstore(I=#pseudo_fstore{src=Src}, TempMap) -> NewI = I#pseudo_fstore{src=NewSrc}, {FixSrc ++ [NewI], DidSpill}. +do_pseudo_spill_fmove(I=#pseudo_spill_fmove{temp=Temp}, TempMap) -> + %% Temp is above the low water mark and must not have been spilled + false = temp_is_spilled(Temp, TempMap), + {[I], false}. + %%% Fix Dst and Src operands. fix_src(Src, TempMap) -> diff --git a/lib/hipe/sparc/hipe_sparc_registers.erl b/lib/hipe/sparc/hipe_sparc_registers.erl index 6681a10070..47876e21d2 100644 --- a/lib/hipe/sparc/hipe_sparc_registers.erl +++ b/lib/hipe/sparc/hipe_sparc_registers.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_sparc_registers). @@ -249,6 +242,8 @@ is_arg(R) -> _ -> false end. +%% Note: the fact that allocatable_gpr() is a subset of call_clobbered() is +%% hard-coded in hipe_sparc_defuse:insn_defs_all_gpr/1 call_clobbered() -> % does the RA strip the type or not? [%% ?G0 is the non-allocatable constant zero {?G1,tagged},{?G1,untagged}, diff --git a/lib/hipe/sparc/hipe_sparc_subst.erl b/lib/hipe/sparc/hipe_sparc_subst.erl new file mode 100644 index 0000000000..ce3bbb813a --- /dev/null +++ b/lib/hipe/sparc/hipe_sparc_subst.erl @@ -0,0 +1,82 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +-module(hipe_sparc_subst). +-export([insn_temps/2]). +-include("hipe_sparc.hrl"). + +%% These should be moved to hipe_sparc and exported +-type temp() :: #sparc_temp{}. +-type src2() :: temp() | #sparc_simm13{}. +-type src2b() :: src2() | #sparc_uimm5{}. +-type funv() :: #sparc_mfa{} | #sparc_prim{} | temp(). +-type arg() :: temp() | integer(). +-type insn() :: tuple(). % for now + +-type subst_fun() :: fun((temp()) -> temp()). + +%% @doc Maps over the temporaries in an instruction +-spec insn_temps(subst_fun(), insn()) -> insn(). +insn_temps(T, I) -> + S2 = fun(O) -> src2_temps(T, O) end, + S2B = fun(O) -> src2b_temps(T, O) end, + Arg = fun(O) -> arg_temps(T, O) end, + case I of + #alu{src1=L,src2=R,dst=D} -> I#alu{src1=T(L),src2=S2B(R),dst=T(D)}; + #bp{} -> I; + #comment{} -> I; + #jmp{src1=L,src2=R} -> I#jmp{src1=T(L),src2=S2(R)}; + #label{} -> I; + #pseudo_bp{} -> I; + #pseudo_call{funv=F} -> I#pseudo_call{funv=funv_temps(T,F)}; + #pseudo_call_prepare{} -> I; + #pseudo_move{src=S,dst=D} -> I#pseudo_move{src=T(S),dst=T(D)}; + #pseudo_ret{} -> I; + #pseudo_set{dst=D}-> I#pseudo_set{dst=T(D)}; + #pseudo_spill_move{src=S,temp=U,dst=D} -> + I#pseudo_spill_move{src=T(S),temp=T(U),dst=T(D)}; + #pseudo_tailcall{funv=F,stkargs=Stk} -> + I#pseudo_tailcall{funv=funv_temps(T,F),stkargs=lists:map(Arg,Stk)}; + #pseudo_tailcall_prepare{} -> I; + #rdy{dst=D} -> I#rdy{dst=T(D)}; + #sethi{dst=D} -> I#sethi{dst=T(D)}; + #store{src=S,base=B,disp=D} -> I#store{src=T(S),base=T(B),disp=S2(D)}; + #fp_binary{src1=L,src2=R,dst=D} -> + I#fp_binary{src1=T(L),src2=T(R),dst=T(D)}; + #fp_unary{src=S,dst=D} -> I#fp_unary{src=T(S),dst=T(D)}; + #pseudo_fload{base=B,disp=Di,dst=Ds} -> + I#pseudo_fload{base=T(B),disp=S2(Di),dst=T(Ds)}; + #pseudo_fmove{src=S,dst=D} -> I#pseudo_fmove{src=T(S),dst=T(D)}; + #pseudo_fstore{src=S,base=B,disp=D} -> + I#pseudo_fstore{src=T(S),base=T(B),disp=S2(D)}; + #pseudo_spill_fmove{src=S,temp=U,dst=D} -> + I#pseudo_spill_fmove{src=T(S),temp=T(U),dst=T(D)} + end. + +-spec src2_temps(subst_fun(), src2()) -> src2(). +src2_temps(_SubstTemp, I=#sparc_simm13{}) -> I; +src2_temps(SubstTemp, T=#sparc_temp{}) -> SubstTemp(T). + +-spec src2b_temps(subst_fun(), src2b()) -> src2b(). +src2b_temps(_SubstTemp, I=#sparc_uimm5{}) -> I; +src2b_temps(SubstTemp, Op) -> src2_temps(SubstTemp, Op). + +-spec funv_temps(subst_fun(), funv()) -> funv(). +funv_temps(_SubstTemp, M=#sparc_mfa{}) -> M; +funv_temps(_SubstTemp, P=#sparc_prim{}) -> P; +funv_temps(SubstTemp, T=#sparc_temp{}) -> SubstTemp(T). + +-spec arg_temps(subst_fun(), arg()) -> arg(). +arg_temps(_SubstTemp, Imm) when is_integer(Imm) -> Imm; +arg_temps(SubstTemp, T=#sparc_temp{}) -> SubstTemp(T). diff --git a/lib/hipe/ssa/hipe_ssa.inc b/lib/hipe/ssa/hipe_ssa.inc index 83ab320306..c7c1a8e1d7 100644 --- a/lib/hipe/ssa/hipe_ssa.inc +++ b/lib/hipe/ssa/hipe_ssa.inc @@ -1,9 +1,5 @@ -%% -*- erlang-indent-level: 2 -*- +%% -*- mode: erlang; erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2002-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%---------------------------------------------------------------------- %% File : hipe_ssa.inc @@ -943,9 +937,9 @@ do_code([Instr|Instrs], LiveOut, Changed, Acc) -> false -> do_code(Instrs, LiveIn, Changed, [Instr|Acc]); true -> - case ?CODE:is_safe(Instr) of + case ?CODE:is_call(Instr) of true -> - case ?CODE:is_call(Instr) of + case ?CODE:is_safe(Instr) of true -> case ?CODE:call_continuation(Instr) of [] -> @@ -955,11 +949,6 @@ do_code([Instr|Instrs], LiveOut, Changed, Acc) -> do_code(Instrs, LiveOut, true, [NewInstr|Acc]) end; false -> - do_code(Instrs, LiveOut, true, Acc) - end; - false -> %% not a safe instruction - cannot be removed - case ?CODE:is_call(Instr) of - true -> case ?CODE:call_dstlist(Instr) of [] -> %% result was not used anyway; no change do_code(Instrs, LiveIn, Changed, [Instr|Acc]); @@ -968,9 +957,14 @@ do_code([Instr|Instrs], LiveOut, Changed, Acc) -> do_code(Instrs, LiveIn, true, [NewInstr|Acc]); [_|_] -> %% calls with multiple dests are left untouched do_code(Instrs, LiveIn, Changed, [Instr|Acc]) - end; - false -> - do_code(Instrs, LiveIn, Changed, [Instr|Acc]) + end + end; + false -> + case ?CODE:reduce_unused(Instr) of + false -> % not a safe instruction - cannot be removed + do_code(Instrs, LiveIn, Changed, [Instr|Acc]); + Replacement -> + do_code(lists:reverse(Replacement, Instrs), LiveOut, true, Acc) end end end; diff --git a/lib/hipe/ssa/hipe_ssa_const_prop.inc b/lib/hipe/ssa/hipe_ssa_const_prop.inc index 648490f9a3..9c157e0833 100644 --- a/lib/hipe/ssa/hipe_ssa_const_prop.inc +++ b/lib/hipe/ssa/hipe_ssa_const_prop.inc @@ -1,10 +1,6 @@ %% -*- Erlang -*- %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%----------------------------------------------------------------------------- %% File : hipe_ssa_const_prop.inc diff --git a/lib/hipe/ssa/hipe_ssa_copy_prop.inc b/lib/hipe/ssa/hipe_ssa_copy_prop.inc index fd80d97b02..8677263213 100644 --- a/lib/hipe/ssa/hipe_ssa_copy_prop.inc +++ b/lib/hipe/ssa/hipe_ssa_copy_prop.inc @@ -1,10 +1,6 @@ %%% -*- Erlang -*- %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%%------------------------------------------------------------------- %%% File : hipe_ssa_copy_prop.inc diff --git a/lib/hipe/ssa/hipe_ssa_liveness.inc b/lib/hipe/ssa/hipe_ssa_liveness.inc index 78488c65fc..a1b49d5d35 100644 --- a/lib/hipe/ssa/hipe_ssa_liveness.inc +++ b/lib/hipe/ssa/hipe_ssa_liveness.inc @@ -1,10 +1,6 @@ %% -*- Erlang -*- %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -40,6 +34,15 @@ ssa_liveness__livein/2]). %% ssa_liveness__livein/3], %% ssa_liveness__liveout/2]). +-type set(E) :: gb_sets:set(E). +-type liveness(Label, Var) :: + #{Label => {{Gen :: set(Var), + Kill :: set(Var), + {TotalDirGen :: set(Var), + DirGen :: gb_trees:tree(Label, set(Var))}}, + LiveIn :: set(Var), + LiveOut :: set(Var), + Successors :: [Label]}}. -endif. %% -ifdef(DEBUG_LIVENESS). %% -export([pp_liveness/1]). @@ -262,21 +265,15 @@ update_directed_gen({Pred, Var}, Map)-> %% %% liveness %% +-compile({inline, [liveness_lookup/2, liveness_update/3]}). liveness_init(List) -> - liveness_init1(List, gb_trees:empty()). + maps:from_list(List). -liveness_init1([{Label, Info}|Left], Map) -> - liveness_init1(Left, gb_trees:insert(Label, Info, Map)); -liveness_init1([], Map) -> - Map. - -liveness_lookup(Label, Map) -> - {value, Info} = gb_trees:lookup(Label, Map), - Info. - -liveness_update(Label, NewInfo, Map) -> - gb_trees:update(Label, NewInfo, Map). +liveness_lookup(Label, Liveness) -> + maps:get(Label, Liveness). +liveness_update(Label, Val, Liveness) -> + maps:update(Label, Val, Liveness). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/lib/hipe/test/basic_SUITE_data/basic_bugs_hipe.erl b/lib/hipe/test/basic_SUITE_data/basic_bugs_hipe.erl index caa0e71d0b..430e097b91 100644 --- a/lib/hipe/test/basic_SUITE_data/basic_bugs_hipe.erl +++ b/lib/hipe/test/basic_SUITE_data/basic_bugs_hipe.erl @@ -18,6 +18,7 @@ test() -> ok = test_R12B5_seg_fault(), ok = test_switch_neg_int(), ok = test_icode_range_anal(), + ok = test_icode_range_call(), ok. %%----------------------------------------------------------------------- @@ -461,3 +462,44 @@ g(X, Z) -> test -> non_zero_test; other -> other end. + +%%----------------------------------------------------------------------- +%% From: Rich Neswold +%% Date: Oct 5, 2016 +%% +%% The following was a bug in the HiPE compiler's range analysis. The +%% function range_client/2 below would would not stop when N reached 0, +%% but keep recursing into the second clause forever. +%% +%% The problem turned out to be in hipe_icode_range:analyse_call/2, +%% which would note update the argument ranges of the callee if the +%% result of the call was ignored. +%% ----------------------------------------------------------------------- +-define(TIMEOUT, 42). + +test_icode_range_call() -> + Self = self(), + Client = spawn_link(fun() -> range_client(Self, 4) end), + range_server(4, Client). + +range_server(0, _Client) -> + receive + stopping -> ok; + {called_with, 0} -> error(failure) + after ?TIMEOUT -> error(timeout) + end; +range_server(N, Client) -> + receive + {called_with, N} -> + Client ! proceed + after ?TIMEOUT -> error(timeout) + end, + range_server(N-1, Client). % tailcall (so the bug does not affect it) + +range_client(Server, 0) -> + Server ! stopping; +range_client(Server, N) -> + Server ! {called_with, N}, + receive proceed -> ok end, + range_client(Server, N - 1), % non-tailrecursive call with ignored result + ok. diff --git a/lib/hipe/test/basic_SUITE_data/basic_edge_cases.erl b/lib/hipe/test/basic_SUITE_data/basic_edge_cases.erl new file mode 100644 index 0000000000..9bf5cf52cd --- /dev/null +++ b/lib/hipe/test/basic_SUITE_data/basic_edge_cases.erl @@ -0,0 +1,142 @@ +%%% -*- erlang-indent-level: 2 -*- +%%%---------------------------------------------------------------------- +%%% Contains +%%%---------------------------------------------------------------------- +-module(basic_edge_cases). + +-export([test/0]). + +test() -> + ok = test_float_spills(), + ok = test_infinite_loops(), + ok. + +%% Contains more float temps live at a single point than there are float +%% registers in any backend + +test_float_spills() -> + {{{2942.0,4670.0,3198.0,4926.0,2206.0,4734.0}, + {3118.0,2062.0,5174.0,3038.0,3618.0,3014.0}, + {2542.0,2062.0,4934.0,2590.0,3098.0,3062.0}, + {2950.0,3666.0,2574.0,5038.0,1866.0,2946.0}, + {3126.0,3050.0,3054.0,5070.0,2258.0,2714.0}, + {4734.0,2206.0,4926.0,3198.0,4670.0,2942.0}}, + 58937.0} = + mat66_flip_sum(35.0,86.0,32.0,88.0,33.0,57.0, + 22.0,77.0,91.0,80.0,14.0,33.0, + 51.0,28.0,87.0,20.0,91.0,11.0, + 68.0,83.0,64.0,82.0,10.0,86.0, + 74.0,18.0,08.0,52.0,10.0,14.0, + 89.0,34.0,64.0,66.0,58.0,55.0, + 0.0, 5), + ok. + +mat66_flip_sum(M11, M12, M13, M14, M15, M16, + M21, M22, M23, M24, M25, M26, + M31, M32, M33, M34, M35, M36, + M41, M42, M43, M44, M45, M46, + M51, M52, M53, M54, M55, M56, + M61, M62, M63, M64, M65, M66, + Acc, Ctr) + when is_float(M11), is_float(M12), is_float(M13), + is_float(M14), is_float(M15), is_float(M16), + is_float(M21), is_float(M22), is_float(M23), + is_float(M24), is_float(M25), is_float(M26), + is_float(M31), is_float(M32), is_float(M33), + is_float(M34), is_float(M35), is_float(M36), + is_float(M41), is_float(M42), is_float(M43), + is_float(M44), is_float(M45), is_float(M46), + is_float(M51), is_float(M52), is_float(M53), + is_float(M54), is_float(M55), is_float(M56), + is_float(M61), is_float(M62), is_float(M63), + is_float(M64), is_float(M65), is_float(M66), + is_float(Acc) -> + R11 = M66+M11, R12 = M65+M12, R13 = M64+M13, + R14 = M63+M14, R15 = M62+M15, R16 = M61+M16, + R21 = M56+M21, R22 = M55+M22, R23 = M54+M23, + R24 = M53+M24, R25 = M52+M25, R26 = M51+M26, + R31 = M46+M31, R32 = M45+M32, R33 = M44+M33, + R34 = M43+M34, R35 = M42+M35, R36 = M41+M36, + R41 = M26+M41, R42 = M25+M42, R43 = M24+M43, + R44 = M23+M44, R45 = M22+M45, R46 = M21+M46, + R51 = M36+M51, R52 = M35+M52, R53 = M34+M53, + R54 = M33+M54, R55 = M32+M55, R56 = M31+M56, + R61 = M16+M61, R62 = M15+M62, R63 = M14+M63, + R64 = M13+M64, R65 = M12+M65, R66 = M11+M66, + case Ctr of + 0 -> + {{{R11, R12, R13, R14, R15, R16}, + {R21, R22, R23, R24, R25, R26}, + {R31, R32, R33, R34, R35, R36}, + {R41, R42, R43, R44, R45, R46}, + {R51, R52, R53, R54, R55, R56}, + {R61, R62, R63, R64, R65, R66}}, + Acc}; + _ -> + NewAcc = 0.0 + M11 + M12 + M13 + M14 + M15 + M16 + + + M21 + M22 + M23 + M24 + M25 + M26 + + M31 + M32 + M33 + M34 + M35 + M36 + + M41 + M42 + M43 + M44 + M45 + M46 + + M51 + M52 + M53 + M54 + M55 + M56 + + M61 + M62 + M63 + M64 + M65 + M66 + + Acc, + mat66_flip_sum(R11+1.0, R12+1.0, R13+1.0, R14+1.0, R15+1.0, R16+1.0, + R21+1.0, R22+1.0, R23+1.0, R24+1.0, R25+1.0, R26+1.0, + R31+1.0, R32+1.0, R33+1.0, R34+1.0, R35+1.0, R36+1.0, + R41+1.0, R42+1.0, R43+1.0, R44+1.0, R45+1.0, R46+1.0, + R51+1.0, R52+1.0, R53+1.0, R54+1.0, R55+1.0, R56+1.0, + R61+1.0, R62+1.0, R63+1.0, R64+1.0, R65+1.0, R66+1.0, + NewAcc, Ctr-1) + end. + +%% Infinite loops must receive reduction tests, and might trip up basic block +%% weighting, leading to infinite weights and/or divisions by zero. + +test_infinite_loops() -> + OldTrapExit = process_flag(trap_exit, true), + ok = test_infinite_loop(fun infinite_recursion/0), + ok = test_infinite_loop(fun infinite_corecursion/0), + RecursiveFun = fun RecursiveFun() -> RecursiveFun() end, + ok = test_infinite_loop(RecursiveFun), + CorecursiveFunA = fun CorecursiveFunA() -> + CorecursiveFunA1 = fun () -> CorecursiveFunA() end, + CorecursiveFunA1() + end, + ok = test_infinite_loop(CorecursiveFunA), + CorecursiveFunB1 = fun(CorecursiveFunB) -> CorecursiveFunB() end, + CorecursiveFunB = fun CorecursiveFunB() -> + CorecursiveFunB1(CorecursiveFunB) + end, + ok = test_infinite_loop(CorecursiveFunB), + CorecursiveFunC1 = fun CorecursiveFunC1(Other) -> + Other(CorecursiveFunC1) + end, + CorecursiveFunC = fun CorecursiveFunC(Other) -> + Other(CorecursiveFunC) + end, + ok = test_infinite_loop(fun() -> CorecursiveFunC(CorecursiveFunC1) end), + ok = test_infinite_loop(fun() -> CorecursiveFunC(CorecursiveFunC) end), + true = process_flag(trap_exit, OldTrapExit), + ok. + +-define(INFINITE_LOOP_TIMEOUT, 100). +test_infinite_loop(Fun) -> + Tester = spawn_link(Fun), + kill_soon(Tester), + receive {'EXIT', Tester, awake} -> + undefined = process_info(Tester), + ok + after ?INFINITE_LOOP_TIMEOUT -> error(timeout) + end. + +infinite_recursion() -> infinite_recursion(). + +infinite_corecursion() -> infinite_corecursion_1(). +infinite_corecursion_1() -> infinite_corecursion(). + +kill_soon(Pid) -> + _ = spawn_link(fun() -> + timer:sleep(1), + erlang:exit(Pid, awake) + end), + ok. diff --git a/lib/hipe/test/basic_SUITE_data/basic_tuples.erl b/lib/hipe/test/basic_SUITE_data/basic_tuples.erl index 94c187e364..96e39d565a 100644 --- a/lib/hipe/test/basic_SUITE_data/basic_tuples.erl +++ b/lib/hipe/test/basic_SUITE_data/basic_tuples.erl @@ -55,6 +55,8 @@ test_element(T0, T1, T2, N) -> List = lists:seq(1, N), Tuple = list_to_tuple(List), ok = get_elements(List, Tuple, 1), + %% element/2 of larger tuple with omitted bounds test + true = lists:all(fun(I) -> I * I =:= square(I) end, lists:seq(1, 20)), %% some cases that throw exceptions {'EXIT', _} = (catch my_element(0, T2)), {'EXIT', _} = (catch my_element(3, T2)), @@ -73,6 +75,18 @@ get_elements([Element|Rest], Tuple, Pos) -> get_elements([], _Tuple, _Pos) -> ok. +squares() -> + {1*1, 2*2, 3*3, 4*4, 5*5, 6*6, 7*7, 8*8, 9*9, 10*10, + 11*11, 12*12, 13*13, 14*14, 15*15, 16*16, 17*17, 18*18, 19*19, 20*20}. + +square(N) when is_integer(N), N >= 1, N =< 20 -> + %% The guard tests lets the range analysis conclude N to be an integer in the + %% 1..20 range. 20-1=19 is bigger than ?SET_LIMIT in erl_types.erl, and will + %% thus be represented by an ?int_range() rather than an ?int_set(). + %% Because of the range analysis, the bounds test of this element/2 call + %% should be omitted. + element(N, squares()). + %%-------------------------------------------------------------------- %% Tests set_element/3. diff --git a/lib/hipe/test/hipe_testsuite_driver.erl b/lib/hipe/test/hipe_testsuite_driver.erl index a3048d907e..88576775ca 100644 --- a/lib/hipe/test/hipe_testsuite_driver.erl +++ b/lib/hipe/test/hipe_testsuite_driver.erl @@ -99,7 +99,7 @@ write_suite(Suite) -> write_header(#suite{suitename = SuiteName, outputfile = OutputFile, testcases = TestCases}) -> Exports = format_export(TestCases), - TimeLimit = 5, %% with 1 or 2 it fails on some slow machines... + TimeLimit = 6, %% with 1, 2, or 3 it fails on some slow machines... io:format(OutputFile, "%% ATTENTION!\n" "%% This is an automatically generated file. Do not edit.\n\n" @@ -168,6 +168,10 @@ run(TestCase, Dir, _OutDir) -> HiPEOpts = try TestCase:hipe_options() catch error:undef -> [] end, {ok, TestCase} = hipe:c(TestCase, HiPEOpts), ok = TestCase:test(), + {ok, TestCase} = hipe:c(TestCase, [o1|HiPEOpts]), + ok = TestCase:test(), + {ok, TestCase} = hipe:c(TestCase, [o0|HiPEOpts]), + ok = TestCase:test(), ToLLVM = try TestCase:to_llvm() catch error:undef -> true end, case ToLLVM andalso hipe:llvm_support_available() of true -> diff --git a/lib/hipe/test/maps_SUITE_data/maps_warn_pair_key_overloaded.erl b/lib/hipe/test/maps_SUITE_data/maps_warn_pair_key_overloaded.erl index 76b2a91f94..cce91530f4 100644 --- a/lib/hipe/test/maps_SUITE_data/maps_warn_pair_key_overloaded.erl +++ b/lib/hipe/test/maps_SUITE_data/maps_warn_pair_key_overloaded.erl @@ -14,7 +14,6 @@ test() -> "hi2" => lists:subtract([1,2],[1]), "hi3" => +3, "hi1" => erlang:min(1,2), - "hi1" => erlang:hash({1,2},35), "hi1" => erlang:phash({1,2},33), "hi1" => erlang:phash2({1,2},34), "hi1" => erlang:integer_to_binary(1337), diff --git a/lib/hipe/tools/hipe_jit.erl b/lib/hipe/tools/hipe_jit.erl index ffe0e440e9..5b937a9789 100644 --- a/lib/hipe/tools/hipe_jit.erl +++ b/lib/hipe/tools/hipe_jit.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2002-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Copyright (c) 2002 by Erik Johansson. diff --git a/lib/hipe/tools/hipe_profile.erl b/lib/hipe/tools/hipe_profile.erl index 9b9c0d6aad..f790dc6ebb 100644 --- a/lib/hipe/tools/hipe_profile.erl +++ b/lib/hipe/tools/hipe_profile.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2002-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,12 +11,9 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Copyright (c) 2001 by Erik Johansson. All Rights Reserved -%% Time-stamp: <2008-04-20 14:53:42 richard> %% ==================================================================== %% Module : hipe_profile %% Purpose : diff --git a/lib/hipe/tools/hipe_timer.erl b/lib/hipe/tools/hipe_timer.erl index 72aa25d440..13dbeb6f87 100644 --- a/lib/hipe/tools/hipe_timer.erl +++ b/lib/hipe/tools/hipe_timer.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2002-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,12 +11,9 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% Copyright (c) 2001 by Erik Johansson. All Rights Reserved -%% Time-stamp: <2008-04-20 14:53:36 richard> %% ==================================================================== %% Module : hipe_timer %% Purpose : @@ -51,7 +44,7 @@ empty_time() -> {A,_} = erlang:statistics(runtime), WTB = erlang:monotonic_time(), {B,_} = erlang:statistics(runtime), - {(WTB-WTA)/erlang:convert_time_unit(1, seconds, native),B-A}. + {(WTB-WTA)/erlang:convert_time_unit(1, second, native),B-A}. time(F) -> WTA = erlang:monotonic_time(), @@ -59,7 +52,7 @@ time(F) -> F(), WTB = erlang:monotonic_time(), {B,_} = erlang:statistics(runtime), - {(WTB-WTA)/erlang:convert_time_unit(1, seconds, native),B-A}. + {(WTB-WTA)/erlang:convert_time_unit(1, second, native),B-A}. timer(F) -> WTA = erlang:monotonic_time(), @@ -67,7 +60,7 @@ timer(F) -> R = F(), WTB = erlang:monotonic_time(), {B,_} = erlang:statistics(runtime), - {R,{(WTB-WTA)/erlang:convert_time_unit(1, seconds, native),B-A}}. + {R,{(WTB-WTA)/erlang:convert_time_unit(1, second, native),B-A}}. advanced(_Fun, I) when I < 2 -> false; advanced(Fun, Iterations) -> diff --git a/lib/hipe/util/Makefile b/lib/hipe/util/Makefile index 66e9421c25..20b1c3bf50 100644 --- a/lib/hipe/util/Makefile +++ b/lib/hipe/util/Makefile @@ -1,7 +1,7 @@ # # %CopyrightBegin% # -# Copyright Ericsson AB 2001-2016. All Rights Reserved. +# Copyright Ericsson AB 2001-2017. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -48,7 +48,7 @@ HIPE_MODULES = hipe_vectors else HIPE_MODULES = endif -MODULES = hipe_timing hipe_dot hipe_digraph $(HIPE_MODULES) +MODULES = hipe_timing hipe_dot hipe_digraph hipe_dsets $(HIPE_MODULES) HRL_FILES= ERL_FILES= $(MODULES:%=%.erl) @@ -113,4 +113,3 @@ release_docs_spec: $(EBIN)/hipe_timing.beam: ../main/hipe.hrl -$(EBIN)/hipe_vectors.beam: hipe_vectors.hrl diff --git a/lib/hipe/util/hipe_digraph.erl b/lib/hipe/util/hipe_digraph.erl index 7446836926..0976395262 100644 --- a/lib/hipe/util/hipe_digraph.erl +++ b/lib/hipe/util/hipe_digraph.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% %%----------------------------------------------------------------------- %% File : hipe_digraph.erl %% Author : Tobias Lindahl <[email protected]> diff --git a/lib/hipe/util/hipe_dot.erl b/lib/hipe/util/hipe_dot.erl index 53e474db42..22d9c02ba0 100644 --- a/lib/hipe/util/hipe_dot.erl +++ b/lib/hipe/util/hipe_dot.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%%------------------------------------------------------------------- %%% File : hipe_dot.erl diff --git a/lib/hipe/util/hipe_dsets.erl b/lib/hipe/util/hipe_dsets.erl new file mode 100644 index 0000000000..9492cab0ff --- /dev/null +++ b/lib/hipe/util/hipe_dsets.erl @@ -0,0 +1,84 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%@doc +%% IMMUTABLE DISJOINT SETS OF ARBITRARY TERMS +%% +%% The disjoint set forests data structure, for elements of arbitrary types. +%% Note that the find operation mutates the set. +%% +%% We could do this more efficiently if we restricted the elements to integers, +%% and used the (mutable) hipe arrays. For arbitrary terms ETS could be used, +%% for a persistent interface (which isn't that nice when even accessors return +%% modified copies), the array module could be used. +-module(hipe_dsets). + +-export([new/1, find/2, union/3, to_map/1, to_rllist/1]). +-export_type([dsets/1]). + +-opaque dsets(X) :: #{X => {node, X} | {root, non_neg_integer()}}. + +-spec new([E]) -> dsets(E). +new(Elems) -> maps:from_list([{E,{root,0}} || E <- Elems]). + +-spec find(E, dsets(E)) -> {E, dsets(E)}. +find(E, DS0) -> + case DS0 of + #{E := {root,_}} -> {E, DS0}; + #{E := {node,N}} -> + case find(N, DS0) of + {N, _}=T -> T; + {R, DS1} -> {R, DS1#{E := {node,R}}} + end; + _ -> error(badarg, [E, DS0]) + end. + +-spec union(E, E, dsets(E)) -> dsets(E). +union(X, Y, DS0) -> + {XRoot, DS1} = find(X, DS0), + case find(Y, DS1) of + {XRoot, DS2} -> DS2; + {YRoot, DS2} -> + #{XRoot := {root,XRR}, YRoot := {root,YRR}} = DS2, + if XRR < YRR -> DS2#{XRoot := {node,YRoot}}; + XRR > YRR -> DS2#{YRoot := {node,XRoot}}; + true -> DS2#{YRoot := {node,XRoot}, XRoot := {root,XRR+1}} + end + end. + +-spec to_map(dsets(E)) -> {#{Elem::E => Root::E}, dsets(E)}. +to_map(DS) -> + to_map(maps:keys(DS), DS, #{}). + +to_map([], DS, Acc) -> {Acc, DS}; +to_map([K|Ks], DS0, Acc) -> + {KR, DS} = find(K, DS0), + to_map(Ks, DS, Acc#{K => KR}). + +-spec to_rllist(dsets(E)) -> {[{Root::E, Elems::[E]}], dsets(E)}. +to_rllist(DS0) -> + {Lists, DS} = to_rllist(maps:keys(DS0), #{}, DS0), + {maps:to_list(Lists), DS}. + +to_rllist([], Acc, DS) -> {Acc, DS}; +to_rllist([E|Es], Acc, DS0) -> + {ERoot, DS} = find(E, DS0), + to_rllist(Es, map_append(ERoot, E, Acc), DS). + +map_append(Key, Elem, Map) -> + case Map of + #{Key := List} -> Map#{Key := [Elem|List]}; + #{} -> Map#{Key => [Elem]} + end. diff --git a/lib/hipe/util/hipe_timing.erl b/lib/hipe/util/hipe_timing.erl index bf8a08dee5..3ebde7b1b5 100644 --- a/lib/hipe/util/hipe_timing.erl +++ b/lib/hipe/util/hipe_timing.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%==================================================================== %% Note: Uses the process keys: diff --git a/lib/hipe/util/hipe_vectors.erl b/lib/hipe/util/hipe_vectors.erl index 7f6c8e91c2..788dacd11b 100644 --- a/lib/hipe/util/hipe_vectors.erl +++ b/lib/hipe/util/hipe_vectors.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @@ -33,11 +27,25 @@ %% list_to_vector/1, list/1]). --include("hipe_vectors.hrl"). +%%-define(USE_TUPLES, true). +%%-define(USE_GBTREES, true). +-define(USE_ARRAYS, true). + +-type vector() :: vector(_). +-export_type([vector/0, vector/1]). + +-spec new(non_neg_integer(), V) -> vector(E) when V :: E. +-spec set(vector(E), non_neg_integer(), V :: E) -> vector(E). +-spec get(vector(E), non_neg_integer()) -> E. +-spec size(vector(_)) -> non_neg_integer(). +-spec vector_to_list(vector(E)) -> [E]. +%% -spec list_to_vector([E]) -> vector(E). +-spec list(vector(E)) -> [{non_neg_integer(), E}]. %% --------------------------------------------------------------------- -ifdef(USE_TUPLES). +-opaque vector(_) :: tuple(). new(N, V) -> erlang:make_tuple(N, V). @@ -68,8 +76,8 @@ get(Vec, Ix) -> element(Ix+1, Vec). %% --------------------------------------------------------------------- -ifdef(USE_GBTREES). +-opaque vector(E) :: gb_trees:tree(non_neg_integer(), E). --spec new(non_neg_integer(), _) -> hipe_vector(). new(N, V) when is_integer(N), N >= 0 -> gb_trees:from_orddict(mklist(N, V)). @@ -81,14 +89,11 @@ mklist(M, N, V) when M < N -> mklist(_, _, _) -> []. --spec size(hipe_vector()) -> non_neg_integer(). size(V) -> gb_trees:size(V). --spec list(hipe_vector()) -> [{_, _}]. list(Vec) -> gb_trees:to_list(Vec). -%% -spec list_to_vector([_]) -> hipe_vector(). %% list_to_vector(Xs) -> %% gb_trees:from_orddict(index(Xs, 0)). %% @@ -97,16 +102,28 @@ list(Vec) -> %% index([],_) -> %% []. --spec vector_to_list(hipe_vector()) -> [_]. vector_to_list(V) -> gb_trees:values(V). --spec set(hipe_vector(), non_neg_integer(), _) -> hipe_vector(). set(Vec, Ix, V) -> gb_trees:update(Ix, V, Vec). --spec get(hipe_vector(), non_neg_integer()) -> any(). get(Vec, Ix) -> gb_trees:get(Ix, Vec). -endif. %% ifdef USE_GBTREES + +%% --------------------------------------------------------------------- + +-ifdef(USE_ARRAYS). +-opaque vector(E) :: array:array(E). + +new(N, V) -> array:new(N, {default, V}). +size(V) -> array:size(V). +list(Vec) -> array:to_orddict(Vec). +%% list_to_vector(Xs) -> array:from_list(Xs). +vector_to_list(V) -> array:to_list(V). +set(Vec, Ix, V) -> array:set(Ix, V, Vec). +get(Vec, Ix) -> array:get(Ix, Vec). + +-endif. %% ifdef USE_ARRAYS diff --git a/lib/hipe/util/hipe_vectors.hrl b/lib/hipe/util/hipe_vectors.hrl deleted file mode 100644 index d4556e9dc4..0000000000 --- a/lib/hipe/util/hipe_vectors.hrl +++ /dev/null @@ -1,29 +0,0 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% -%%-define(USE_TUPLES, true). --define(USE_GBTREES, true). - --ifdef(USE_TUPLES). --type hipe_vector() :: tuple(). --endif. - --ifdef(USE_GBTREES). --type hipe_vector() :: gb_trees:tree(). --endif. diff --git a/lib/hipe/vsn.mk b/lib/hipe/vsn.mk index 172d976931..0ef4aa7f09 100644 --- a/lib/hipe/vsn.mk +++ b/lib/hipe/vsn.mk @@ -1 +1 @@ -HIPE_VSN = 3.15.4 +HIPE_VSN = 3.16 diff --git a/lib/hipe/x86/Makefile b/lib/hipe/x86/Makefile index 93f8b955dd..84edeaebe7 100644 --- a/lib/hipe/x86/Makefile +++ b/lib/hipe/x86/Makefile @@ -60,9 +60,9 @@ MODULES=hipe_rtl_to_x86 \ hipe_x86_ra_ls \ hipe_x86_ra_naive \ hipe_x86_ra_postconditions \ - hipe_x86_ra_x87_ls \ hipe_x86_registers \ hipe_x86_spill_restore \ + hipe_x86_subst \ hipe_x86_x87 HRL_FILES=hipe_x86.hrl @@ -133,7 +133,6 @@ $(EBIN)/hipe_x86_ra: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_dummy.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_ls.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_postconditions.beam: ../main/hipe.hrl -$(EBIN)/hipe_x86_ra_x87_ls.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_registers.beam: ../rtl/hipe_literals.hrl $(EBIN)/hipe_x86_spill_restore.beam: ../main/hipe.hrl ../flow/cfg.hrl $(EBIN)/hipe_x86_x87.beam: ../main/hipe.hrl diff --git a/lib/hipe/x86/NOTES.OPTIM b/lib/hipe/x86/NOTES.OPTIM index 4c241cacb4..c518ea3481 100644 --- a/lib/hipe/x86/NOTES.OPTIM +++ b/lib/hipe/x86/NOTES.OPTIM @@ -1,5 +1,3 @@ -$Id$ - Partial x86 code optimisation guide =================================== Priority should be given to P6 and P4, then K7, diff --git a/lib/hipe/x86/NOTES.RA b/lib/hipe/x86/NOTES.RA index ce80411642..173eaf229e 100644 --- a/lib/hipe/x86/NOTES.RA +++ b/lib/hipe/x86/NOTES.RA @@ -1,5 +1,3 @@ -$Id$ - Register Allocation =================== diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl index d13f63b1d9..31e4f6e4ac 100644 --- a/lib/hipe/x86/hipe_rtl_to_x86.erl +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% -%%% %%% %%% Translate 3-address RTL code to 2-address pseudo-x86 code. @@ -85,32 +78,37 @@ conv_insn(I, Map, Data) -> true -> conv_shift(Dst, Src1, BinOp, Src2); false -> - conv_alu(Dst, Src1, BinOp, Src2, []) + conv_alu_nocc(Dst, Src1, BinOp, Src2, []) end, {FixSrc1++FixSrc2++I2, Map2, Data}; #alub{} -> %% dst = src1 op src2; if COND goto label BinOp = conv_binop(hipe_rtl:alub_op(I)), - {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), - {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), - {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), + {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map), + {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0), Cc = conv_cond(hipe_rtl:alub_cond(I)), - I1 = [hipe_x86:mk_pseudo_jcc(Cc, - hipe_rtl:alub_true_label(I), - hipe_rtl:alub_false_label(I), - hipe_rtl:alub_pred(I))], - I2 = conv_alu(Dst, Src1, BinOp, Src2, I1), - {FixSrc1++FixSrc2++I2, Map2, Data}; - #branch{} -> - %% <unused> = src1 - src2; if COND goto label - {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), - {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), - Cc = conv_cond(hipe_rtl:branch_cond(I)), - I2 = conv_branch(Src1, Cc, Src2, - hipe_rtl:branch_true_label(I), - hipe_rtl:branch_false_label(I), - hipe_rtl:branch_pred(I)), - {FixSrc1++FixSrc2++I2, Map1, Data}; + BranchOp = conv_branchop(BinOp), + HasDst = hipe_rtl:alub_has_dst(I), + {I2, Map3} = + case (not HasDst) andalso BranchOp =/= none of + true -> + {conv_branch(Src1, BranchOp, Src2, Cc, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I)), Map1}; + false -> + {Dst, Map2} = + case HasDst of + false -> {new_untagged_temp(), Map1}; + true -> conv_dst(hipe_rtl:alub_dst(I), Map1) + end, + I1 = [hipe_x86:mk_pseudo_jcc(Cc, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I))], + {conv_alu(Dst, Src1, BinOp, Src2, I1), Map2} + end, + {FixSrc1++FixSrc2++I2, Map3, Data}; #call{} -> %% push <arg1> %% ... @@ -126,7 +124,6 @@ conv_insn(I, Map, Data) -> hipe_rtl:call_continuation(I), hipe_rtl:call_fail(I), hipe_rtl:call_type(I)), - %% XXX Fixme: this ++ is probably inefficient. {FixArgs++I2, Map2, Data}; #comment{} -> I2 = [hipe_x86:mk_comment(hipe_rtl:comment_text(I))], @@ -144,7 +141,7 @@ conv_insn(I, Map, Data) -> {I2, Map, Data}; #load{} -> {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map), - {FixSrc, Src, Map1} = conv_src(hipe_rtl:load_src(I), Map0), + {FixSrc, Src, Map1} = conv_src_noimm(hipe_rtl:load_src(I), Map0), {FixOff, Off, Map2} = conv_src(hipe_rtl:load_offset(I), Map1), I2 = case {hipe_rtl:load_size(I), hipe_rtl:load_sign(I)} of {byte, signed} -> @@ -171,6 +168,7 @@ conv_insn(I, Map, Data) -> Src = hipe_x86:mk_imm_from_atom(hipe_rtl:load_atom_atom(I)), I2 = [hipe_x86:mk_move(Src, Dst)], {I2, Map0, Data}; + #move{src=Dst, dst=Dst} -> {[], Map, Data}; #move{} -> {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map), {FixSrc, Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0), @@ -182,11 +180,11 @@ conv_insn(I, Map, Data) -> I2 = move_retvals(Args, [hipe_x86:mk_ret(-1)]), {FixArgs++I2, Map0, Data}; #store{} -> - {Ptr, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {FixPtr, Ptr, Map0} = conv_src_noimm(hipe_rtl:store_base(I), Map), {FixSrc, Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {FixOff, Off, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), I2 = mk_store(hipe_rtl:store_size(I), Src, Ptr, Off), - {FixSrc++FixOff++I2, Map2, Data}; + {FixPtr++FixSrc++FixOff++I2, Map2, Data}; #switch{} -> % this one also updates Data :-( %% from hipe_rtl2sparc, but we use a hairy addressing mode %% instead of doing the arithmetic manually @@ -206,7 +204,7 @@ conv_insn(I, Map, Data) -> {I2, Map1, NewData}; #fload{} -> {Dst, Map0} = conv_dst(hipe_rtl:fload_dst(I), Map), - {[], Src, Map1} = conv_src(hipe_rtl:fload_src(I), Map0), + {[], Src, Map1} = conv_src_noimm(hipe_rtl:fload_src(I), Map0), {[], Off, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1), I2 = [hipe_x86:mk_fmove(hipe_x86:mk_mem(Src, Off, 'double'),Dst)], {I2, Map2, Data}; @@ -249,6 +247,34 @@ conv_insn(I, Map, Data) -> %%% Finalise the conversion of a 3-address ALU operation, taking %%% care to not introduce more temps and moves than necessary. +conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) -> + case (not same_opnd(Dst, Src1)) andalso (not same_opnd(Dst, Src2)) + %% We could use orelse instead of xor here to generate lea T1(T2), T3, but + %% they seem to move coalesce so well that move+add is better for them. + andalso (hipe_x86:is_temp(Src1) xor hipe_x86:is_temp(Src2)) + of + false -> conv_alu(Dst, Src1, 'add', Src2, Tail); + true -> % Use LEA + Type = typeof_dst(Dst), + Mem = case hipe_x86:is_temp(Src1) of + true -> hipe_x86:mk_mem(Src1, Src2, Type); + false -> hipe_x86:mk_mem(Src2, Src1, Type) + end, + [hipe_x86:mk_lea(Mem, Dst) | Tail] + end; +conv_alu_nocc(Dst, Src1, 'sub', Src2, Tail) -> + case (not same_opnd(Dst, Src1)) andalso hipe_x86:is_temp(Src1) + andalso (not hipe_x86:is_temp(Src2)) + of + false -> conv_alu(Dst, Src1, 'sub', Src2, Tail); + true -> % Use LEA + Imm = hipe_x86:mk_imm(-hipe_x86:imm_value(Src2)), + Mem = hipe_x86:mk_mem(Src1, Imm, typeof_dst(Dst)), + [hipe_x86:mk_lea(Mem, Dst) | Tail] + end; +conv_alu_nocc(Dst, Src1, BinOp, Src2, Tail) -> + conv_alu(Dst, Src1, BinOp, Src2, Tail). + conv_alu(Dst, Src1, 'imul', Src2, Tail) -> mk_imul(Src1, Src2, Dst, Tail); conv_alu(Dst, Src1, BinOp, Src2, Tail) -> @@ -343,28 +369,41 @@ conv_shift(Dst, Src1, BinOp, Src2) -> %%% Finalise the conversion of a conditional branch operation, taking %%% care to not introduce more temps and moves than necessary. -conv_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) -> +conv_branchop('sub') -> 'cmp'; +conv_branchop('and') -> 'test'; +conv_branchop(_) -> none. + +branchop_commutes('cmp') -> false; +branchop_commutes('test') -> true. + +conv_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) -> case hipe_x86:is_imm(Src1) of false -> - mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred); + mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred); true -> case hipe_x86:is_imm(Src2) of false -> - NewCc = commute_cc(Cc), - mk_branch(Src2, NewCc, Src1, TrueLab, FalseLab, Pred); + NewCc = case branchop_commutes(Op) of + true -> Cc; + false -> commute_cc(Cc) + end, + mk_branch(Src2, Op, Src1, NewCc, TrueLab, FalseLab, Pred); true -> %% two immediates, let the optimiser clean it up Tmp = new_untagged_temp(), [hipe_x86:mk_move(Src1, Tmp) | - mk_branch(Tmp, Cc, Src2, TrueLab, FalseLab, Pred)] + mk_branch(Tmp, Op, Src2, Cc, TrueLab, FalseLab, Pred)] end end. -mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) -> +mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) -> %% PRE: not(is_imm(Src1)) - [hipe_x86:mk_cmp(Src2, Src1), + [mk_branchtest(Src1, Op, Src2), hipe_x86:mk_pseudo_jcc(Cc, TrueLab, FalseLab, Pred)]. +mk_branchtest(Src1, cmp, Src2) -> hipe_x86:mk_cmp(Src2, Src1); +mk_branchtest(Src1, test, Src2) -> hipe_x86:mk_test(Src2, Src1). + %%% Convert an RTL ALU or ALUB binary operator. conv_binop(BinOp) -> @@ -572,6 +611,16 @@ conv_fun(Fun, Map) -> end end. +conv_src_noimm(Opnd, Map) -> + R={FixSrc0, Src, NewMap} = conv_src(Opnd, Map), + case hipe_x86:is_imm(Src) of + false -> R; + true -> + Tmp = new_untagged_temp(), + {FixSrc0 ++ [hipe_x86:mk_move(Src, Tmp)], + Tmp, NewMap} + end. + %%% Convert an RTL source operand (imm/var/reg). conv_src(Opnd, Map) -> diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl index 33d7f77cf1..f514dd1ded 100644 --- a/lib/hipe/x86/hipe_x86.erl +++ b/lib/hipe/x86/hipe_x86.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %% representation of 2-address pseudo-amd64 code @@ -37,7 +30,7 @@ mk_imm_from_addr/2, mk_imm_from_atom/1, is_imm/1, - %% imm_value/1, + imm_value/1, mk_mem/3, %% is_mem/1, @@ -174,6 +167,12 @@ mk_pseudo_spill/1, + mk_pseudo_spill_fmove/3, + is_pseudo_spill_fmove/1, + + mk_pseudo_spill_move/3, + is_pseudo_spill_move/1, + mk_pseudo_tailcall/4, %% is_pseudo_tailcall/1, pseudo_tailcall_fun/1, @@ -201,7 +200,7 @@ shift_src/1, shift_dst/1, - %% mk_test/2, + mk_test/2, test_src/1, test_dst/1, @@ -218,6 +217,10 @@ %% highest_temp/1 ]). +%% Other utilities +-export([neg_cc/1 + ]). + %%% %%% Low-level accessors. %%% @@ -241,7 +244,7 @@ mk_imm_from_addr(Addr, Type) -> mk_imm_from_atom(Atom) -> mk_imm(Atom). is_imm(X) -> case X of #x86_imm{} -> true; _ -> false end. -%% imm_value(#x86_imm{value=Value}) -> Value. +imm_value(#x86_imm{value=Value}) -> Value. mk_mem(Base, Off, Type) -> #x86_mem{base=Base, off=Off, type=Type}. %% is_mem(X) -> case X of #x86_mem{} -> true; _ -> false end. @@ -305,7 +308,7 @@ mk_cmp(Src, Dst) -> #cmp{src=Src, dst=Dst}. cmp_src(#cmp{src=Src}) -> Src. cmp_dst(#cmp{dst=Dst}) -> Dst. -%% mk_test(Src, Dst) -> #test{src=Src, dst=Dst}. +mk_test(Src, Dst) -> #test{src=Src, dst=Dst}. test_src(#test{src=Src}) -> Src. test_dst(#test{dst=Dst}) -> Dst. @@ -428,6 +431,14 @@ mk_pseudo_jcc_simple(Cc, TrueLabel, FalseLabel, Pred) -> mk_pseudo_spill(List) -> #pseudo_spill{args=List}. +mk_pseudo_spill_fmove(Src, Temp, Dst) -> + #pseudo_spill_fmove{src=Src, temp=Temp, dst=Dst}. +is_pseudo_spill_fmove(I) -> is_record(I, pseudo_spill_fmove). + +mk_pseudo_spill_move(Src, Temp, Dst) -> + #pseudo_spill_move{src=Src, temp=Temp, dst=Dst}. +is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move). + mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage) -> check_linkage(Linkage), #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage}. diff --git a/lib/hipe/x86/hipe_x86.hrl b/lib/hipe/x86/hipe_x86.hrl index ef99bf90d9..6cd69905b2 100644 --- a/lib/hipe/x86/hipe_x86.hrl +++ b/lib/hipe/x86/hipe_x86.hrl @@ -1,8 +1,3 @@ -%%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% concrete representation of 2-address pseudo-x86 code @@ -98,6 +91,8 @@ -record(pseudo_call, {'fun', sdesc, contlab, linkage}). -record(pseudo_jcc, {cc, true_label, false_label, pred}). -record(pseudo_spill, {args=[]}). +-record(pseudo_spill_move, {src, temp, dst}). +-record(pseudo_spill_fmove, {src, temp, dst}). -record(pseudo_tailcall, {'fun', arity, stkargs, linkage}). -record(pseudo_tailcall_prepare, {}). -record(push, {src}). diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl index e21223a5b1..50919bdf4e 100644 --- a/lib/hipe/x86/hipe_x86_assemble.erl +++ b/lib/hipe/x86/hipe_x86_assemble.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% HiPE/x86 assembler %%% @@ -69,7 +63,7 @@ assemble(CompiledCode, Closures, Exports, Options) -> || {MFA, Defun} <- CompiledCode], %% {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = - hipe_pack_constants:pack_constants(Code, ?HIPE_X86_REGISTERS:alignment()), + hipe_pack_constants:pack_constants(Code), %% {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = encode(translate(Code, ConstMap, Options), Options), @@ -154,6 +148,8 @@ insn_size(I) -> translate_insn(I, Context, Options) -> case I of + #alu{aluop='xor', src=#x86_temp{reg=Reg}=Src, dst=#x86_temp{reg=Reg}=Dst} -> + [{'xor', {temp_to_reg32(Dst), temp_to_rm32(Src)}, I}]; #alu{} -> Arg = resolve_alu_args(hipe_x86:alu_src(I), hipe_x86:alu_dst(I), Context), [{hipe_x86:alu_op(I), Arg, I}]; @@ -234,11 +230,11 @@ translate_insn(I, Context, Options) -> #move64{} -> translate_move64(I, Context); #movsx{} -> - Arg = resolve_movx_args(hipe_x86:movsx_src(I), hipe_x86:movsx_dst(I)), - [{movsx, Arg, I}]; + Src = resolve_movx_src(hipe_x86:movsx_src(I)), + [{movsx, {temp_to_regArch(hipe_x86:movsx_dst(I)), Src}, I}]; #movzx{} -> - Arg = resolve_movx_args(hipe_x86:movzx_src(I), hipe_x86:movzx_dst(I)), - [{movzx, Arg, I}]; + Src = resolve_movx_src(hipe_x86:movzx_src(I)), + [{movzx, {temp_to_reg32(hipe_x86:movzx_dst(I)), Src}, I}]; %% pseudo_call: eliminated before assembly %% pseudo_jcc: eliminated before assembly %% pseudo_tailcall: eliminated before assembly @@ -599,10 +595,20 @@ temp_to_xmm(#x86_temp{reg=Reg}) -> {xmm, Reg}. -ifdef(HIPE_AMD64). +temp_to_rm8(#x86_temp{reg=Reg}) -> + {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}. temp_to_rm64(#x86_temp{reg=Reg}) -> {rm64, hipe_amd64_encode:rm_reg(Reg)}. +-else. +temp_to_rm8(#x86_temp{reg=Reg}) -> + true = ?HIPE_X86_ENCODE:reg_has_8bit(Reg), + {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}. +temp_to_rm16(#x86_temp{reg=Reg}) -> + {rm16, ?HIPE_X86_ENCODE:rm_reg(Reg)}. -endif. +temp_to_rm32(#x86_temp{reg=Reg}) -> + {rm32, ?HIPE_X86_ENCODE:rm_reg(Reg)}. temp_to_rmArch(#x86_temp{reg=Reg}) -> {?RMArch, ?HIPE_X86_ENCODE:rm_reg(Reg)}. temp_to_rm64fp(#x86_temp{reg=Reg}) -> @@ -841,16 +847,15 @@ translate_move64(I, _Context) -> exit({?MODULE, I}). -endif. %%% mov{s,z}x -resolve_movx_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}) -> - {temp_to_regArch(Dst), - case Type of - byte -> - mem_to_rm8(Src); - int16 -> - mem_to_rm16(Src); - int32 -> - mem_to_rm32(Src) - end}. +resolve_movx_src(Src=#x86_mem{type=Type}) -> + case Type of + byte -> + mem_to_rm8(Src); + int16 -> + mem_to_rm16(Src); + int32 -> + mem_to_rm32(Src) + end. %%% alu/cmp (_not_ test) resolve_alu_args(Src, Dst, Context) -> @@ -878,15 +883,29 @@ resolve_alu_args(Src, Dst, Context) -> %%% test resolve_test_args(Src, Dst, Context) -> case Src of - #x86_imm{} -> % imm8 not allowed - {_ImmSize,ImmValue} = translate_imm(Src, Context, false), - NewDst = - case Dst of - #x86_temp{reg=0} -> ?EAX; - #x86_temp{} -> temp_to_rmArch(Dst); - #x86_mem{} -> mem_to_rmArch(Dst) - end, - {NewDst, {imm32,ImmValue}}; + %% Since we're using an 8-bit instruction, the immediate is not sign + %% extended. Thus, we can use immediates up to 255. + #x86_imm{value=ImmVal} + when is_integer(ImmVal), ImmVal >= 0, ImmVal =< 255 -> + Imm = {imm8, ImmVal}, + case Dst of + #x86_temp{reg=0} -> {al, Imm}; + #x86_temp{} -> resolve_test_imm8_reg(Imm, Dst); + #x86_mem{} -> {mem_to_rm8(Dst), Imm} + end; + #x86_imm{value=ImmVal} when is_integer(ImmVal), ImmVal >= 0 -> + {case Dst of + #x86_temp{reg=0} -> eax; + #x86_temp{} -> temp_to_rm32(Dst); + #x86_mem{} -> mem_to_rm32(Dst) + end, {imm32, ImmVal}}; + #x86_imm{} -> % Negative ImmVal; use word-sized instr, imm32 + {_, ImmVal} = translate_imm(Src, Context, false), + {case Dst of + #x86_temp{reg=0} -> ?EAX; + #x86_temp{} -> temp_to_rmArch(Dst); + #x86_mem{} -> mem_to_rmArch(Dst) + end, {imm32, ImmVal}}; #x86_temp{} -> NewDst = case Dst of @@ -896,6 +915,18 @@ resolve_test_args(Src, Dst, Context) -> {NewDst, temp_to_regArch(Src)} end. +-ifdef(HIPE_AMD64). +resolve_test_imm8_reg(Imm, Dst) -> {temp_to_rm8(Dst), Imm}. +-else. +resolve_test_imm8_reg(Imm = {imm8, ImmVal}, Dst = #x86_temp{reg=Reg}) -> + case ?HIPE_X86_ENCODE:reg_has_8bit(Reg) of + true -> {temp_to_rm8(Dst), Imm}; + false -> + %% Register does not exist in 8-bit version; use 16-bit instead + {temp_to_rm16(Dst), {imm16, ImmVal}} + end. +-endif. + %%% shifts resolve_shift_args(Src, Dst, Context) -> RM32 = diff --git a/lib/hipe/x86/hipe_x86_cfg.erl b/lib/hipe/x86/hipe_x86_cfg.erl index ab40b68580..0a3c0fc9d6 100644 --- a/lib/hipe/x86/hipe_x86_cfg.erl +++ b/lib/hipe/x86/hipe_x86_cfg.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,24 +11,22 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_x86_cfg). -export([init/1, labels/1, start_label/1, succ/2, pred/2, - bb/2, bb_add/3]). + bb/2, bb_add/3, map_bbs/2, fold_bbs/3]). -export([postorder/1, reverse_postorder/1]). --export([linearise/1, params/1, arity/1, redirect_jmp/3]). +-export([linearise/1, params/1, arity/1, redirect_jmp/3, branch_preds/1]). %%% these tell cfg.inc what to define (ugly as hell) -define(PRED_NEEDED,true). -define(BREADTH_ORDER,true). -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -include("hipe_x86.hrl"). -include("../flow/cfg.hrl"). @@ -78,6 +72,26 @@ branch_successors(Branch) -> #ret{} -> [] end. +branch_preds(Branch) -> + case Branch of + #jmp_switch{labels=Labels} -> + Prob = 1.0/length(Labels), + [{L, Prob} || L <- Labels]; + #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=[]}} -> + %% A function can still cause an exception, even if we won't catch it + [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}]; + #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=ExnLab}} -> + CallExnPred = hipe_bb_weights:call_exn_pred(), + [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}]; + #pseudo_jcc{true_label=TrueLab,false_label=FalseLab,pred=Pred} -> + [{FalseLab, 1.0-Pred}, {TrueLab, Pred}]; + _ -> + case branch_successors(Branch) of + [] -> []; + [Single] -> [{Single, 1.0}] + end + end. + -ifdef(REMOVE_TRIVIAL_BBS_NEEDED). fails_to(_Instr) -> []. -endif. @@ -107,7 +121,7 @@ mk_goto(Label) -> hipe_x86:mk_jmp_label(Label). is_label(I) -> - hipe_x86:is_label(I). + case I of #label{} -> true; _ -> false end. label_name(Label) -> hipe_x86:label_label(Label). diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl index 9cba6cbe4b..2731836dc1 100644 --- a/lib/hipe/x86/hipe_x86_defuse.erl +++ b/lib/hipe/x86/hipe_x86_defuse.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% compute def/use sets for x86 insns %%% @@ -35,7 +29,7 @@ -endif. -module(?HIPE_X86_DEFUSE). --export([insn_def/1, insn_use/1]). %% src_use/1]). +-export([insn_def/1, insn_defs_all/1, insn_use/1]). %% src_use/1]). -include("../x86/hipe_x86.hrl"). %%% @@ -57,13 +51,25 @@ insn_def(I) -> #movzx{dst=Dst} -> dst_def(Dst); #pseudo_call{} -> call_clobbered(); #pseudo_spill{} -> []; + #pseudo_spill_fmove{temp=Temp, dst=Dst} -> [Temp, Dst]; + #pseudo_spill_move{temp=Temp, dst=Dst} -> [Temp, Dst]; #pseudo_tailcall_prepare{} -> tailcall_clobbered(); #shift{dst=Dst} -> dst_def(Dst); %% call, cmp, comment, jcc, jmp_fun, jmp_label, jmp_switch, label - %% pseudo_jcc, pseudo_tailcall, push, ret + %% pseudo_jcc, pseudo_tailcall, push, ret, test _ -> [] end. + +%% @doc Answers whether instruction I defines all allocatable registers. Used by +%% hipe_regalloc_prepass. +-spec insn_defs_all(_) -> boolean(). +insn_defs_all(I) -> + case I of + #pseudo_call{} -> true; + _ -> false + end. + dst_def(Dst) -> case Dst of #x86_temp{} -> [Dst]; @@ -104,12 +110,15 @@ insn_use(I) -> #pseudo_call{'fun'=Fun,sdesc=#x86_sdesc{arity=Arity}} -> addtemp(Fun, arity_use(Arity)); #pseudo_spill{args=Args} -> Args; + #pseudo_spill_fmove{src=Src} -> [Src]; + #pseudo_spill_move{src=Src} -> [Src]; #pseudo_tailcall{'fun'=Fun,arity=Arity,stkargs=StkArgs} -> addtemp(Fun, addtemps(StkArgs, addtemps(tailcall_clobbered(), arity_use(Arity)))); #push{src=Src} -> addtemp(Src, []); #ret{} -> [hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?RV(), 'tagged')]; #shift{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, [])); + #test{src=Src, dst=Dst} -> addtemp(Src, addtemp(Dst, [])); %% comment, jcc, jmp_label, label, pseudo_jcc, pseudo_tailcall_prepare _ -> [] end. diff --git a/lib/hipe/x86/hipe_x86_encode.erl b/lib/hipe/x86/hipe_x86_encode.erl index 3b7be86608..2662f76d0b 100644 --- a/lib/hipe/x86/hipe_x86_encode.erl +++ b/lib/hipe/x86/hipe_x86_encode.erl @@ -1,8 +1,3 @@ -%%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% Copyright (C) 2000-2005 Mikael Pettersson %%% @@ -65,6 +58,7 @@ cc/1, % 8-bit registers %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0, + reg_has_8bit/1, % 32-bit registers %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0, % operands @@ -143,6 +137,8 @@ cc(g) -> ?CC_G. %% dh() -> ?DH. %% bh() -> ?BH. +reg_has_8bit(Reg) -> Reg =< ?BL. + %%% 32-bit registers -define(EAX, 2#000). @@ -700,8 +696,16 @@ shd_op_sizeof(Opnds) -> test_encode(Opnds) -> case Opnds of + {al, {imm8,Imm8}} -> + [16#A8, Imm8]; + {ax, {imm16,Imm16}} -> + [?PFX_OPND, 16#A9 | le16(Imm16, [])]; {eax, {imm32,Imm32}} -> [16#A9 | le32(Imm32, [])]; + {{rm8,RM8}, {imm8,Imm8}} -> + [16#F6 | encode_rm(RM8, 2#000, [Imm8])]; + {{rm16,RM16}, {imm16,Imm16}} -> + [?PFX_OPND, 16#F7 | encode_rm(RM16, 2#000, le16(Imm16, []))]; {{rm32,RM32}, {imm32,Imm32}} -> [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))]; {{rm32,RM32}, {reg32,Reg32}} -> @@ -710,8 +714,16 @@ test_encode(Opnds) -> test_sizeof(Opnds) -> case Opnds of + {al, {imm8,_}} -> + 1 + 1; + {ax, {imm16,_}} -> + 2 + 2; {eax, {imm32,_}} -> 1 + 4; + {{rm8,RM8}, {imm8,_}} -> + 1 + sizeof_rm(RM8) + 1; + {{rm16,RM16}, {imm16,_}} -> + 2 + sizeof_rm(RM16) + 2; {{rm32,RM32}, {imm32,_}} -> 1 + sizeof_rm(RM32) + 4; {{rm32,RM32}, {reg32,_}} -> @@ -1283,7 +1295,11 @@ dotest1(OS) -> t(OS,'sub',{RM32,Imm8}), t(OS,'sub',{RM32,Reg32}), t(OS,'sub',{Reg32,RM32}), + t(OS,'test',{al,Imm8}), + t(OS,'test',{ax,Imm16}), t(OS,'test',{eax,Imm32}), + t(OS,'test',{RM8,Imm8}), + t(OS,'test',{RM16,Imm16}), t(OS,'test',{RM32,Imm32}), t(OS,'test',{RM32,Reg32}), t(OS,'xor',{eax,Imm32}), diff --git a/lib/hipe/x86/hipe_x86_encode.txt b/lib/hipe/x86/hipe_x86_encode.txt index 13746e2a47..eab732fb2d 100644 --- a/lib/hipe/x86/hipe_x86_encode.txt +++ b/lib/hipe/x86/hipe_x86_encode.txt @@ -1,5 +1,3 @@ -$Id$ - hipe_x86_encode USAGE GUIDE Revision 0.4, 2001-10-09 diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl index 8851ead250..558321d0c3 100644 --- a/lib/hipe/x86/hipe_x86_frame.erl +++ b/lib/hipe/x86/hipe_x86_frame.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% x86 stack frame handling %%% @@ -46,15 +40,13 @@ -include("../x86/hipe_x86.hrl"). -include("../rtl/hipe_literals.hrl"). -frame(Defun, _Options) -> - Formals = fix_formals(hipe_x86:defun_formals(Defun)), - Temps0 = all_temps(hipe_x86:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG0, _Options) -> + Formals = fix_formals(hipe_x86_cfg:params(CFG0)), + Temps0 = all_temps(CFG0, Formals), + MinFrame = defun_minframe(CFG0), Temps = ensure_minframe(MinFrame, Temps0), - CFG0 = hipe_x86_cfg:init(Defun), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps), - hipe_x86_cfg:linearise(CFG1). + do_body(CFG0, Liveness, Formals, Temps). fix_formals(Formals) -> fix_formals(?HIPE_X86_REGISTERS:nr_args(), Formals). @@ -69,23 +61,14 @@ do_body(CFG0, Liveness, Formals, Temps) -> do_prologue(CFG1, Context). do_blocks(CFG, Context) -> - Labels = hipe_x86_cfg:labels(CFG), - do_blocks(Labels, CFG, Context). + hipe_x86_cfg:map_bbs(fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG). -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) -> Liveness = context_liveness(Context), LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label), - Block = hipe_x86_cfg:bb(CFG, Label), Code = hipe_bb:code(Block), - NewCode = do_block(Code, LiveOut, Context), - NewBlock = hipe_bb:code_update(Block, NewCode), - NewCFG = hipe_x86_cfg:bb_add(CFG, Label, NewBlock), - do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> - CFG. - -do_block(Insns, LiveOut, Context) -> - do_block(Insns, LiveOut, Context, context_framesize(Context), []). + NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), + hipe_bb:code_update(Block, NewCode). do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), @@ -112,13 +95,17 @@ do_insn(I, LiveOut, Context, FPoff) -> #imul{} -> {[do_imul(I, Context, FPoff)], FPoff}; #move{} -> - {[do_move(I, Context, FPoff)], FPoff}; + {do_move(I, Context, FPoff), FPoff}; #movsx{} -> {[do_movsx(I, Context, FPoff)], FPoff}; #movzx{} -> {[do_movzx(I, Context, FPoff)], FPoff}; #pseudo_call{} -> do_pseudo_call(I, LiveOut, Context, FPoff); + #pseudo_spill_fmove{} -> + {do_pseudo_spill_fmove(I, Context, FPoff), FPoff}; + #pseudo_spill_move{} -> + {do_pseudo_spill_move(I, Context, FPoff), FPoff}; #pseudo_tailcall{} -> {do_pseudo_tailcall(I, Context), context_framesize(Context)}; #push{} -> @@ -127,6 +114,8 @@ do_insn(I, LiveOut, Context, FPoff) -> {do_ret(I, Context, FPoff), context_framesize(Context)}; #shift{} -> {[do_shift(I, Context, FPoff)], FPoff}; + #test{} -> + {[do_test(I, Context, FPoff)], FPoff}; _ -> % comment, jmp, label, pseudo_jcc, pseudo_tailcall_prepare {[I], FPoff} end. @@ -159,22 +148,50 @@ do_fp_binop(I, Context, FPoff) -> Dst = conv_opnd(Dst0, FPoff, Context), [I#fp_binop{src=Src,dst=Dst}]. -do_fmove(I, Context, FPoff) -> - #fmove{src=Src0,dst=Dst0} = I, +do_fmove(I0, Context, FPoff) -> + #fmove{src=Src0,dst=Dst0} = I0, Src = conv_opnd(Src0, FPoff, Context), Dst = conv_opnd(Dst0, FPoff, Context), - I#fmove{src=Src,dst=Dst}. + I = I0#fmove{src=Src,dst=Dst}, + case Src =:= Dst of + true -> []; % omit move-to-self + false -> [I] + end. + +do_pseudo_spill_fmove(I0, Context, FPoff) -> + #pseudo_spill_fmove{src=Src0,temp=Temp0,dst=Dst0} = I0, + Src = conv_opnd(Src0, FPoff, Context), + Temp = conv_opnd(Temp0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + case Src =:= Dst of + true -> []; % omit move-to-self + false -> [#fmove{src=Src, dst=Temp}, #fmove{src=Temp, dst=Dst}] + end. do_imul(I, Context, FPoff) -> #imul{src=Src0} = I, Src = conv_opnd(Src0, FPoff, Context), I#imul{src=Src}. -do_move(I, Context, FPoff) -> - #move{src=Src0,dst=Dst0} = I, +do_move(I0, Context, FPoff) -> + #move{src=Src0,dst=Dst0} = I0, Src = conv_opnd(Src0, FPoff, Context), Dst = conv_opnd(Dst0, FPoff, Context), - I#move{src=Src,dst=Dst}. + I = I0#move{src=Src,dst=Dst}, + case Src =:= Dst of + true -> []; % omit move-to-self + false -> [I] + end. + +do_pseudo_spill_move(I0, Context, FPoff) -> + #pseudo_spill_move{src=Src0,temp=Temp0,dst=Dst0} = I0, + Src = conv_opnd(Src0, FPoff, Context), + Temp = conv_opnd(Temp0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + case Src =:= Dst of + true -> []; % omit move-to-self + false -> [#move{src=Src, dst=Temp}, #move{src=Temp, dst=Dst}] + end. do_movsx(I, Context, FPoff) -> #movsx{src=Src0,dst=Dst0} = I, @@ -199,6 +216,12 @@ do_shift(I, Context, FPoff) -> Dst = conv_opnd(Dst0, FPoff, Context), I#shift{src=Src,dst=Dst}. +do_test(I, Context, FPoff) -> + #test{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#test{src=Src,dst=Dst}. + conv_opnd(Opnd, FPoff, Context) -> case opnd_is_pseudo(Opnd) of false -> @@ -609,39 +632,46 @@ temp_is_pseudo(Temp) -> %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), S2 = tset_filter(S1, fun(T) -> temp_is_pseudo(T) end), S2. -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_x86_defuse:insn_def(I)), - S2 = tset_add_list(S1, hipe_x86_defuse:insn_use(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_x86_defuse:insn_use(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_x86_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). + +-compile({inline, [tset_empty/0, tset_size/1, tset_insert/2, + tset_filter/2, tset_to_list/1]}). tset_empty() -> - gb_sets:new(). + #{}. tset_size(S) -> - gb_sets:size(S). + map_size(S). tset_insert(S, T) -> - gb_sets:add_element(T, S). + S#{T => []}. -tset_add_list(S, Ts) -> - gb_sets:union(S, gb_sets:from_list(Ts)). +tset_add_list(S, []) -> S; +tset_add_list(S, [T|Ts]) -> + tset_add_list(S#{T => []}, Ts). -tset_del_list(S, Ts) -> - gb_sets:subtract(S, gb_sets:from_list(Ts)). +tset_del_list(S, []) -> S; +tset_del_list(S, [T|Ts]) -> + tset_del_list(maps:remove(T,S), Ts). tset_filter(S, F) -> - gb_sets:filter(F, S). + maps:filter(fun(K, _V) -> F(K) end, S). tset_to_list(S) -> - gb_sets:to_list(S). + maps:keys(S). %%% %%% Compute minimum permissible frame size, ignoring spilled temps. @@ -649,16 +679,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_x86:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_x86:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_x86_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/x86/hipe_x86_liveness.erl b/lib/hipe/x86/hipe_x86_liveness.erl index ce46ec920e..470501b46d 100644 --- a/lib/hipe/x86/hipe_x86_liveness.erl +++ b/lib/hipe/x86/hipe_x86_liveness.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% x86_liveness -- compute register liveness for x86 CFGs diff --git a/lib/hipe/x86/hipe_x86_main.erl b/lib/hipe/x86/hipe_x86_main.erl index 13b0bb6b28..7e9fd10e62 100644 --- a/lib/hipe/x86/hipe_x86_main.erl +++ b/lib/hipe/x86/hipe_x86_main.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -ifdef(HIPE_AMD64). -define(HIPE_X86_MAIN, hipe_amd64_main). @@ -53,19 +46,23 @@ ?RTL_TO_X86(MFA, RTL, Options) -> Translated = ?option_time(?HIPE_RTL_TO_X86:translate(RTL), "RTL-to-"?X86STR, Options), - SpillRest = + TransCFG = ?option_time(hipe_x86_cfg:init(Translated), + ?X86STR" to cfg", Options), + SpillRestCFG = case proplists:get_bool(caller_save_spill_restore, Options) of true -> - ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(Translated, Options), + ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(TransCFG, Options), ?X86STR" spill restore", Options); false -> - Translated + TransCFG end, - Allocated = ?option_time(?HIPE_X86_RA:ra(SpillRest, Options), - ?X86STR" register allocation", Options), - Framed = ?option_time(?HIPE_X86_FRAME:frame(Allocated, Options), - ?X86STR" frame", Options), - Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options), - ?X86STR" finalise", Options), + AllocatedCFG = ?option_time(?HIPE_X86_RA:ra(SpillRestCFG, Options), + ?X86STR" register allocation", Options), + FramedCFG = ?option_time(?HIPE_X86_FRAME:frame(AllocatedCFG, Options), + ?X86STR" frame", Options), + Framed = ?option_time(hipe_x86_cfg:linearise(FramedCFG), + ?X86STR" linearise", Options), + Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options), + ?X86STR" finalise", Options), ?HIPE_X86_PP:optional_pp(Finalised, MFA, Options), {native, ?X86TAG, {unprofiled, Finalised}}. diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl index 4515822a34..925054dd68 100644 --- a/lib/hipe/x86/hipe_x86_postpass.erl +++ b/lib/hipe/x86/hipe_x86_postpass.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%%---------------------------------------------------------------------- %%% File : hipe_x86_postpass.erl @@ -63,9 +57,10 @@ postpass(#defun{code=Code0}=Defun, Options) -> peephole_optimization(Insns) -> peep(Insns, [], []). -%% MoveSelf related peep-opts + +%% MoveSelf related peep-opts %% ------------------------------ -peep([#fmove{src=Src, dst=Src} | Insns], Res,Lst) -> +peep([#fmove{src=Src, dst=Src} | Insns], Res,Lst) -> peep(Insns, Res, [moveSelf1|Lst]); peep([I=#fmove{src=Src, dst=Dst}, #fmove{src=Dst, dst=Src} | Insns], Res,Lst) -> @@ -120,19 +115,15 @@ peep([#move{src=Src1, dst=Dst}, %% ElimCmp0 %% -------- -peep([C=#cmp{src=Src, dst=Dst},J=#jcc{cc=Cond, label=Lab}|Insns],Res,Lst) -> - case (((Src =:= #x86_imm{value=0}) or (Dst =:= #x86_imm{value=0})) and - ((Cond =:= 'eq') or (Cond =:= 'neq'))) of - true -> - Src2 = case Src of #x86_imm{value=0} -> Src; _ -> Dst end, - Cond2 = case Cond of 'eq' -> 'z'; 'neq' -> 'nz' end, - Test = #test{src=Src2, dst=#x86_imm{value=0}}, - Jump = #jcc{cc=Cond2, label=Lab}, - peep(Insns, [Jump, Test|Res], [elimCmp0|Lst]); - _ -> - peep(Insns, [J,C|Res], Lst) - end; - +peep([#cmp{src=#x86_imm{value=0}, dst=Dst=#x86_temp{}}|Insns],Res,Lst) -> + %% TEST leaves the adjust flag undefined, whereas CMP sets it properly (in + %% this case to 0). However, since HiPE does not use any instructions that + %% read the adjust flag, we can do this transform safely. + peep(Insns, [#test{src=Dst, dst=Dst} | Res], [elimCmp0_1|Lst]); +peep([#cmp{src=Src=#x86_temp{}, dst=#x86_imm{value=0}}, + J=#jcc{cc=Cond}|Insns],Res,Lst) + when Cond =:= 'e'; Cond =:= 'ne' -> % We're commuting the comparison + peep(Insns, [J, #test{src=Src, dst=Src} | Res], [elimCmp0_2|Lst]); %% ElimCmpTest %% ----------- @@ -169,8 +160,7 @@ peep([#jcc{label=Lab}, I=#label{label=Lab}|Insns], Res, Lst) -> %% ElimSet0 %% -------- -peep([#move{src=#x86_imm{value=0},dst=Dst}|Insns],Res,Lst) -when (Dst==#x86_temp{}) -> +peep([#move{src=#x86_imm{value=0},dst=Dst=#x86_temp{}}|Insns],Res,Lst) -> peep(Insns, [#alu{aluop='xor', src=Dst, dst=Dst}|Res], [elimSet0|Lst]); %% ElimMDPow2 @@ -187,6 +177,18 @@ peep([B = #alu{aluop=Op,src=#x86_imm{value=Val},dst=Dst}|Insns], Res, Lst) -> peep(Insns, [B|Res], Lst) end; +%% LeaToAdd +%% This rule transforms lea into add when the destination is the same as one of +%% the operands. Sound because lea is never used where the condition codes are +%% live (and would be clobbered by add). +%% ---------- +peep([#lea{mem=#x86_mem{base=#x86_temp{reg=DstR},off=Src}, + temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) -> + peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]); +peep([#lea{mem=#x86_mem{base=Src,off=#x86_temp{reg=DstR}}, + temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) -> + peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]); + %% SubToDec %% This rule turns "subl $1,Dst; jl Lab" into "decl Dst; jl Lab", which %% changes reduction counter tests to use decl instead of subl. @@ -209,6 +211,11 @@ trivial_goto_elimination(Insns) -> goto_elim(Insns, []). goto_elim([#jmp_label{label=Label}, I = #label{label=Label}|Insns], Res) -> goto_elim([I|Insns], Res); +goto_elim([#jcc{cc=CC, label=Label} = IJCC, + #jmp_label{label=BranchTgt}, + #label{label=Label} = ILBL|Insns], Res) -> + goto_elim([IJCC#jcc{cc=hipe_x86:neg_cc(CC), label=BranchTgt}, + ILBL|Insns], Res); goto_elim([I | Insns], Res) -> goto_elim(Insns, [I|Res]); goto_elim([], Res) -> diff --git a/lib/hipe/x86/hipe_x86_pp.erl b/lib/hipe/x86/hipe_x86_pp.erl index 9352cf5dbf..72d2fa80bf 100644 --- a/lib/hipe/x86/hipe_x86_pp.erl +++ b/lib/hipe/x86/hipe_x86_pp.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% x86 pretty-printer @@ -171,7 +165,7 @@ pp_insn(Dev, I, Pre) -> #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage} -> io:format(Dev, "\tpseudo_tailcall ", []), pp_fun(Dev, Fun), - io:format(Dev, "~w (", [Arity]), + io:format(Dev, " ~w (", [Arity]), pp_args(Dev, StkArgs), io:format(Dev, ") ~w\n", [Linkage]); #pseudo_tailcall_prepare{} -> @@ -188,6 +182,12 @@ pp_insn(Dev, I, Pre) -> io:format(Dev, ", ", []), pp_dst(Dev, Dst), io:format(Dev, "\n", []); + #test{src=Src, dst=Dst} -> + io:format(Dev, "\ttest ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); #fp_binop{src=Src, dst=Dst, op=Op} -> io:format(Dev, "\t~s ", [Op]), pp_dst(Dev, Dst), diff --git a/lib/hipe/x86/hipe_x86_ra.erl b/lib/hipe/x86/hipe_x86_ra.erl index f66961a7a7..f358306d49 100644 --- a/lib/hipe/x86/hipe_x86_ra.erl +++ b/lib/hipe/x86/hipe_x86_ra.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -ifdef(HIPE_AMD64). -define(HIPE_X86_RA, hipe_amd64_ra). @@ -41,60 +34,83 @@ %%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. -include("../main/hipe.hrl"). -ra(Defun0, Options) -> - %% ?HIPE_X86_PP:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} = ra_fp(Defun0, Options), - %% ?HIPE_X86_PP:pp(Defun1), +-ifdef(HIPE_INSTRUMENT_COMPILER). +code_size(CFG) -> + hipe_x86_cfg:fold_bbs(fun(_, BB, Acc) -> Acc + length(hipe_bb:code(BB)) end, + 0, CFG). +-endif. %% ifdef(HIPE_INSTRUMENT_COMPILER) + +ra(CFG0, Options) -> + %% hipe_x86_cfg:pp(CFG0), + Liveness0 = ?HIPE_X86_SPECIFIC:analyze(CFG0, no_context), + {CFG1, Liveness, Coloring_fp, SpillIndex} = ra_fp(CFG0, Liveness0, Options), + %% hipe_x86_cfg:pp(CFG1), ?start_ra_instrumentation(Options, - length(hipe_x86:defun_code(Defun1)), - element(2,hipe_x86:defun_var_range(Defun1))), - {Defun2, Coloring} + code_size(CFG1), + element(2,hipe_gensym:var_range(x86))), + {CFG2, _, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, Liveness, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, Liveness, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, Liveness, SpillIndex, Options, hipe_graph_coloring_regalloc); linear_scan -> - ?HIPE_X86_RA_LS:ra(Defun1, SpillIndex, Options); + ?HIPE_X86_RA_LS:ra(CFG1, Liveness, SpillIndex, Options); naive -> - ?HIPE_X86_RA_NAIVE:ra(Defun1, Coloring_fp, Options); + ?HIPE_X86_RA_NAIVE:ra(CFG1, Liveness, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, ?stop_ra_instrumentation(Options, - length(hipe_x86:defun_code(Defun2)), - element(2,hipe_x86:defun_var_range(Defun2))), - %% ?HIPE_X86_PP:pp(Defun2), - ?HIPE_X86_RA_FINALISE:finalise(Defun2, Coloring, Coloring_fp, Options). + code_size(CFG2), + element(2,hipe_gensym:var_range(x86))), + %% hipe_x86_cfg:pp(CFG2), + ?HIPE_X86_RA_FINALISE:finalise(CFG2, Coloring, Coloring_fp, Options). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, ?HIPE_X86_SPECIFIC). +ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, + ?HIPE_X86_SPECIFIC, no_context). -ifdef(HIPE_AMD64). -ra_fp(Defun, Options) -> - case proplists:get_bool(inline_fp, Options) and - (proplists:get_value(regalloc, Options) =/= naive) of - true -> - case proplists:get_bool(x87, Options) of - true -> - hipe_amd64_ra_x87_ls:ra(Defun, Options); - false -> - hipe_regalloc_loop:ra_fp(Defun, Options, - hipe_coalescing_regalloc, - hipe_amd64_specific_sse2) - end; - false -> - {Defun,[],0} +ra_fp(CFG, Liveness, Options) -> + Regalloc0 = proplists:get_value(regalloc, Options), + {Regalloc, TargetMod} = + case proplists:get_bool(inline_fp, Options) and (Regalloc0 =/= naive) of + false -> {naive, undefined}; + true -> + case proplists:get_bool(x87, Options) of + true -> {linear_scan, hipe_amd64_specific_x87}; + false -> {Regalloc0, hipe_amd64_specific_sse2} + end + end, + case Regalloc of + coalescing -> + ra_fp(CFG, Liveness, Options, hipe_coalescing_regalloc, TargetMod); + optimistic -> + ra_fp(CFG, Liveness, Options, hipe_optimistic_regalloc, TargetMod); + graph_color -> + ra_fp(CFG, Liveness, Options, hipe_graph_coloring_regalloc, TargetMod); + linear_scan -> hipe_amd64_ra_ls:ra_fp(CFG, Liveness, Options, TargetMod, + no_context); + naive -> {CFG,Liveness,[],0}; + _ -> + exit({unknown_regalloc_compiler_option, + proplists:get_value(regalloc,Options)}) end. + +ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod) -> + hipe_regalloc_loop:ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod, + no_context). -else. -ra_fp(Defun, Options) -> +ra_fp(CFG, Liveness, Options) -> case proplists:get_bool(inline_fp, Options) of true -> - hipe_x86_ra_x87_ls:ra(Defun, Options); + hipe_x86_ra_ls:ra_fp(CFG, Liveness, Options, hipe_x86_specific_x87, + no_context); false -> - {Defun,[],0} + {CFG,Liveness,[],0} end. -endif. diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl index 5dd75cb7ae..e8abe78e00 100644 --- a/lib/hipe/x86/hipe_x86_ra_finalise.erl +++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% -%%% %%% %%% - apply temp -> reg/spill map from RA @@ -25,23 +18,36 @@ -define(HIPE_X86_RA_FINALISE, hipe_amd64_ra_finalise). -define(HIPE_X86_REGISTERS, hipe_amd64_registers). -define(HIPE_X86_X87, hipe_amd64_x87). +-define(HIPE_X86_SSE2, hipe_amd64_sse2). +-define(IF_HAS_SSE2(Expr), Expr). -else. -define(HIPE_X86_RA_FINALISE, hipe_x86_ra_finalise). -define(HIPE_X86_REGISTERS, hipe_x86_registers). -define(HIPE_X86_X87, hipe_x86_x87). +-define(IF_HAS_SSE2(Expr),). -endif. -module(?HIPE_X86_RA_FINALISE). -export([finalise/4]). -include("../x86/hipe_x86.hrl"). -finalise(Defun, TempMap, FpMap, Options) -> - Defun1 = finalise_ra(Defun, TempMap, FpMap, Options), +finalise(CFG0, TempMap, FpMap, Options) -> + CFG1 = finalise_ra(CFG0, TempMap, FpMap, Options), case proplists:get_bool(x87, Options) of true -> - ?HIPE_X86_X87:map(Defun1); + ?HIPE_X86_X87:map(CFG1); _ -> - Defun1 + case + proplists:get_bool(inline_fp, Options) + and (proplists:get_value(regalloc, Options) =:= linear_scan) + of + %% Ugly, but required to avoid Dialyzer complaints about "Unknown + %% function" hipe_x86_sse2:map/1 + ?IF_HAS_SSE2(true -> + ?HIPE_X86_SSE2:map(CFG1);) + false -> + CFG1 + end end. %%% @@ -50,15 +56,16 @@ finalise(Defun, TempMap, FpMap, Options) -> %%% but I just want this to work now) %%% -finalise_ra(Defun, [], [], _Options) -> - Defun; -finalise_ra(Defun, TempMap, FpMap, Options) -> - Code = hipe_x86:defun_code(Defun), - {_, SpillLimit} = hipe_x86:defun_var_range(Defun), +finalise_ra(CFG, [], [], _Options) -> + CFG; +finalise_ra(CFG, TempMap, FpMap, Options) -> + {_, SpillLimit} = hipe_gensym:var_range(x86), Map = mk_ra_map(TempMap, SpillLimit), FpMap0 = mk_ra_map_fp(FpMap, SpillLimit, Options), - NewCode = ra_code(Code, Map, FpMap0), - Defun#defun{code=NewCode}. + hipe_x86_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FpMap0) end, CFG). + +ra_bb(BB, Map, FpMap) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap)). ra_code(Code, Map, FpMap) -> [ra_insn(I, Map, FpMap) || I <- Code]. @@ -133,6 +140,16 @@ ra_insn(I, Map, FpMap) -> I#pseudo_call{'fun'=Fun}; #pseudo_jcc{} -> I; + #pseudo_spill_fmove{src=Src0, temp=Temp0, dst=Dst0} -> + Src = ra_opnd(Src0, Map, FpMap), + Temp = ra_opnd(Temp0, Map, FpMap), + Dst = ra_opnd(Dst0, Map, FpMap), + I#pseudo_spill_fmove{src=Src, temp=Temp, dst=Dst}; + #pseudo_spill_move{src=Src0, temp=Temp0, dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Temp = ra_opnd(Temp0, Map), + Dst = ra_opnd(Dst0, Map), + I#pseudo_spill_move{src=Src, temp=Temp, dst=Dst}; #pseudo_tailcall{'fun'=Fun0,stkargs=StkArgs0} -> Fun = ra_opnd(Fun0, Map), StkArgs = ra_args(StkArgs0, Map), @@ -148,6 +165,10 @@ ra_insn(I, Map, FpMap) -> Src = ra_opnd(Src0, Map), Dst = ra_opnd(Dst0, Map), I#shift{src=Src,dst=Dst}; + #test{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#test{src=Src,dst=Dst}; _ -> exit({?MODULE,ra_insn,I}) end. @@ -230,49 +251,27 @@ mk_ra_map(TempMap, SpillLimit) -> gb_trees:empty(), TempMap). -conv_ra_maplet(MapLet = {From,To}, SpillLimit, IsPrecoloured) -> +conv_ra_maplet({From,To}, SpillLimit, IsPrecoloured) + when is_integer(From), From =< SpillLimit -> %% From should be a pseudo, or a hard reg mapped to itself. - if is_integer(From), From =< SpillLimit -> - case ?HIPE_X86_REGISTERS:IsPrecoloured(From) of - false -> []; - _ -> - case To of - {reg, From} -> []; - _ -> exit({?MODULE,conv_ra_maplet,MapLet}) - end - end; - true -> exit({?MODULE,conv_ra_maplet,MapLet}) + case ?HIPE_X86_REGISTERS:IsPrecoloured(From) of + false -> ok; + _ -> To = {reg, From}, ok end, %% end of From check case To of - {reg, NewReg} -> + {reg, NewReg} when is_integer(NewReg) -> %% NewReg should be a hard reg, or a pseudo mapped %% to itself (formals are handled this way). - if is_integer(NewReg) -> - case ?HIPE_X86_REGISTERS:IsPrecoloured(NewReg) of - true -> []; - _ -> if From =:= NewReg -> []; - true -> - exit({?MODULE,conv_ra_maplet,MapLet}) - end - end; - true -> exit({?MODULE,conv_ra_maplet,MapLet}) - end, - %% end of NewReg check + true = (?HIPE_X86_REGISTERS:IsPrecoloured(NewReg) orelse From =:= NewReg), {From, NewReg}; - {spill, SpillIndex} -> - %% SpillIndex should be >= 0. - if is_integer(SpillIndex), SpillIndex >= 0 -> []; - true -> exit({?MODULE,conv_ra_maplet,MapLet}) - end, - %% end of SpillIndex check + {spill, SpillIndex} when is_integer(SpillIndex), SpillIndex >= 0 -> ToTempNum = SpillLimit+SpillIndex+1, MaxTempNum = hipe_gensym:get_var(x86), if MaxTempNum >= ToTempNum -> ok; true -> hipe_gensym:set_var(x86, ToTempNum) end, - {From, ToTempNum}; - _ -> exit({?MODULE,conv_ra_maplet,MapLet}) + {From, ToTempNum} end. mk_ra_map_x87(FpMap, SpillLimit) -> diff --git a/lib/hipe/x86/hipe_x86_ra_ls.erl b/lib/hipe/x86/hipe_x86_ra_ls.erl index 3e34433111..581abd299d 100644 --- a/lib/hipe/x86/hipe_x86_ra_ls.erl +++ b/lib/hipe/x86/hipe_x86_ra_ls.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% Linear Scan register allocator for x86 @@ -35,41 +29,64 @@ -endif. -module(?HIPE_X86_RA_LS). --export([ra/3,regalloc/7]). +-export([ra/4,ra_fp/5]). -define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. -include("../main/hipe.hrl"). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_x86_cfg:init(NewDefun), - +ra(CFG, Liveness, SpillIndex, Options) -> SpillLimit = ?HIPE_X86_SPECIFIC:number_of_temporaries( - CFG), + CFG, no_context), + ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), + alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). + +ra_fp(CFG, Liveness, Options, TargetMod, TargetCtx) -> + ?inc_counter(ra_calls_counter,1), + %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), + SpillIndex = 0, + SpillLimit = TargetMod:number_of_temporaries(CFG, TargetCtx), ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), - alloc(NewDefun, SpillIndex, SpillLimit, Options). + ?inc_counter(ra_iteration_counter,1), + %% ?HIPE_X86_PP:pp(Defun), + + {Coloring,NewSpillIndex} = + regalloc(CFG, Liveness, + TargetMod:allocatable('linearscan', TargetCtx), + [hipe_x86_cfg:start_label(CFG)], + SpillIndex, SpillLimit, Options, + TargetMod, TargetCtx), + + {NewCFG, _DidSpill} = + TargetMod:check_and_rewrite(CFG, Coloring, 'linearscan', TargetCtx), + TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod, TargetCtx), + {TempMap2, NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + TargetMod, TargetCtx, TempMap), + Coloring2 = + hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), + ?add_spills(Options, NewSpillIndex), + {NewCFG, Liveness, Coloring2, NewSpillIndex2}. -alloc(Defun, SpillIndex, SpillLimit, Options) -> +alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) -> ?inc_counter(ra_iteration_counter,1), %% ?HIPE_X86_PP:pp(Defun), - CFG = hipe_x86_cfg:init(Defun), - {Coloring, NewSpillIndex} = + {Coloring, NewSpillIndex} = regalloc( - CFG, + CFG, Liveness, ?HIPE_X86_REGISTERS:allocatable()-- [?HIPE_X86_REGISTERS:temp1(), ?HIPE_X86_REGISTERS:temp0()], [hipe_x86_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, - ?HIPE_X86_SPECIFIC), - {NewDefun, _DidSpill} = + ?HIPE_X86_SPECIFIC, no_context), + {NewCFG, _DidSpill} = ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite( - Defun, Coloring, 'linearscan'), + CFG, Coloring, 'linearscan'), %% ?HIPE_X86_PP:pp(NewDefun), - TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), - {TempMap2,NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, - ?HIPE_X86_SPECIFIC, TempMap), + TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC, no_context), + {TempMap2,NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + ?HIPE_X86_SPECIFIC, no_context, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), case proplists:get_bool(verbose_spills, Options) of @@ -79,8 +96,9 @@ alloc(Defun, SpillIndex, SpillLimit, Options) -> ok end, ?add_spills(Options, NewSpillIndex), - {NewDefun, Coloring2}. + {NewCFG, Liveness, Coloring2}. -regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, DontSpill, Options, Target) -> - hipe_ls_regalloc:regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, - DontSpill, Options, Target). +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, + TgtMod, TgtCtx) -> + hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, + DontSpill, Options, TgtMod, TgtCtx). diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl index 0ef4ef0a04..f96c662d18 100644 --- a/lib/hipe/x86/hipe_x86_ra_naive.erl +++ b/lib/hipe/x86/hipe_x86_ra_naive.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% simple local x86 regalloc @@ -33,15 +27,14 @@ -endif. -module(?HIPE_X86_RA_NAIVE). --export([ra/3]). +-export([ra/4]). -include("../x86/hipe_x86.hrl"). -define(HIPE_INSTRUMENT_COMPILER, true). % enable instrumentation -include("../main/hipe.hrl"). -ra(X86Defun, Coloring_fp, Options) -> - #defun{code=Code0} = X86Defun, - Code1 = do_insns(Code0), +ra(CFG0, Liveness, Coloring_fp, Options) -> + CFG = hipe_x86_cfg:map_bbs(fun do_bb/2, CFG0), NofSpilledFloats = count_non_float_spills(Coloring_fp), NofFloats = length(Coloring_fp), ?add_spills(Options, hipe_gensym:get_var(x86) - @@ -49,15 +42,17 @@ ra(X86Defun, Coloring_fp, Options) -> NofSpilledFloats - NofFloats), TempMap = [], - {X86Defun#defun{code=Code1, - var_range={0, hipe_gensym:get_var(x86)}}, + {CFG, Liveness, TempMap}. +do_bb(_Lbl, BB) -> + hipe_bb:code_update(BB, do_insns(hipe_bb:code(BB))). + count_non_float_spills(Coloring_fp) -> count_non_float_spills(Coloring_fp, 0). count_non_float_spills([{_,To}|Tail], Num) -> - case ?HIPE_X86_SPECIFIC_FP:is_precoloured(To) of + case ?HIPE_X86_SPECIFIC_FP:is_precoloured(To, no_context) of true -> count_non_float_spills(Tail, Num); false -> @@ -99,6 +94,8 @@ do_insn(I) -> % Insn -> Insn list do_fp_binop(I); #shift{} -> do_shift(I); + #test{} -> + do_test(I); #label{} -> [I]; #pseudo_jcc{} -> @@ -309,6 +306,11 @@ do_shift(I) -> FixDst ++ [I#shift{dst=Dst}] end. +do_test(I) -> + #test{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0), + FixSrc ++ FixDst ++ [I#test{src=Src,dst=Dst}]. + %%% Fix the operands of a binary op. %%% 1. remove pseudos from any explicit memory operands %%% 2. if both operands are (implicit or explicit) memory operands, diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl index 0a70bd1d22..db6391d5c1 100644 --- a/lib/hipe/x86/hipe_x86_ra_postconditions.erl +++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -ifdef(HIPE_AMD64). -define(HIPE_X86_RA_POSTCONDITIONS, hipe_amd64_ra_postconditions). @@ -40,14 +33,18 @@ -include("../main/hipe.hrl"). -define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). -check_and_rewrite(Defun, Coloring, Strategy) -> +check_and_rewrite(CFG, Coloring, Strategy) -> %% io:format("Converting\n"), - TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), + TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC, no_context), %% io:format("Rewriting\n"), - #defun{code=Code0} = Defun, - {Code1, DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - {Defun#defun{code=Code1,var_range={0,hipe_gensym:get_var(x86)}}, - DidSpill}. + do_bbs(hipe_x86_cfg:labels(CFG), TempMap, Strategy, CFG, false). + +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_x86_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_x86_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), @@ -77,8 +74,12 @@ do_insn(I, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill} do_movx(I, TempMap, Strategy); #fmove{} -> do_fmove(I, TempMap, Strategy); + #pseudo_spill_move{} -> + do_pseudo_spill_move(I, TempMap, Strategy); #shift{} -> do_shift(I, TempMap, Strategy); + #test{} -> + do_test(I, TempMap, Strategy); _ -> %% comment, jmp*, label, pseudo_call, pseudo_jcc, pseudo_tailcall, %% pseudo_tailcall_prepare, push, ret @@ -169,24 +170,41 @@ do_jmp_switch(I, TempMap, Strategy) -> %%% Fix a lea op. do_lea(I, TempMap, Strategy) -> - #lea{temp=Temp} = I, - case is_spilled(Temp, TempMap) of - false -> - {[I], false}; - true -> - NewTmp = spill_temp('untagged', Strategy), - {[I#lea{temp=NewTmp}, hipe_x86:mk_move(NewTmp, Temp)], - true} + #lea{mem=Mem0,temp=Temp0} = I, + {FixMem, Mem, DidSpill1} = fix_mem_operand(Mem0, TempMap, temp1(Strategy)), + case Mem of + #x86_mem{base=Base, off=#x86_imm{value=0}} -> + %% We've decayed into a move due to both operands being memory (there's an + %% 'add' in FixMem). + {FixMem ++ [hipe_x86:mk_move(Base, Temp0)], DidSpill1}; + #x86_mem{} -> + {StoreTemp, Temp, DidSpill2} = + case is_mem_opnd(Temp0, TempMap) of + false -> {[], Temp0, false}; + true -> + Temp1 = clone2(Temp0, temp0(Strategy)), + {[hipe_x86:mk_move(Temp1, Temp0)], Temp1, true} + end, + {FixMem ++ [I#lea{mem=Mem,temp=Temp} | StoreTemp], DidSpill1 or DidSpill2} end. %%% Fix a move op. do_move(I, TempMap, Strategy) -> #move{src=Src0,dst=Dst0} = I, - {FixSrc, Src, FixDst, Dst, DidSpill} = - do_check_byte_move(Src0, Dst0, TempMap, Strategy), - {FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}], - DidSpill}. + case + is_record(Src0, x86_temp) andalso is_record(Dst0, x86_temp) + andalso is_spilled(Src0, TempMap) andalso is_spilled(Dst0, TempMap) + of + true -> + Tmp = clone(Src0, Strategy), + {[hipe_x86:mk_pseudo_spill_move(Src0, Tmp, Dst0)], true}; + false -> + {FixSrc, Src, FixDst, Dst, DidSpill} = + do_check_byte_move(Src0, Dst0, TempMap, Strategy), + {FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}], + DidSpill} + end. -ifdef(HIPE_AMD64). @@ -280,6 +298,13 @@ do_fmove(I, TempMap, Strategy) -> {FixSrc ++ FixDst ++ [I#fmove{src=Src,dst=Dst}], DidSpill1 or DidSpill2}. +%%% Fix an pseudo_spill_move op. + +do_pseudo_spill_move(I = #pseudo_spill_move{temp=Temp}, TempMap, _Strategy) -> + %% Temp is above the low water mark and must not have been spilled + false = is_spilled(Temp, TempMap), + {[I], false}. % nothing to do + %%% Fix a shift operation. %%% 1. remove pseudos from any explicit memory operands %%% 2. if the source is a register or memory position @@ -296,6 +321,14 @@ do_shift(I, TempMap, Strategy) -> {FixDst ++ [I#shift{dst=Dst}], DidSpill} end. +%%% Fix a test op. + +do_test(I, TempMap, Strategy) -> + #test{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst, DidSpill} = + do_binary(Src0, Dst0, TempMap, Strategy), + {FixSrc ++ FixDst ++ [I#test{src=Src,dst=Dst}], DidSpill}. + %%% Fix the operands of a binary op. %%% 1. remove pseudos from any explicit memory operands %%% 2. if both operands are (implicit or explicit) memory operands, @@ -377,19 +410,12 @@ is_mem_opnd(Opnd, TempMap) -> Reg = hipe_x86:temp_reg(Opnd), case hipe_x86:temp_is_allocatable(Opnd) of true -> - case tuple_size(TempMap) > Reg of + case + hipe_temp_map:is_spilled(Reg, TempMap) of true -> - case - hipe_temp_map:is_spilled(Reg, TempMap) of - true -> - ?count_temp(Reg), - true; - false -> false - end; - _ -> - %% impossible, but was true in ls post and false in normal post - exit({?MODULE,is_mem_opnd,Reg}), - false + ?count_temp(Reg), + true; + false -> false end; false -> true end; @@ -404,15 +430,10 @@ is_spilled(Temp, TempMap) -> case hipe_x86:temp_is_allocatable(Temp) of true -> Reg = hipe_x86:temp_reg(Temp), - case tuple_size(TempMap) > Reg of + case hipe_temp_map:is_spilled(Reg, TempMap) of true -> - case hipe_temp_map:is_spilled(Reg, TempMap) of - true -> - ?count_temp(Reg), - true; - false -> - false - end; + ?count_temp(Reg), + true; false -> false end; @@ -429,14 +450,14 @@ clone(Dst, Strategy) -> end, spill_temp(Type, Strategy). -spill_temp0(Type, 'normal') -> +spill_temp0(Type, 'normal') when Type =/= double -> hipe_x86:mk_new_temp(Type); -spill_temp0(Type, 'linearscan') -> +spill_temp0(Type, 'linearscan') when Type =/= double -> hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), Type). -spill_temp(Type, 'normal') -> +spill_temp(Type, 'normal') when Type =/= double -> hipe_x86:mk_new_temp(Type); -spill_temp(Type, 'linearscan') -> +spill_temp(Type, 'linearscan') when Type =/= double -> hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), Type). %%% Make a certain reg into a clone of Dst @@ -448,6 +469,6 @@ clone2(Dst, RegOpt) -> #x86_temp{} -> hipe_x86:temp_type(Dst) end, case RegOpt of - [] -> hipe_x86:mk_new_temp(Type); + [] when Type =/= double -> hipe_x86:mk_new_temp(Type); Reg -> hipe_x86:mk_temp(Reg, Type) end. diff --git a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl b/lib/hipe/x86/hipe_x86_ra_x87_ls.erl deleted file mode 100644 index 1ee76e5948..0000000000 --- a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl +++ /dev/null @@ -1,64 +0,0 @@ -%% $Id$ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2006-2016. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% - -%% Linear Scan register allocator for x87 - --ifdef(HIPE_AMD64). --define(HIPE_X86_RA_X87_LS, hipe_amd64_ra_x87_ls). --define(HIPE_X86_SPECIFIC_X87, hipe_amd64_specific_x87). --define(HIPE_X86_PP, hipe_amd64_pp). --define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). --else. --define(HIPE_X86_RA_X87_LS, hipe_x86_ra_x87_ls). --define(HIPE_X86_SPECIFIC_X87, hipe_x86_specific_x87). --define(HIPE_X86_PP, hipe_x86_pp). --define(HIPE_X86_RA_LS, hipe_x86_ra_ls). --endif. - --module(?HIPE_X86_RA_X87_LS). --export([ra/2]). - -%%-define(DEBUG,1). - --define(HIPE_INSTRUMENT_COMPILER, false). %% Turn off instrumentation. --include("../main/hipe.hrl"). - -ra(Defun, Options) -> - ?inc_counter(ra_calls_counter,1), - CFG = hipe_x86_cfg:init(Defun), - %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), - SpillIndex = 0, - SpillLimit = ?HIPE_X86_SPECIFIC_X87:number_of_temporaries(CFG), - ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), - - ?inc_counter(ra_iteration_counter,1), - %% ?HIPE_X86_PP:pp(Defun), - Cfg = hipe_x86_cfg:init(Defun), % XXX: didn't we just compute this above? - - {Coloring,NewSpillIndex} = - ?HIPE_X86_RA_LS:regalloc(Cfg, - ?HIPE_X86_SPECIFIC_X87:allocatable(), - [hipe_x86_cfg:start_label(Cfg)], - SpillIndex, SpillLimit, Options, - ?HIPE_X86_SPECIFIC_X87), - - ?add_spills(Options, NewSpillIndex), - {Defun, Coloring, NewSpillIndex}. diff --git a/lib/hipe/x86/hipe_x86_registers.erl b/lib/hipe/x86/hipe_x86_registers.erl index 179d734501..dbff68ad28 100644 --- a/lib/hipe/x86/hipe_x86_registers.erl +++ b/lib/hipe/x86/hipe_x86_registers.erl @@ -1,8 +1,3 @@ -%%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -14,9 +9,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% -%%% %%% %%% TODO: %%% - Do we need a pseudo reg for the condition codes? @@ -224,6 +216,8 @@ ret(N) -> exit({?MODULE, ret, N}) end. +%% Note: the fact that (allocatable() UNION allocatable_x87()) is a subset of +%% call_clobbered() is hard-coded in hipe_x86_defuse:insn_defs_all/1 call_clobbered() -> [{?EAX,tagged},{?EAX,untagged}, % does the RA strip the type or not? {?EDX,tagged},{?EDX,untagged}, diff --git a/lib/hipe/x86/hipe_x86_spill_restore.erl b/lib/hipe/x86/hipe_x86_spill_restore.erl index f17b91f33b..90edef31f3 100644 --- a/lib/hipe/x86/hipe_x86_spill_restore.erl +++ b/lib/hipe/x86/hipe_x86_spill_restore.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% %% ==================================================================== %% Authors : Dogan Yazar and Erdem Aksu (KT2 project of 2008) %% ==================================================================== @@ -25,13 +19,11 @@ -ifdef(HIPE_AMD64). -define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore). -define(HIPE_X86_LIVENESS, hipe_amd64_liveness). --define(HIPE_X86_SPECIFIC, hipe_amd64_specific). -define(HIPE_X86_REGISTERS, hipe_amd64_registers). -define(X86STR, "amd64"). -else. -define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore). -define(HIPE_X86_LIVENESS, hipe_x86_liveness). --define(HIPE_X86_SPECIFIC, hipe_x86_specific). -define(HIPE_X86_REGISTERS, hipe_x86_registers). -define(X86STR, "x86"). -endif. @@ -51,15 +43,13 @@ -include("../flow/cfg.hrl"). % Added for the definition of #cfg{} %% Main function -spill_restore(Defun, Options) -> - CFG = ?option_time(firstPass(Defun), ?X86STR" First Pass", Options), - CFGFinal = ?option_time(secondPass(CFG), ?X86STR" Second Pass", Options), - hipe_x86_cfg:linearise(CFGFinal). +spill_restore(CFG0, Options) -> + CFG1 = ?option_time(firstPass(CFG0), ?X86STR" First Pass", Options), + ?option_time(secondPass(CFG1), ?X86STR" Second Pass", Options). %% Performs the first pass of the algorithm. %% By working bottom up, introduce the pseudo_spills. -firstPass(Defun) -> - CFG0 = ?HIPE_X86_SPECIFIC:defun_to_cfg(Defun), +firstPass(CFG0) -> %% get the labels bottom up Labels = hipe_x86_cfg:postorder(CFG0), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), diff --git a/lib/hipe/x86/hipe_x86_subst.erl b/lib/hipe/x86/hipe_x86_subst.erl new file mode 100644 index 0000000000..7db3b23d92 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_subst.erl @@ -0,0 +1,112 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_SUBST, hipe_amd64_subst). +-else. +-define(HIPE_X86_SUBST, hipe_x86_subst). +-endif. + +-module(?HIPE_X86_SUBST). +-export([insn_temps/2, insn_lbls/2]). +-include("../x86/hipe_x86.hrl"). + +%% These should be moved to hipe_x86 and exported +-type temp() :: #x86_temp{}. +-type oper() :: temp() | #x86_imm{} | #x86_mem{}. +-type mfarec() :: #x86_mfa{}. +-type prim() :: #x86_prim{}. +-type funv() :: mfarec() | prim() | temp(). +-type label() :: non_neg_integer(). +-type insn() :: tuple(). % for now + +-type subst_fun() :: fun((temp()) -> temp()). + +%% @doc Maps over the temporaries in an instruction +-spec insn_temps(subst_fun(), insn()) -> insn(). +insn_temps(SubstTemp, I) -> + O = fun(O) -> oper_temps(SubstTemp, O) end, + case I of + #alu {src=S, dst=D} -> I#alu {src=O(S), dst=O(D)}; + #cmovcc {src=S, dst=D} -> I#cmovcc {src=O(S), dst=O(D)}; + #cmp {src=S, dst=D} -> I#cmp {src=O(S), dst=O(D)}; + #fmove {src=S, dst=D} -> I#fmove {src=O(S), dst=O(D)}; + #fp_binop{src=S, dst=D} -> I#fp_binop{src=O(S), dst=O(D)}; + #imul {src=S, temp=T} -> I#imul {src=O(S), temp=O(T)}; + #lea {mem=M, temp=T} -> I#lea {mem=O(M), temp=O(T)}; + #move {src=S, dst=D} -> I#move {src=O(S), dst=O(D)}; + #movsx {src=S, dst=D} -> I#movsx {src=O(S), dst=O(D)}; + #movzx {src=S, dst=D} -> I#movzx {src=O(S), dst=O(D)}; + #shift {src=S, dst=D} -> I#shift {src=O(S), dst=O(D)}; + #test {src=S, dst=D} -> I#test {src=O(S), dst=O(D)}; + #fp_unop{arg=[]} -> I; + #fp_unop{arg=A} -> I#fp_unop{arg=O(A)}; + #move64 {dst=D} -> I#move64 {dst=O(D)}; + #push {src=S} -> I#push {src=O(S)}; + #pop {dst=D} -> I#pop {dst=O(D)}; + #jmp_switch{temp=T, jtab=J} -> + I#jmp_switch{temp=O(T), jtab=jtab_temps(SubstTemp, J)}; + #pseudo_call{'fun'=F} -> + I#pseudo_call{'fun'=funv_temps(SubstTemp, F)}; + #pseudo_spill_fmove{src=S, temp=T, dst=D} -> + I#pseudo_spill_fmove{src=O(S), temp=O(T), dst=O(D)}; + #pseudo_spill_move{src=S, temp=T, dst=D} -> + I#pseudo_spill_move{src=O(S), temp=O(T), dst=O(D)}; + #pseudo_tailcall{'fun'=F, stkargs=Stk} -> + I#pseudo_tailcall{'fun'=funv_temps(SubstTemp, F), + stkargs=lists:map(O, Stk)}; + #comment{} -> I; + #jmp_label{} -> I; + #pseudo_tailcall_prepare{} -> I; + #pseudo_jcc{} -> I; + #ret{} -> I + end. + +-spec oper_temps(subst_fun(), oper()) -> oper(). +oper_temps(_SubstTemp, I=#x86_imm{}) -> I; +oper_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T); +oper_temps(SubstTemp, M=#x86_mem{base=Base,off=Off}) -> + M#x86_mem{base=oper_temps(SubstTemp, Base), + off =oper_temps(SubstTemp, Off)}. + +-spec funv_temps(subst_fun(), funv()) -> funv(). +funv_temps(_SubstTemp, MFA=#x86_mfa{}) -> MFA; +funv_temps(_SubstTemp, P=#x86_prim{}) -> P; +funv_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T). + +%% TODO: Undo this ifdeffery at the source (make jtab an #x86_imm{} on x86) +-ifdef(HIPE_AMD64). +jtab_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T). +-else. +jtab_temps(_SubstTemp, DataLbl) when is_integer(DataLbl) -> DataLbl. +-endif. + +-type lbl_subst_fun() :: fun((label()) -> label()). + +%% @doc Maps over the branch targets in an instruction +-spec insn_lbls(lbl_subst_fun(), insn()) -> insn(). +insn_lbls(SubstLbl, I) -> + case I of + #jmp_label{label=Label} -> + I#jmp_label{label=SubstLbl(Label)}; + #pseudo_call{sdesc=Sdesc, contlab=Contlab} -> + I#pseudo_call{sdesc=sdesc_lbls(SubstLbl, Sdesc), + contlab=SubstLbl(Contlab)}; + #pseudo_jcc{true_label=T, false_label=F} -> + I#pseudo_jcc{true_label=SubstLbl(T), false_label=SubstLbl(F)} + end. + +sdesc_lbls(_SubstLbl, Sdesc=#x86_sdesc{exnlab=[]}) -> Sdesc; +sdesc_lbls(SubstLbl, Sdesc=#x86_sdesc{exnlab=Exnlab}) -> + Sdesc#x86_sdesc{exnlab=SubstLbl(Exnlab)}. diff --git a/lib/hipe/x86/hipe_x86_x87.erl b/lib/hipe/x86/hipe_x86_x87.erl index e874490252..85268ab85a 100644 --- a/lib/hipe/x86/hipe_x86_x87.erl +++ b/lib/hipe/x86/hipe_x86_x87.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %% Floating point handling. @@ -41,13 +35,12 @@ %%---------------------------------------------------------------------- -map(Defun) -> - CFG0 = hipe_x86_cfg:init(Defun), +map(CFG0) -> %% hipe_x86_cfg:pp(CFG0), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), StartLabel = hipe_x86_cfg:start_label(CFG0), {CFG1,_} = do_blocks([], [StartLabel], CFG0, Liveness, [], gb_trees:empty()), - hipe_x86_cfg:linearise(CFG1). + CFG1. do_blocks(Pred, [Lbl|Lbls], CFG, Liveness, Map, BlockMap) -> case gb_trees:lookup(Lbl, BlockMap) of |