diff options
Diffstat (limited to 'lib/hipe/x86')
26 files changed, 730 insertions, 507 deletions
diff --git a/lib/hipe/x86/Makefile b/lib/hipe/x86/Makefile index 93f8b955dd..84edeaebe7 100644 --- a/lib/hipe/x86/Makefile +++ b/lib/hipe/x86/Makefile @@ -60,9 +60,9 @@ MODULES=hipe_rtl_to_x86 \ hipe_x86_ra_ls \ hipe_x86_ra_naive \ hipe_x86_ra_postconditions \ - hipe_x86_ra_x87_ls \ hipe_x86_registers \ hipe_x86_spill_restore \ + hipe_x86_subst \ hipe_x86_x87 HRL_FILES=hipe_x86.hrl @@ -133,7 +133,6 @@ $(EBIN)/hipe_x86_ra: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_dummy.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_ls.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_ra_postconditions.beam: ../main/hipe.hrl -$(EBIN)/hipe_x86_ra_x87_ls.beam: ../main/hipe.hrl $(EBIN)/hipe_x86_registers.beam: ../rtl/hipe_literals.hrl $(EBIN)/hipe_x86_spill_restore.beam: ../main/hipe.hrl ../flow/cfg.hrl $(EBIN)/hipe_x86_x87.beam: ../main/hipe.hrl diff --git a/lib/hipe/x86/NOTES.OPTIM b/lib/hipe/x86/NOTES.OPTIM index 4c241cacb4..c518ea3481 100644 --- a/lib/hipe/x86/NOTES.OPTIM +++ b/lib/hipe/x86/NOTES.OPTIM @@ -1,5 +1,3 @@ -$Id$ - Partial x86 code optimisation guide =================================== Priority should be given to P6 and P4, then K7, diff --git a/lib/hipe/x86/NOTES.RA b/lib/hipe/x86/NOTES.RA index ce80411642..173eaf229e 100644 --- a/lib/hipe/x86/NOTES.RA +++ b/lib/hipe/x86/NOTES.RA @@ -1,5 +1,3 @@ -$Id$ - Register Allocation =================== diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl index d13f63b1d9..31e4f6e4ac 100644 --- a/lib/hipe/x86/hipe_rtl_to_x86.erl +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% -%%% %%% %%% Translate 3-address RTL code to 2-address pseudo-x86 code. @@ -85,32 +78,37 @@ conv_insn(I, Map, Data) -> true -> conv_shift(Dst, Src1, BinOp, Src2); false -> - conv_alu(Dst, Src1, BinOp, Src2, []) + conv_alu_nocc(Dst, Src1, BinOp, Src2, []) end, {FixSrc1++FixSrc2++I2, Map2, Data}; #alub{} -> %% dst = src1 op src2; if COND goto label BinOp = conv_binop(hipe_rtl:alub_op(I)), - {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), - {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), - {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), + {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map), + {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0), Cc = conv_cond(hipe_rtl:alub_cond(I)), - I1 = [hipe_x86:mk_pseudo_jcc(Cc, - hipe_rtl:alub_true_label(I), - hipe_rtl:alub_false_label(I), - hipe_rtl:alub_pred(I))], - I2 = conv_alu(Dst, Src1, BinOp, Src2, I1), - {FixSrc1++FixSrc2++I2, Map2, Data}; - #branch{} -> - %% <unused> = src1 - src2; if COND goto label - {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), - {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), - Cc = conv_cond(hipe_rtl:branch_cond(I)), - I2 = conv_branch(Src1, Cc, Src2, - hipe_rtl:branch_true_label(I), - hipe_rtl:branch_false_label(I), - hipe_rtl:branch_pred(I)), - {FixSrc1++FixSrc2++I2, Map1, Data}; + BranchOp = conv_branchop(BinOp), + HasDst = hipe_rtl:alub_has_dst(I), + {I2, Map3} = + case (not HasDst) andalso BranchOp =/= none of + true -> + {conv_branch(Src1, BranchOp, Src2, Cc, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I)), Map1}; + false -> + {Dst, Map2} = + case HasDst of + false -> {new_untagged_temp(), Map1}; + true -> conv_dst(hipe_rtl:alub_dst(I), Map1) + end, + I1 = [hipe_x86:mk_pseudo_jcc(Cc, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I))], + {conv_alu(Dst, Src1, BinOp, Src2, I1), Map2} + end, + {FixSrc1++FixSrc2++I2, Map3, Data}; #call{} -> %% push <arg1> %% ... @@ -126,7 +124,6 @@ conv_insn(I, Map, Data) -> hipe_rtl:call_continuation(I), hipe_rtl:call_fail(I), hipe_rtl:call_type(I)), - %% XXX Fixme: this ++ is probably inefficient. {FixArgs++I2, Map2, Data}; #comment{} -> I2 = [hipe_x86:mk_comment(hipe_rtl:comment_text(I))], @@ -144,7 +141,7 @@ conv_insn(I, Map, Data) -> {I2, Map, Data}; #load{} -> {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map), - {FixSrc, Src, Map1} = conv_src(hipe_rtl:load_src(I), Map0), + {FixSrc, Src, Map1} = conv_src_noimm(hipe_rtl:load_src(I), Map0), {FixOff, Off, Map2} = conv_src(hipe_rtl:load_offset(I), Map1), I2 = case {hipe_rtl:load_size(I), hipe_rtl:load_sign(I)} of {byte, signed} -> @@ -171,6 +168,7 @@ conv_insn(I, Map, Data) -> Src = hipe_x86:mk_imm_from_atom(hipe_rtl:load_atom_atom(I)), I2 = [hipe_x86:mk_move(Src, Dst)], {I2, Map0, Data}; + #move{src=Dst, dst=Dst} -> {[], Map, Data}; #move{} -> {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map), {FixSrc, Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0), @@ -182,11 +180,11 @@ conv_insn(I, Map, Data) -> I2 = move_retvals(Args, [hipe_x86:mk_ret(-1)]), {FixArgs++I2, Map0, Data}; #store{} -> - {Ptr, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {FixPtr, Ptr, Map0} = conv_src_noimm(hipe_rtl:store_base(I), Map), {FixSrc, Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), {FixOff, Off, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), I2 = mk_store(hipe_rtl:store_size(I), Src, Ptr, Off), - {FixSrc++FixOff++I2, Map2, Data}; + {FixPtr++FixSrc++FixOff++I2, Map2, Data}; #switch{} -> % this one also updates Data :-( %% from hipe_rtl2sparc, but we use a hairy addressing mode %% instead of doing the arithmetic manually @@ -206,7 +204,7 @@ conv_insn(I, Map, Data) -> {I2, Map1, NewData}; #fload{} -> {Dst, Map0} = conv_dst(hipe_rtl:fload_dst(I), Map), - {[], Src, Map1} = conv_src(hipe_rtl:fload_src(I), Map0), + {[], Src, Map1} = conv_src_noimm(hipe_rtl:fload_src(I), Map0), {[], Off, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1), I2 = [hipe_x86:mk_fmove(hipe_x86:mk_mem(Src, Off, 'double'),Dst)], {I2, Map2, Data}; @@ -249,6 +247,34 @@ conv_insn(I, Map, Data) -> %%% Finalise the conversion of a 3-address ALU operation, taking %%% care to not introduce more temps and moves than necessary. +conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) -> + case (not same_opnd(Dst, Src1)) andalso (not same_opnd(Dst, Src2)) + %% We could use orelse instead of xor here to generate lea T1(T2), T3, but + %% they seem to move coalesce so well that move+add is better for them. + andalso (hipe_x86:is_temp(Src1) xor hipe_x86:is_temp(Src2)) + of + false -> conv_alu(Dst, Src1, 'add', Src2, Tail); + true -> % Use LEA + Type = typeof_dst(Dst), + Mem = case hipe_x86:is_temp(Src1) of + true -> hipe_x86:mk_mem(Src1, Src2, Type); + false -> hipe_x86:mk_mem(Src2, Src1, Type) + end, + [hipe_x86:mk_lea(Mem, Dst) | Tail] + end; +conv_alu_nocc(Dst, Src1, 'sub', Src2, Tail) -> + case (not same_opnd(Dst, Src1)) andalso hipe_x86:is_temp(Src1) + andalso (not hipe_x86:is_temp(Src2)) + of + false -> conv_alu(Dst, Src1, 'sub', Src2, Tail); + true -> % Use LEA + Imm = hipe_x86:mk_imm(-hipe_x86:imm_value(Src2)), + Mem = hipe_x86:mk_mem(Src1, Imm, typeof_dst(Dst)), + [hipe_x86:mk_lea(Mem, Dst) | Tail] + end; +conv_alu_nocc(Dst, Src1, BinOp, Src2, Tail) -> + conv_alu(Dst, Src1, BinOp, Src2, Tail). + conv_alu(Dst, Src1, 'imul', Src2, Tail) -> mk_imul(Src1, Src2, Dst, Tail); conv_alu(Dst, Src1, BinOp, Src2, Tail) -> @@ -343,28 +369,41 @@ conv_shift(Dst, Src1, BinOp, Src2) -> %%% Finalise the conversion of a conditional branch operation, taking %%% care to not introduce more temps and moves than necessary. -conv_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) -> +conv_branchop('sub') -> 'cmp'; +conv_branchop('and') -> 'test'; +conv_branchop(_) -> none. + +branchop_commutes('cmp') -> false; +branchop_commutes('test') -> true. + +conv_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) -> case hipe_x86:is_imm(Src1) of false -> - mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred); + mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred); true -> case hipe_x86:is_imm(Src2) of false -> - NewCc = commute_cc(Cc), - mk_branch(Src2, NewCc, Src1, TrueLab, FalseLab, Pred); + NewCc = case branchop_commutes(Op) of + true -> Cc; + false -> commute_cc(Cc) + end, + mk_branch(Src2, Op, Src1, NewCc, TrueLab, FalseLab, Pred); true -> %% two immediates, let the optimiser clean it up Tmp = new_untagged_temp(), [hipe_x86:mk_move(Src1, Tmp) | - mk_branch(Tmp, Cc, Src2, TrueLab, FalseLab, Pred)] + mk_branch(Tmp, Op, Src2, Cc, TrueLab, FalseLab, Pred)] end end. -mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) -> +mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) -> %% PRE: not(is_imm(Src1)) - [hipe_x86:mk_cmp(Src2, Src1), + [mk_branchtest(Src1, Op, Src2), hipe_x86:mk_pseudo_jcc(Cc, TrueLab, FalseLab, Pred)]. +mk_branchtest(Src1, cmp, Src2) -> hipe_x86:mk_cmp(Src2, Src1); +mk_branchtest(Src1, test, Src2) -> hipe_x86:mk_test(Src2, Src1). + %%% Convert an RTL ALU or ALUB binary operator. conv_binop(BinOp) -> @@ -572,6 +611,16 @@ conv_fun(Fun, Map) -> end end. +conv_src_noimm(Opnd, Map) -> + R={FixSrc0, Src, NewMap} = conv_src(Opnd, Map), + case hipe_x86:is_imm(Src) of + false -> R; + true -> + Tmp = new_untagged_temp(), + {FixSrc0 ++ [hipe_x86:mk_move(Src, Tmp)], + Tmp, NewMap} + end. + %%% Convert an RTL source operand (imm/var/reg). conv_src(Opnd, Map) -> diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl index 33d7f77cf1..f514dd1ded 100644 --- a/lib/hipe/x86/hipe_x86.erl +++ b/lib/hipe/x86/hipe_x86.erl @@ -1,8 +1,3 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %% representation of 2-address pseudo-amd64 code @@ -37,7 +30,7 @@ mk_imm_from_addr/2, mk_imm_from_atom/1, is_imm/1, - %% imm_value/1, + imm_value/1, mk_mem/3, %% is_mem/1, @@ -174,6 +167,12 @@ mk_pseudo_spill/1, + mk_pseudo_spill_fmove/3, + is_pseudo_spill_fmove/1, + + mk_pseudo_spill_move/3, + is_pseudo_spill_move/1, + mk_pseudo_tailcall/4, %% is_pseudo_tailcall/1, pseudo_tailcall_fun/1, @@ -201,7 +200,7 @@ shift_src/1, shift_dst/1, - %% mk_test/2, + mk_test/2, test_src/1, test_dst/1, @@ -218,6 +217,10 @@ %% highest_temp/1 ]). +%% Other utilities +-export([neg_cc/1 + ]). + %%% %%% Low-level accessors. %%% @@ -241,7 +244,7 @@ mk_imm_from_addr(Addr, Type) -> mk_imm_from_atom(Atom) -> mk_imm(Atom). is_imm(X) -> case X of #x86_imm{} -> true; _ -> false end. -%% imm_value(#x86_imm{value=Value}) -> Value. +imm_value(#x86_imm{value=Value}) -> Value. mk_mem(Base, Off, Type) -> #x86_mem{base=Base, off=Off, type=Type}. %% is_mem(X) -> case X of #x86_mem{} -> true; _ -> false end. @@ -305,7 +308,7 @@ mk_cmp(Src, Dst) -> #cmp{src=Src, dst=Dst}. cmp_src(#cmp{src=Src}) -> Src. cmp_dst(#cmp{dst=Dst}) -> Dst. -%% mk_test(Src, Dst) -> #test{src=Src, dst=Dst}. +mk_test(Src, Dst) -> #test{src=Src, dst=Dst}. test_src(#test{src=Src}) -> Src. test_dst(#test{dst=Dst}) -> Dst. @@ -428,6 +431,14 @@ mk_pseudo_jcc_simple(Cc, TrueLabel, FalseLabel, Pred) -> mk_pseudo_spill(List) -> #pseudo_spill{args=List}. +mk_pseudo_spill_fmove(Src, Temp, Dst) -> + #pseudo_spill_fmove{src=Src, temp=Temp, dst=Dst}. +is_pseudo_spill_fmove(I) -> is_record(I, pseudo_spill_fmove). + +mk_pseudo_spill_move(Src, Temp, Dst) -> + #pseudo_spill_move{src=Src, temp=Temp, dst=Dst}. +is_pseudo_spill_move(I) -> is_record(I, pseudo_spill_move). + mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage) -> check_linkage(Linkage), #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage}. diff --git a/lib/hipe/x86/hipe_x86.hrl b/lib/hipe/x86/hipe_x86.hrl index ef99bf90d9..6cd69905b2 100644 --- a/lib/hipe/x86/hipe_x86.hrl +++ b/lib/hipe/x86/hipe_x86.hrl @@ -1,8 +1,3 @@ -%%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% concrete representation of 2-address pseudo-x86 code @@ -98,6 +91,8 @@ -record(pseudo_call, {'fun', sdesc, contlab, linkage}). -record(pseudo_jcc, {cc, true_label, false_label, pred}). -record(pseudo_spill, {args=[]}). +-record(pseudo_spill_move, {src, temp, dst}). +-record(pseudo_spill_fmove, {src, temp, dst}). -record(pseudo_tailcall, {'fun', arity, stkargs, linkage}). -record(pseudo_tailcall_prepare, {}). -record(push, {src}). diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl index e21223a5b1..50919bdf4e 100644 --- a/lib/hipe/x86/hipe_x86_assemble.erl +++ b/lib/hipe/x86/hipe_x86_assemble.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% HiPE/x86 assembler %%% @@ -69,7 +63,7 @@ assemble(CompiledCode, Closures, Exports, Options) -> || {MFA, Defun} <- CompiledCode], %% {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = - hipe_pack_constants:pack_constants(Code, ?HIPE_X86_REGISTERS:alignment()), + hipe_pack_constants:pack_constants(Code), %% {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = encode(translate(Code, ConstMap, Options), Options), @@ -154,6 +148,8 @@ insn_size(I) -> translate_insn(I, Context, Options) -> case I of + #alu{aluop='xor', src=#x86_temp{reg=Reg}=Src, dst=#x86_temp{reg=Reg}=Dst} -> + [{'xor', {temp_to_reg32(Dst), temp_to_rm32(Src)}, I}]; #alu{} -> Arg = resolve_alu_args(hipe_x86:alu_src(I), hipe_x86:alu_dst(I), Context), [{hipe_x86:alu_op(I), Arg, I}]; @@ -234,11 +230,11 @@ translate_insn(I, Context, Options) -> #move64{} -> translate_move64(I, Context); #movsx{} -> - Arg = resolve_movx_args(hipe_x86:movsx_src(I), hipe_x86:movsx_dst(I)), - [{movsx, Arg, I}]; + Src = resolve_movx_src(hipe_x86:movsx_src(I)), + [{movsx, {temp_to_regArch(hipe_x86:movsx_dst(I)), Src}, I}]; #movzx{} -> - Arg = resolve_movx_args(hipe_x86:movzx_src(I), hipe_x86:movzx_dst(I)), - [{movzx, Arg, I}]; + Src = resolve_movx_src(hipe_x86:movzx_src(I)), + [{movzx, {temp_to_reg32(hipe_x86:movzx_dst(I)), Src}, I}]; %% pseudo_call: eliminated before assembly %% pseudo_jcc: eliminated before assembly %% pseudo_tailcall: eliminated before assembly @@ -599,10 +595,20 @@ temp_to_xmm(#x86_temp{reg=Reg}) -> {xmm, Reg}. -ifdef(HIPE_AMD64). +temp_to_rm8(#x86_temp{reg=Reg}) -> + {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}. temp_to_rm64(#x86_temp{reg=Reg}) -> {rm64, hipe_amd64_encode:rm_reg(Reg)}. +-else. +temp_to_rm8(#x86_temp{reg=Reg}) -> + true = ?HIPE_X86_ENCODE:reg_has_8bit(Reg), + {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}. +temp_to_rm16(#x86_temp{reg=Reg}) -> + {rm16, ?HIPE_X86_ENCODE:rm_reg(Reg)}. -endif. +temp_to_rm32(#x86_temp{reg=Reg}) -> + {rm32, ?HIPE_X86_ENCODE:rm_reg(Reg)}. temp_to_rmArch(#x86_temp{reg=Reg}) -> {?RMArch, ?HIPE_X86_ENCODE:rm_reg(Reg)}. temp_to_rm64fp(#x86_temp{reg=Reg}) -> @@ -841,16 +847,15 @@ translate_move64(I, _Context) -> exit({?MODULE, I}). -endif. %%% mov{s,z}x -resolve_movx_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}) -> - {temp_to_regArch(Dst), - case Type of - byte -> - mem_to_rm8(Src); - int16 -> - mem_to_rm16(Src); - int32 -> - mem_to_rm32(Src) - end}. +resolve_movx_src(Src=#x86_mem{type=Type}) -> + case Type of + byte -> + mem_to_rm8(Src); + int16 -> + mem_to_rm16(Src); + int32 -> + mem_to_rm32(Src) + end. %%% alu/cmp (_not_ test) resolve_alu_args(Src, Dst, Context) -> @@ -878,15 +883,29 @@ resolve_alu_args(Src, Dst, Context) -> %%% test resolve_test_args(Src, Dst, Context) -> case Src of - #x86_imm{} -> % imm8 not allowed - {_ImmSize,ImmValue} = translate_imm(Src, Context, false), - NewDst = - case Dst of - #x86_temp{reg=0} -> ?EAX; - #x86_temp{} -> temp_to_rmArch(Dst); - #x86_mem{} -> mem_to_rmArch(Dst) - end, - {NewDst, {imm32,ImmValue}}; + %% Since we're using an 8-bit instruction, the immediate is not sign + %% extended. Thus, we can use immediates up to 255. + #x86_imm{value=ImmVal} + when is_integer(ImmVal), ImmVal >= 0, ImmVal =< 255 -> + Imm = {imm8, ImmVal}, + case Dst of + #x86_temp{reg=0} -> {al, Imm}; + #x86_temp{} -> resolve_test_imm8_reg(Imm, Dst); + #x86_mem{} -> {mem_to_rm8(Dst), Imm} + end; + #x86_imm{value=ImmVal} when is_integer(ImmVal), ImmVal >= 0 -> + {case Dst of + #x86_temp{reg=0} -> eax; + #x86_temp{} -> temp_to_rm32(Dst); + #x86_mem{} -> mem_to_rm32(Dst) + end, {imm32, ImmVal}}; + #x86_imm{} -> % Negative ImmVal; use word-sized instr, imm32 + {_, ImmVal} = translate_imm(Src, Context, false), + {case Dst of + #x86_temp{reg=0} -> ?EAX; + #x86_temp{} -> temp_to_rmArch(Dst); + #x86_mem{} -> mem_to_rmArch(Dst) + end, {imm32, ImmVal}}; #x86_temp{} -> NewDst = case Dst of @@ -896,6 +915,18 @@ resolve_test_args(Src, Dst, Context) -> {NewDst, temp_to_regArch(Src)} end. +-ifdef(HIPE_AMD64). +resolve_test_imm8_reg(Imm, Dst) -> {temp_to_rm8(Dst), Imm}. +-else. +resolve_test_imm8_reg(Imm = {imm8, ImmVal}, Dst = #x86_temp{reg=Reg}) -> + case ?HIPE_X86_ENCODE:reg_has_8bit(Reg) of + true -> {temp_to_rm8(Dst), Imm}; + false -> + %% Register does not exist in 8-bit version; use 16-bit instead + {temp_to_rm16(Dst), {imm16, ImmVal}} + end. +-endif. + %%% shifts resolve_shift_args(Src, Dst, Context) -> RM32 = diff --git a/lib/hipe/x86/hipe_x86_cfg.erl b/lib/hipe/x86/hipe_x86_cfg.erl index ab40b68580..0a3c0fc9d6 100644 --- a/lib/hipe/x86/hipe_x86_cfg.erl +++ b/lib/hipe/x86/hipe_x86_cfg.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,24 +11,22 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -module(hipe_x86_cfg). -export([init/1, labels/1, start_label/1, succ/2, pred/2, - bb/2, bb_add/3]). + bb/2, bb_add/3, map_bbs/2, fold_bbs/3]). -export([postorder/1, reverse_postorder/1]). --export([linearise/1, params/1, arity/1, redirect_jmp/3]). +-export([linearise/1, params/1, arity/1, redirect_jmp/3, branch_preds/1]). %%% these tell cfg.inc what to define (ugly as hell) -define(PRED_NEEDED,true). -define(BREADTH_ORDER,true). -define(PARAMS_NEEDED,true). -define(START_LABEL_UPDATE_NEEDED,true). +-define(MAP_FOLD_NEEDED,true). -include("hipe_x86.hrl"). -include("../flow/cfg.hrl"). @@ -78,6 +72,26 @@ branch_successors(Branch) -> #ret{} -> [] end. +branch_preds(Branch) -> + case Branch of + #jmp_switch{labels=Labels} -> + Prob = 1.0/length(Labels), + [{L, Prob} || L <- Labels]; + #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=[]}} -> + %% A function can still cause an exception, even if we won't catch it + [{ContLab, 1.0-hipe_bb_weights:call_exn_pred()}]; + #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=ExnLab}} -> + CallExnPred = hipe_bb_weights:call_exn_pred(), + [{ContLab, 1.0-CallExnPred}, {ExnLab, CallExnPred}]; + #pseudo_jcc{true_label=TrueLab,false_label=FalseLab,pred=Pred} -> + [{FalseLab, 1.0-Pred}, {TrueLab, Pred}]; + _ -> + case branch_successors(Branch) of + [] -> []; + [Single] -> [{Single, 1.0}] + end + end. + -ifdef(REMOVE_TRIVIAL_BBS_NEEDED). fails_to(_Instr) -> []. -endif. @@ -107,7 +121,7 @@ mk_goto(Label) -> hipe_x86:mk_jmp_label(Label). is_label(I) -> - hipe_x86:is_label(I). + case I of #label{} -> true; _ -> false end. label_name(Label) -> hipe_x86:label_label(Label). diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl index 9cba6cbe4b..2731836dc1 100644 --- a/lib/hipe/x86/hipe_x86_defuse.erl +++ b/lib/hipe/x86/hipe_x86_defuse.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% compute def/use sets for x86 insns %%% @@ -35,7 +29,7 @@ -endif. -module(?HIPE_X86_DEFUSE). --export([insn_def/1, insn_use/1]). %% src_use/1]). +-export([insn_def/1, insn_defs_all/1, insn_use/1]). %% src_use/1]). -include("../x86/hipe_x86.hrl"). %%% @@ -57,13 +51,25 @@ insn_def(I) -> #movzx{dst=Dst} -> dst_def(Dst); #pseudo_call{} -> call_clobbered(); #pseudo_spill{} -> []; + #pseudo_spill_fmove{temp=Temp, dst=Dst} -> [Temp, Dst]; + #pseudo_spill_move{temp=Temp, dst=Dst} -> [Temp, Dst]; #pseudo_tailcall_prepare{} -> tailcall_clobbered(); #shift{dst=Dst} -> dst_def(Dst); %% call, cmp, comment, jcc, jmp_fun, jmp_label, jmp_switch, label - %% pseudo_jcc, pseudo_tailcall, push, ret + %% pseudo_jcc, pseudo_tailcall, push, ret, test _ -> [] end. + +%% @doc Answers whether instruction I defines all allocatable registers. Used by +%% hipe_regalloc_prepass. +-spec insn_defs_all(_) -> boolean(). +insn_defs_all(I) -> + case I of + #pseudo_call{} -> true; + _ -> false + end. + dst_def(Dst) -> case Dst of #x86_temp{} -> [Dst]; @@ -104,12 +110,15 @@ insn_use(I) -> #pseudo_call{'fun'=Fun,sdesc=#x86_sdesc{arity=Arity}} -> addtemp(Fun, arity_use(Arity)); #pseudo_spill{args=Args} -> Args; + #pseudo_spill_fmove{src=Src} -> [Src]; + #pseudo_spill_move{src=Src} -> [Src]; #pseudo_tailcall{'fun'=Fun,arity=Arity,stkargs=StkArgs} -> addtemp(Fun, addtemps(StkArgs, addtemps(tailcall_clobbered(), arity_use(Arity)))); #push{src=Src} -> addtemp(Src, []); #ret{} -> [hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?RV(), 'tagged')]; #shift{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, [])); + #test{src=Src, dst=Dst} -> addtemp(Src, addtemp(Dst, [])); %% comment, jcc, jmp_label, label, pseudo_jcc, pseudo_tailcall_prepare _ -> [] end. diff --git a/lib/hipe/x86/hipe_x86_encode.erl b/lib/hipe/x86/hipe_x86_encode.erl index 3b7be86608..2662f76d0b 100644 --- a/lib/hipe/x86/hipe_x86_encode.erl +++ b/lib/hipe/x86/hipe_x86_encode.erl @@ -1,8 +1,3 @@ -%%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -14,8 +9,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% Copyright (C) 2000-2005 Mikael Pettersson %%% @@ -65,6 +58,7 @@ cc/1, % 8-bit registers %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0, + reg_has_8bit/1, % 32-bit registers %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0, % operands @@ -143,6 +137,8 @@ cc(g) -> ?CC_G. %% dh() -> ?DH. %% bh() -> ?BH. +reg_has_8bit(Reg) -> Reg =< ?BL. + %%% 32-bit registers -define(EAX, 2#000). @@ -700,8 +696,16 @@ shd_op_sizeof(Opnds) -> test_encode(Opnds) -> case Opnds of + {al, {imm8,Imm8}} -> + [16#A8, Imm8]; + {ax, {imm16,Imm16}} -> + [?PFX_OPND, 16#A9 | le16(Imm16, [])]; {eax, {imm32,Imm32}} -> [16#A9 | le32(Imm32, [])]; + {{rm8,RM8}, {imm8,Imm8}} -> + [16#F6 | encode_rm(RM8, 2#000, [Imm8])]; + {{rm16,RM16}, {imm16,Imm16}} -> + [?PFX_OPND, 16#F7 | encode_rm(RM16, 2#000, le16(Imm16, []))]; {{rm32,RM32}, {imm32,Imm32}} -> [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))]; {{rm32,RM32}, {reg32,Reg32}} -> @@ -710,8 +714,16 @@ test_encode(Opnds) -> test_sizeof(Opnds) -> case Opnds of + {al, {imm8,_}} -> + 1 + 1; + {ax, {imm16,_}} -> + 2 + 2; {eax, {imm32,_}} -> 1 + 4; + {{rm8,RM8}, {imm8,_}} -> + 1 + sizeof_rm(RM8) + 1; + {{rm16,RM16}, {imm16,_}} -> + 2 + sizeof_rm(RM16) + 2; {{rm32,RM32}, {imm32,_}} -> 1 + sizeof_rm(RM32) + 4; {{rm32,RM32}, {reg32,_}} -> @@ -1283,7 +1295,11 @@ dotest1(OS) -> t(OS,'sub',{RM32,Imm8}), t(OS,'sub',{RM32,Reg32}), t(OS,'sub',{Reg32,RM32}), + t(OS,'test',{al,Imm8}), + t(OS,'test',{ax,Imm16}), t(OS,'test',{eax,Imm32}), + t(OS,'test',{RM8,Imm8}), + t(OS,'test',{RM16,Imm16}), t(OS,'test',{RM32,Imm32}), t(OS,'test',{RM32,Reg32}), t(OS,'xor',{eax,Imm32}), diff --git a/lib/hipe/x86/hipe_x86_encode.txt b/lib/hipe/x86/hipe_x86_encode.txt index 13746e2a47..eab732fb2d 100644 --- a/lib/hipe/x86/hipe_x86_encode.txt +++ b/lib/hipe/x86/hipe_x86_encode.txt @@ -1,5 +1,3 @@ -$Id$ - hipe_x86_encode USAGE GUIDE Revision 0.4, 2001-10-09 diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl index 8851ead250..558321d0c3 100644 --- a/lib/hipe/x86/hipe_x86_frame.erl +++ b/lib/hipe/x86/hipe_x86_frame.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% x86 stack frame handling %%% @@ -46,15 +40,13 @@ -include("../x86/hipe_x86.hrl"). -include("../rtl/hipe_literals.hrl"). -frame(Defun, _Options) -> - Formals = fix_formals(hipe_x86:defun_formals(Defun)), - Temps0 = all_temps(hipe_x86:defun_code(Defun), Formals), - MinFrame = defun_minframe(Defun), +frame(CFG0, _Options) -> + Formals = fix_formals(hipe_x86_cfg:params(CFG0)), + Temps0 = all_temps(CFG0, Formals), + MinFrame = defun_minframe(CFG0), Temps = ensure_minframe(MinFrame, Temps0), - CFG0 = hipe_x86_cfg:init(Defun), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), - CFG1 = do_body(CFG0, Liveness, Formals, Temps), - hipe_x86_cfg:linearise(CFG1). + do_body(CFG0, Liveness, Formals, Temps). fix_formals(Formals) -> fix_formals(?HIPE_X86_REGISTERS:nr_args(), Formals). @@ -69,23 +61,14 @@ do_body(CFG0, Liveness, Formals, Temps) -> do_prologue(CFG1, Context). do_blocks(CFG, Context) -> - Labels = hipe_x86_cfg:labels(CFG), - do_blocks(Labels, CFG, Context). + hipe_x86_cfg:map_bbs(fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG). -do_blocks([Label|Labels], CFG, Context) -> +do_block(Label, Block, Context) -> Liveness = context_liveness(Context), LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label), - Block = hipe_x86_cfg:bb(CFG, Label), Code = hipe_bb:code(Block), - NewCode = do_block(Code, LiveOut, Context), - NewBlock = hipe_bb:code_update(Block, NewCode), - NewCFG = hipe_x86_cfg:bb_add(CFG, Label, NewBlock), - do_blocks(Labels, NewCFG, Context); -do_blocks([], CFG, _) -> - CFG. - -do_block(Insns, LiveOut, Context) -> - do_block(Insns, LiveOut, Context, context_framesize(Context), []). + NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []), + hipe_bb:code_update(Block, NewCode). do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), @@ -112,13 +95,17 @@ do_insn(I, LiveOut, Context, FPoff) -> #imul{} -> {[do_imul(I, Context, FPoff)], FPoff}; #move{} -> - {[do_move(I, Context, FPoff)], FPoff}; + {do_move(I, Context, FPoff), FPoff}; #movsx{} -> {[do_movsx(I, Context, FPoff)], FPoff}; #movzx{} -> {[do_movzx(I, Context, FPoff)], FPoff}; #pseudo_call{} -> do_pseudo_call(I, LiveOut, Context, FPoff); + #pseudo_spill_fmove{} -> + {do_pseudo_spill_fmove(I, Context, FPoff), FPoff}; + #pseudo_spill_move{} -> + {do_pseudo_spill_move(I, Context, FPoff), FPoff}; #pseudo_tailcall{} -> {do_pseudo_tailcall(I, Context), context_framesize(Context)}; #push{} -> @@ -127,6 +114,8 @@ do_insn(I, LiveOut, Context, FPoff) -> {do_ret(I, Context, FPoff), context_framesize(Context)}; #shift{} -> {[do_shift(I, Context, FPoff)], FPoff}; + #test{} -> + {[do_test(I, Context, FPoff)], FPoff}; _ -> % comment, jmp, label, pseudo_jcc, pseudo_tailcall_prepare {[I], FPoff} end. @@ -159,22 +148,50 @@ do_fp_binop(I, Context, FPoff) -> Dst = conv_opnd(Dst0, FPoff, Context), [I#fp_binop{src=Src,dst=Dst}]. -do_fmove(I, Context, FPoff) -> - #fmove{src=Src0,dst=Dst0} = I, +do_fmove(I0, Context, FPoff) -> + #fmove{src=Src0,dst=Dst0} = I0, Src = conv_opnd(Src0, FPoff, Context), Dst = conv_opnd(Dst0, FPoff, Context), - I#fmove{src=Src,dst=Dst}. + I = I0#fmove{src=Src,dst=Dst}, + case Src =:= Dst of + true -> []; % omit move-to-self + false -> [I] + end. + +do_pseudo_spill_fmove(I0, Context, FPoff) -> + #pseudo_spill_fmove{src=Src0,temp=Temp0,dst=Dst0} = I0, + Src = conv_opnd(Src0, FPoff, Context), + Temp = conv_opnd(Temp0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + case Src =:= Dst of + true -> []; % omit move-to-self + false -> [#fmove{src=Src, dst=Temp}, #fmove{src=Temp, dst=Dst}] + end. do_imul(I, Context, FPoff) -> #imul{src=Src0} = I, Src = conv_opnd(Src0, FPoff, Context), I#imul{src=Src}. -do_move(I, Context, FPoff) -> - #move{src=Src0,dst=Dst0} = I, +do_move(I0, Context, FPoff) -> + #move{src=Src0,dst=Dst0} = I0, Src = conv_opnd(Src0, FPoff, Context), Dst = conv_opnd(Dst0, FPoff, Context), - I#move{src=Src,dst=Dst}. + I = I0#move{src=Src,dst=Dst}, + case Src =:= Dst of + true -> []; % omit move-to-self + false -> [I] + end. + +do_pseudo_spill_move(I0, Context, FPoff) -> + #pseudo_spill_move{src=Src0,temp=Temp0,dst=Dst0} = I0, + Src = conv_opnd(Src0, FPoff, Context), + Temp = conv_opnd(Temp0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + case Src =:= Dst of + true -> []; % omit move-to-self + false -> [#move{src=Src, dst=Temp}, #move{src=Temp, dst=Dst}] + end. do_movsx(I, Context, FPoff) -> #movsx{src=Src0,dst=Dst0} = I, @@ -199,6 +216,12 @@ do_shift(I, Context, FPoff) -> Dst = conv_opnd(Dst0, FPoff, Context), I#shift{src=Src,dst=Dst}. +do_test(I, Context, FPoff) -> + #test{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#test{src=Src,dst=Dst}. + conv_opnd(Opnd, FPoff, Context) -> case opnd_is_pseudo(Opnd) of false -> @@ -609,39 +632,46 @@ temp_is_pseudo(Temp) -> %%% Build the set of all temps used in a Defun's body. %%% -all_temps(Code, Formals) -> - S0 = find_temps(Code, tset_empty()), +all_temps(CFG, Formals) -> + S0 = fold_insns(fun find_temps/2, tset_empty(), CFG), S1 = tset_del_list(S0, Formals), S2 = tset_filter(S1, fun(T) -> temp_is_pseudo(T) end), S2. -find_temps([I|Insns], S0) -> +find_temps(I, S0) -> S1 = tset_add_list(S0, hipe_x86_defuse:insn_def(I)), - S2 = tset_add_list(S1, hipe_x86_defuse:insn_use(I)), - find_temps(Insns, S2); -find_temps([], S) -> - S. + tset_add_list(S1, hipe_x86_defuse:insn_use(I)). + +fold_insns(Fun, InitAcc, CFG) -> + hipe_x86_cfg:fold_bbs( + fun(_, BB, Acc0) -> lists:foldl(Fun, Acc0, hipe_bb:code(BB)) end, + InitAcc, CFG). + +-compile({inline, [tset_empty/0, tset_size/1, tset_insert/2, + tset_filter/2, tset_to_list/1]}). tset_empty() -> - gb_sets:new(). + #{}. tset_size(S) -> - gb_sets:size(S). + map_size(S). tset_insert(S, T) -> - gb_sets:add_element(T, S). + S#{T => []}. -tset_add_list(S, Ts) -> - gb_sets:union(S, gb_sets:from_list(Ts)). +tset_add_list(S, []) -> S; +tset_add_list(S, [T|Ts]) -> + tset_add_list(S#{T => []}, Ts). -tset_del_list(S, Ts) -> - gb_sets:subtract(S, gb_sets:from_list(Ts)). +tset_del_list(S, []) -> S; +tset_del_list(S, [T|Ts]) -> + tset_del_list(maps:remove(T,S), Ts). tset_filter(S, F) -> - gb_sets:filter(F, S). + maps:filter(fun(K, _V) -> F(K) end, S). tset_to_list(S) -> - gb_sets:to_list(S). + maps:keys(S). %%% %%% Compute minimum permissible frame size, ignoring spilled temps. @@ -649,16 +679,11 @@ tset_to_list(S) -> %%% in the middle of a tailcall. %%% -defun_minframe(Defun) -> - MaxTailArity = body_mta(hipe_x86:defun_code(Defun), 0), - MyArity = length(fix_formals(hipe_x86:defun_formals(Defun))), +defun_minframe(CFG) -> + MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG), + MyArity = length(fix_formals(hipe_x86_cfg:params(CFG))), erlang:max(MaxTailArity - MyArity, 0). -body_mta([I|Code], MTA) -> - body_mta(Code, insn_mta(I, MTA)); -body_mta([], MTA) -> - MTA. - insn_mta(I, MTA) -> case I of #pseudo_tailcall{arity=Arity} -> diff --git a/lib/hipe/x86/hipe_x86_liveness.erl b/lib/hipe/x86/hipe_x86_liveness.erl index ce46ec920e..470501b46d 100644 --- a/lib/hipe/x86/hipe_x86_liveness.erl +++ b/lib/hipe/x86/hipe_x86_liveness.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% x86_liveness -- compute register liveness for x86 CFGs diff --git a/lib/hipe/x86/hipe_x86_main.erl b/lib/hipe/x86/hipe_x86_main.erl index 13b0bb6b28..7e9fd10e62 100644 --- a/lib/hipe/x86/hipe_x86_main.erl +++ b/lib/hipe/x86/hipe_x86_main.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -ifdef(HIPE_AMD64). -define(HIPE_X86_MAIN, hipe_amd64_main). @@ -53,19 +46,23 @@ ?RTL_TO_X86(MFA, RTL, Options) -> Translated = ?option_time(?HIPE_RTL_TO_X86:translate(RTL), "RTL-to-"?X86STR, Options), - SpillRest = + TransCFG = ?option_time(hipe_x86_cfg:init(Translated), + ?X86STR" to cfg", Options), + SpillRestCFG = case proplists:get_bool(caller_save_spill_restore, Options) of true -> - ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(Translated, Options), + ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(TransCFG, Options), ?X86STR" spill restore", Options); false -> - Translated + TransCFG end, - Allocated = ?option_time(?HIPE_X86_RA:ra(SpillRest, Options), - ?X86STR" register allocation", Options), - Framed = ?option_time(?HIPE_X86_FRAME:frame(Allocated, Options), - ?X86STR" frame", Options), - Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options), - ?X86STR" finalise", Options), + AllocatedCFG = ?option_time(?HIPE_X86_RA:ra(SpillRestCFG, Options), + ?X86STR" register allocation", Options), + FramedCFG = ?option_time(?HIPE_X86_FRAME:frame(AllocatedCFG, Options), + ?X86STR" frame", Options), + Framed = ?option_time(hipe_x86_cfg:linearise(FramedCFG), + ?X86STR" linearise", Options), + Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options), + ?X86STR" finalise", Options), ?HIPE_X86_PP:optional_pp(Finalised, MFA, Options), {native, ?X86TAG, {unprofiled, Finalised}}. diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl index 939baeccec..925054dd68 100644 --- a/lib/hipe/x86/hipe_x86_postpass.erl +++ b/lib/hipe/x86/hipe_x86_postpass.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2003-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%%---------------------------------------------------------------------- %%% File : hipe_x86_postpass.erl @@ -63,9 +57,10 @@ postpass(#defun{code=Code0}=Defun, Options) -> peephole_optimization(Insns) -> peep(Insns, [], []). -%% MoveSelf related peep-opts + +%% MoveSelf related peep-opts %% ------------------------------ -peep([#fmove{src=Src, dst=Src} | Insns], Res,Lst) -> +peep([#fmove{src=Src, dst=Src} | Insns], Res,Lst) -> peep(Insns, Res, [moveSelf1|Lst]); peep([I=#fmove{src=Src, dst=Dst}, #fmove{src=Dst, dst=Src} | Insns], Res,Lst) -> @@ -95,7 +90,8 @@ peep([I=#move{src=#x86_temp{reg=Src}, dst=#x86_temp{reg=Dst}}, %% ElimBinALMDouble %% ---------------- -peep([Move=#move{src=Src, dst=Dst}, Alu=#alu{src=Src, dst=Dst}|Insns], Res, Lst) -> +peep([Move=#move{src=Src, dst=Dst}, Alu=#alu{src=Src, dst=Dst}|Insns], Res, Lst) + when not is_record(Dst, x86_mem) -> peep([Alu#alu{src=Dst}|Insns], [Move|Res], [elimBinALMDouble|Lst]); @@ -119,19 +115,15 @@ peep([#move{src=Src1, dst=Dst}, %% ElimCmp0 %% -------- -peep([C=#cmp{src=Src, dst=Dst},J=#jcc{cc=Cond, label=Lab}|Insns],Res,Lst) -> - case (((Src =:= #x86_imm{value=0}) or (Dst =:= #x86_imm{value=0})) and - ((Cond =:= 'eq') or (Cond =:= 'neq'))) of - true -> - Src2 = case Src of #x86_imm{value=0} -> Src; _ -> Dst end, - Cond2 = case Cond of 'eq' -> 'z'; 'neq' -> 'nz' end, - Test = #test{src=Src2, dst=#x86_imm{value=0}}, - Jump = #jcc{cc=Cond2, label=Lab}, - peep(Insns, [Jump, Test|Res], [elimCmp0|Lst]); - _ -> - peep(Insns, [J,C|Res], Lst) - end; - +peep([#cmp{src=#x86_imm{value=0}, dst=Dst=#x86_temp{}}|Insns],Res,Lst) -> + %% TEST leaves the adjust flag undefined, whereas CMP sets it properly (in + %% this case to 0). However, since HiPE does not use any instructions that + %% read the adjust flag, we can do this transform safely. + peep(Insns, [#test{src=Dst, dst=Dst} | Res], [elimCmp0_1|Lst]); +peep([#cmp{src=Src=#x86_temp{}, dst=#x86_imm{value=0}}, + J=#jcc{cc=Cond}|Insns],Res,Lst) + when Cond =:= 'e'; Cond =:= 'ne' -> % We're commuting the comparison + peep(Insns, [J, #test{src=Src, dst=Src} | Res], [elimCmp0_2|Lst]); %% ElimCmpTest %% ----------- @@ -168,8 +160,7 @@ peep([#jcc{label=Lab}, I=#label{label=Lab}|Insns], Res, Lst) -> %% ElimSet0 %% -------- -peep([#move{src=#x86_imm{value=0},dst=Dst}|Insns],Res,Lst) -when (Dst==#x86_temp{}) -> +peep([#move{src=#x86_imm{value=0},dst=Dst=#x86_temp{}}|Insns],Res,Lst) -> peep(Insns, [#alu{aluop='xor', src=Dst, dst=Dst}|Res], [elimSet0|Lst]); %% ElimMDPow2 @@ -186,6 +177,18 @@ peep([B = #alu{aluop=Op,src=#x86_imm{value=Val},dst=Dst}|Insns], Res, Lst) -> peep(Insns, [B|Res], Lst) end; +%% LeaToAdd +%% This rule transforms lea into add when the destination is the same as one of +%% the operands. Sound because lea is never used where the condition codes are +%% live (and would be clobbered by add). +%% ---------- +peep([#lea{mem=#x86_mem{base=#x86_temp{reg=DstR},off=Src}, + temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) -> + peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]); +peep([#lea{mem=#x86_mem{base=Src,off=#x86_temp{reg=DstR}}, + temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) -> + peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]); + %% SubToDec %% This rule turns "subl $1,Dst; jl Lab" into "decl Dst; jl Lab", which %% changes reduction counter tests to use decl instead of subl. @@ -208,6 +211,11 @@ trivial_goto_elimination(Insns) -> goto_elim(Insns, []). goto_elim([#jmp_label{label=Label}, I = #label{label=Label}|Insns], Res) -> goto_elim([I|Insns], Res); +goto_elim([#jcc{cc=CC, label=Label} = IJCC, + #jmp_label{label=BranchTgt}, + #label{label=Label} = ILBL|Insns], Res) -> + goto_elim([IJCC#jcc{cc=hipe_x86:neg_cc(CC), label=BranchTgt}, + ILBL|Insns], Res); goto_elim([I | Insns], Res) -> goto_elim(Insns, [I|Res]); goto_elim([], Res) -> diff --git a/lib/hipe/x86/hipe_x86_pp.erl b/lib/hipe/x86/hipe_x86_pp.erl index 9352cf5dbf..72d2fa80bf 100644 --- a/lib/hipe/x86/hipe_x86_pp.erl +++ b/lib/hipe/x86/hipe_x86_pp.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% x86 pretty-printer @@ -171,7 +165,7 @@ pp_insn(Dev, I, Pre) -> #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage} -> io:format(Dev, "\tpseudo_tailcall ", []), pp_fun(Dev, Fun), - io:format(Dev, "~w (", [Arity]), + io:format(Dev, " ~w (", [Arity]), pp_args(Dev, StkArgs), io:format(Dev, ") ~w\n", [Linkage]); #pseudo_tailcall_prepare{} -> @@ -188,6 +182,12 @@ pp_insn(Dev, I, Pre) -> io:format(Dev, ", ", []), pp_dst(Dev, Dst), io:format(Dev, "\n", []); + #test{src=Src, dst=Dst} -> + io:format(Dev, "\ttest ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); #fp_binop{src=Src, dst=Dst, op=Op} -> io:format(Dev, "\t~s ", [Op]), pp_dst(Dev, Dst), diff --git a/lib/hipe/x86/hipe_x86_ra.erl b/lib/hipe/x86/hipe_x86_ra.erl index f66961a7a7..f358306d49 100644 --- a/lib/hipe/x86/hipe_x86_ra.erl +++ b/lib/hipe/x86/hipe_x86_ra.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -ifdef(HIPE_AMD64). -define(HIPE_X86_RA, hipe_amd64_ra). @@ -41,60 +34,83 @@ %%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. -include("../main/hipe.hrl"). -ra(Defun0, Options) -> - %% ?HIPE_X86_PP:pp(Defun0), - {Defun1, Coloring_fp, SpillIndex} = ra_fp(Defun0, Options), - %% ?HIPE_X86_PP:pp(Defun1), +-ifdef(HIPE_INSTRUMENT_COMPILER). +code_size(CFG) -> + hipe_x86_cfg:fold_bbs(fun(_, BB, Acc) -> Acc + length(hipe_bb:code(BB)) end, + 0, CFG). +-endif. %% ifdef(HIPE_INSTRUMENT_COMPILER) + +ra(CFG0, Options) -> + %% hipe_x86_cfg:pp(CFG0), + Liveness0 = ?HIPE_X86_SPECIFIC:analyze(CFG0, no_context), + {CFG1, Liveness, Coloring_fp, SpillIndex} = ra_fp(CFG0, Liveness0, Options), + %% hipe_x86_cfg:pp(CFG1), ?start_ra_instrumentation(Options, - length(hipe_x86:defun_code(Defun1)), - element(2,hipe_x86:defun_var_range(Defun1))), - {Defun2, Coloring} + code_size(CFG1), + element(2,hipe_gensym:var_range(x86))), + {CFG2, _, Coloring} = case proplists:get_value(regalloc, Options, coalescing) of coalescing -> - ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + ra(CFG1, Liveness, SpillIndex, Options, hipe_coalescing_regalloc); optimistic -> - ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + ra(CFG1, Liveness, SpillIndex, Options, hipe_optimistic_regalloc); graph_color -> - ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + ra(CFG1, Liveness, SpillIndex, Options, hipe_graph_coloring_regalloc); linear_scan -> - ?HIPE_X86_RA_LS:ra(Defun1, SpillIndex, Options); + ?HIPE_X86_RA_LS:ra(CFG1, Liveness, SpillIndex, Options); naive -> - ?HIPE_X86_RA_NAIVE:ra(Defun1, Coloring_fp, Options); + ?HIPE_X86_RA_NAIVE:ra(CFG1, Liveness, Coloring_fp, Options); _ -> exit({unknown_regalloc_compiler_option, proplists:get_value(regalloc,Options)}) end, ?stop_ra_instrumentation(Options, - length(hipe_x86:defun_code(Defun2)), - element(2,hipe_x86:defun_var_range(Defun2))), - %% ?HIPE_X86_PP:pp(Defun2), - ?HIPE_X86_RA_FINALISE:finalise(Defun2, Coloring, Coloring_fp, Options). + code_size(CFG2), + element(2,hipe_gensym:var_range(x86))), + %% hipe_x86_cfg:pp(CFG2), + ?HIPE_X86_RA_FINALISE:finalise(CFG2, Coloring, Coloring_fp, Options). -ra(Defun, SpillIndex, Options, RegAllocMod) -> - hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, ?HIPE_X86_SPECIFIC). +ra(CFG, Liveness, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(CFG, Liveness, SpillIndex, Options, RegAllocMod, + ?HIPE_X86_SPECIFIC, no_context). -ifdef(HIPE_AMD64). -ra_fp(Defun, Options) -> - case proplists:get_bool(inline_fp, Options) and - (proplists:get_value(regalloc, Options) =/= naive) of - true -> - case proplists:get_bool(x87, Options) of - true -> - hipe_amd64_ra_x87_ls:ra(Defun, Options); - false -> - hipe_regalloc_loop:ra_fp(Defun, Options, - hipe_coalescing_regalloc, - hipe_amd64_specific_sse2) - end; - false -> - {Defun,[],0} +ra_fp(CFG, Liveness, Options) -> + Regalloc0 = proplists:get_value(regalloc, Options), + {Regalloc, TargetMod} = + case proplists:get_bool(inline_fp, Options) and (Regalloc0 =/= naive) of + false -> {naive, undefined}; + true -> + case proplists:get_bool(x87, Options) of + true -> {linear_scan, hipe_amd64_specific_x87}; + false -> {Regalloc0, hipe_amd64_specific_sse2} + end + end, + case Regalloc of + coalescing -> + ra_fp(CFG, Liveness, Options, hipe_coalescing_regalloc, TargetMod); + optimistic -> + ra_fp(CFG, Liveness, Options, hipe_optimistic_regalloc, TargetMod); + graph_color -> + ra_fp(CFG, Liveness, Options, hipe_graph_coloring_regalloc, TargetMod); + linear_scan -> hipe_amd64_ra_ls:ra_fp(CFG, Liveness, Options, TargetMod, + no_context); + naive -> {CFG,Liveness,[],0}; + _ -> + exit({unknown_regalloc_compiler_option, + proplists:get_value(regalloc,Options)}) end. + +ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod) -> + hipe_regalloc_loop:ra_fp(CFG, Liveness, Options, RegAllocMod, TargetMod, + no_context). -else. -ra_fp(Defun, Options) -> +ra_fp(CFG, Liveness, Options) -> case proplists:get_bool(inline_fp, Options) of true -> - hipe_x86_ra_x87_ls:ra(Defun, Options); + hipe_x86_ra_ls:ra_fp(CFG, Liveness, Options, hipe_x86_specific_x87, + no_context); false -> - {Defun,[],0} + {CFG,Liveness,[],0} end. -endif. diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl index 5dd75cb7ae..e8abe78e00 100644 --- a/lib/hipe/x86/hipe_x86_ra_finalise.erl +++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2004-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% -%%% %%% %%% - apply temp -> reg/spill map from RA @@ -25,23 +18,36 @@ -define(HIPE_X86_RA_FINALISE, hipe_amd64_ra_finalise). -define(HIPE_X86_REGISTERS, hipe_amd64_registers). -define(HIPE_X86_X87, hipe_amd64_x87). +-define(HIPE_X86_SSE2, hipe_amd64_sse2). +-define(IF_HAS_SSE2(Expr), Expr). -else. -define(HIPE_X86_RA_FINALISE, hipe_x86_ra_finalise). -define(HIPE_X86_REGISTERS, hipe_x86_registers). -define(HIPE_X86_X87, hipe_x86_x87). +-define(IF_HAS_SSE2(Expr),). -endif. -module(?HIPE_X86_RA_FINALISE). -export([finalise/4]). -include("../x86/hipe_x86.hrl"). -finalise(Defun, TempMap, FpMap, Options) -> - Defun1 = finalise_ra(Defun, TempMap, FpMap, Options), +finalise(CFG0, TempMap, FpMap, Options) -> + CFG1 = finalise_ra(CFG0, TempMap, FpMap, Options), case proplists:get_bool(x87, Options) of true -> - ?HIPE_X86_X87:map(Defun1); + ?HIPE_X86_X87:map(CFG1); _ -> - Defun1 + case + proplists:get_bool(inline_fp, Options) + and (proplists:get_value(regalloc, Options) =:= linear_scan) + of + %% Ugly, but required to avoid Dialyzer complaints about "Unknown + %% function" hipe_x86_sse2:map/1 + ?IF_HAS_SSE2(true -> + ?HIPE_X86_SSE2:map(CFG1);) + false -> + CFG1 + end end. %%% @@ -50,15 +56,16 @@ finalise(Defun, TempMap, FpMap, Options) -> %%% but I just want this to work now) %%% -finalise_ra(Defun, [], [], _Options) -> - Defun; -finalise_ra(Defun, TempMap, FpMap, Options) -> - Code = hipe_x86:defun_code(Defun), - {_, SpillLimit} = hipe_x86:defun_var_range(Defun), +finalise_ra(CFG, [], [], _Options) -> + CFG; +finalise_ra(CFG, TempMap, FpMap, Options) -> + {_, SpillLimit} = hipe_gensym:var_range(x86), Map = mk_ra_map(TempMap, SpillLimit), FpMap0 = mk_ra_map_fp(FpMap, SpillLimit, Options), - NewCode = ra_code(Code, Map, FpMap0), - Defun#defun{code=NewCode}. + hipe_x86_cfg:map_bbs(fun(_Lbl, BB) -> ra_bb(BB, Map, FpMap0) end, CFG). + +ra_bb(BB, Map, FpMap) -> + hipe_bb:code_update(BB, ra_code(hipe_bb:code(BB), Map, FpMap)). ra_code(Code, Map, FpMap) -> [ra_insn(I, Map, FpMap) || I <- Code]. @@ -133,6 +140,16 @@ ra_insn(I, Map, FpMap) -> I#pseudo_call{'fun'=Fun}; #pseudo_jcc{} -> I; + #pseudo_spill_fmove{src=Src0, temp=Temp0, dst=Dst0} -> + Src = ra_opnd(Src0, Map, FpMap), + Temp = ra_opnd(Temp0, Map, FpMap), + Dst = ra_opnd(Dst0, Map, FpMap), + I#pseudo_spill_fmove{src=Src, temp=Temp, dst=Dst}; + #pseudo_spill_move{src=Src0, temp=Temp0, dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Temp = ra_opnd(Temp0, Map), + Dst = ra_opnd(Dst0, Map), + I#pseudo_spill_move{src=Src, temp=Temp, dst=Dst}; #pseudo_tailcall{'fun'=Fun0,stkargs=StkArgs0} -> Fun = ra_opnd(Fun0, Map), StkArgs = ra_args(StkArgs0, Map), @@ -148,6 +165,10 @@ ra_insn(I, Map, FpMap) -> Src = ra_opnd(Src0, Map), Dst = ra_opnd(Dst0, Map), I#shift{src=Src,dst=Dst}; + #test{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#test{src=Src,dst=Dst}; _ -> exit({?MODULE,ra_insn,I}) end. @@ -230,49 +251,27 @@ mk_ra_map(TempMap, SpillLimit) -> gb_trees:empty(), TempMap). -conv_ra_maplet(MapLet = {From,To}, SpillLimit, IsPrecoloured) -> +conv_ra_maplet({From,To}, SpillLimit, IsPrecoloured) + when is_integer(From), From =< SpillLimit -> %% From should be a pseudo, or a hard reg mapped to itself. - if is_integer(From), From =< SpillLimit -> - case ?HIPE_X86_REGISTERS:IsPrecoloured(From) of - false -> []; - _ -> - case To of - {reg, From} -> []; - _ -> exit({?MODULE,conv_ra_maplet,MapLet}) - end - end; - true -> exit({?MODULE,conv_ra_maplet,MapLet}) + case ?HIPE_X86_REGISTERS:IsPrecoloured(From) of + false -> ok; + _ -> To = {reg, From}, ok end, %% end of From check case To of - {reg, NewReg} -> + {reg, NewReg} when is_integer(NewReg) -> %% NewReg should be a hard reg, or a pseudo mapped %% to itself (formals are handled this way). - if is_integer(NewReg) -> - case ?HIPE_X86_REGISTERS:IsPrecoloured(NewReg) of - true -> []; - _ -> if From =:= NewReg -> []; - true -> - exit({?MODULE,conv_ra_maplet,MapLet}) - end - end; - true -> exit({?MODULE,conv_ra_maplet,MapLet}) - end, - %% end of NewReg check + true = (?HIPE_X86_REGISTERS:IsPrecoloured(NewReg) orelse From =:= NewReg), {From, NewReg}; - {spill, SpillIndex} -> - %% SpillIndex should be >= 0. - if is_integer(SpillIndex), SpillIndex >= 0 -> []; - true -> exit({?MODULE,conv_ra_maplet,MapLet}) - end, - %% end of SpillIndex check + {spill, SpillIndex} when is_integer(SpillIndex), SpillIndex >= 0 -> ToTempNum = SpillLimit+SpillIndex+1, MaxTempNum = hipe_gensym:get_var(x86), if MaxTempNum >= ToTempNum -> ok; true -> hipe_gensym:set_var(x86, ToTempNum) end, - {From, ToTempNum}; - _ -> exit({?MODULE,conv_ra_maplet,MapLet}) + {From, ToTempNum} end. mk_ra_map_x87(FpMap, SpillLimit) -> diff --git a/lib/hipe/x86/hipe_x86_ra_ls.erl b/lib/hipe/x86/hipe_x86_ra_ls.erl index 3e34433111..581abd299d 100644 --- a/lib/hipe/x86/hipe_x86_ra_ls.erl +++ b/lib/hipe/x86/hipe_x86_ra_ls.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% Linear Scan register allocator for x86 @@ -35,41 +29,64 @@ -endif. -module(?HIPE_X86_RA_LS). --export([ra/3,regalloc/7]). +-export([ra/4,ra_fp/5]). -define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. -include("../main/hipe.hrl"). -ra(Defun, SpillIndex, Options) -> - NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), - CFG = hipe_x86_cfg:init(NewDefun), - +ra(CFG, Liveness, SpillIndex, Options) -> SpillLimit = ?HIPE_X86_SPECIFIC:number_of_temporaries( - CFG), + CFG, no_context), + ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), + alloc(CFG, Liveness, SpillIndex, SpillLimit, Options). + +ra_fp(CFG, Liveness, Options, TargetMod, TargetCtx) -> + ?inc_counter(ra_calls_counter,1), + %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), + SpillIndex = 0, + SpillLimit = TargetMod:number_of_temporaries(CFG, TargetCtx), ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), - alloc(NewDefun, SpillIndex, SpillLimit, Options). + ?inc_counter(ra_iteration_counter,1), + %% ?HIPE_X86_PP:pp(Defun), + + {Coloring,NewSpillIndex} = + regalloc(CFG, Liveness, + TargetMod:allocatable('linearscan', TargetCtx), + [hipe_x86_cfg:start_label(CFG)], + SpillIndex, SpillLimit, Options, + TargetMod, TargetCtx), + + {NewCFG, _DidSpill} = + TargetMod:check_and_rewrite(CFG, Coloring, 'linearscan', TargetCtx), + TempMap = hipe_temp_map:cols2tuple(Coloring, TargetMod, TargetCtx), + {TempMap2, NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + TargetMod, TargetCtx, TempMap), + Coloring2 = + hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), + ?add_spills(Options, NewSpillIndex), + {NewCFG, Liveness, Coloring2, NewSpillIndex2}. -alloc(Defun, SpillIndex, SpillLimit, Options) -> +alloc(CFG, Liveness, SpillIndex, SpillLimit, Options) -> ?inc_counter(ra_iteration_counter,1), %% ?HIPE_X86_PP:pp(Defun), - CFG = hipe_x86_cfg:init(Defun), - {Coloring, NewSpillIndex} = + {Coloring, NewSpillIndex} = regalloc( - CFG, + CFG, Liveness, ?HIPE_X86_REGISTERS:allocatable()-- [?HIPE_X86_REGISTERS:temp1(), ?HIPE_X86_REGISTERS:temp0()], [hipe_x86_cfg:start_label(CFG)], SpillIndex, SpillLimit, Options, - ?HIPE_X86_SPECIFIC), - {NewDefun, _DidSpill} = + ?HIPE_X86_SPECIFIC, no_context), + {NewCFG, _DidSpill} = ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite( - Defun, Coloring, 'linearscan'), + CFG, Coloring, 'linearscan'), %% ?HIPE_X86_PP:pp(NewDefun), - TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), - {TempMap2,NewSpillIndex2} = - hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, - ?HIPE_X86_SPECIFIC, TempMap), + TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC, no_context), + {TempMap2,NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, Liveness, [], SpillIndex, Options, + ?HIPE_X86_SPECIFIC, no_context, TempMap), Coloring2 = hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), case proplists:get_bool(verbose_spills, Options) of @@ -79,8 +96,9 @@ alloc(Defun, SpillIndex, SpillLimit, Options) -> ok end, ?add_spills(Options, NewSpillIndex), - {NewDefun, Coloring2}. + {NewCFG, Liveness, Coloring2}. -regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, DontSpill, Options, Target) -> - hipe_ls_regalloc:regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, - DontSpill, Options, Target). +regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, + TgtMod, TgtCtx) -> + hipe_ls_regalloc:regalloc(CFG, Liveness, PhysRegs, Entrypoints, SpillIndex, + DontSpill, Options, TgtMod, TgtCtx). diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl index 0ef4ef0a04..f96c662d18 100644 --- a/lib/hipe/x86/hipe_x86_ra_naive.erl +++ b/lib/hipe/x86/hipe_x86_ra_naive.erl @@ -1,9 +1,5 @@ %%% -*- erlang-indent-level: 2 -*- %%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% %%% %%% simple local x86 regalloc @@ -33,15 +27,14 @@ -endif. -module(?HIPE_X86_RA_NAIVE). --export([ra/3]). +-export([ra/4]). -include("../x86/hipe_x86.hrl"). -define(HIPE_INSTRUMENT_COMPILER, true). % enable instrumentation -include("../main/hipe.hrl"). -ra(X86Defun, Coloring_fp, Options) -> - #defun{code=Code0} = X86Defun, - Code1 = do_insns(Code0), +ra(CFG0, Liveness, Coloring_fp, Options) -> + CFG = hipe_x86_cfg:map_bbs(fun do_bb/2, CFG0), NofSpilledFloats = count_non_float_spills(Coloring_fp), NofFloats = length(Coloring_fp), ?add_spills(Options, hipe_gensym:get_var(x86) - @@ -49,15 +42,17 @@ ra(X86Defun, Coloring_fp, Options) -> NofSpilledFloats - NofFloats), TempMap = [], - {X86Defun#defun{code=Code1, - var_range={0, hipe_gensym:get_var(x86)}}, + {CFG, Liveness, TempMap}. +do_bb(_Lbl, BB) -> + hipe_bb:code_update(BB, do_insns(hipe_bb:code(BB))). + count_non_float_spills(Coloring_fp) -> count_non_float_spills(Coloring_fp, 0). count_non_float_spills([{_,To}|Tail], Num) -> - case ?HIPE_X86_SPECIFIC_FP:is_precoloured(To) of + case ?HIPE_X86_SPECIFIC_FP:is_precoloured(To, no_context) of true -> count_non_float_spills(Tail, Num); false -> @@ -99,6 +94,8 @@ do_insn(I) -> % Insn -> Insn list do_fp_binop(I); #shift{} -> do_shift(I); + #test{} -> + do_test(I); #label{} -> [I]; #pseudo_jcc{} -> @@ -309,6 +306,11 @@ do_shift(I) -> FixDst ++ [I#shift{dst=Dst}] end. +do_test(I) -> + #test{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0), + FixSrc ++ FixDst ++ [I#test{src=Src,dst=Dst}]. + %%% Fix the operands of a binary op. %%% 1. remove pseudos from any explicit memory operands %%% 2. if both operands are (implicit or explicit) memory operands, diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl index 0a70bd1d22..db6391d5c1 100644 --- a/lib/hipe/x86/hipe_x86_ra_postconditions.erl +++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,9 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% -%% -ifdef(HIPE_AMD64). -define(HIPE_X86_RA_POSTCONDITIONS, hipe_amd64_ra_postconditions). @@ -40,14 +33,18 @@ -include("../main/hipe.hrl"). -define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). -check_and_rewrite(Defun, Coloring, Strategy) -> +check_and_rewrite(CFG, Coloring, Strategy) -> %% io:format("Converting\n"), - TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), + TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC, no_context), %% io:format("Rewriting\n"), - #defun{code=Code0} = Defun, - {Code1, DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), - {Defun#defun{code=Code1,var_range={0,hipe_gensym:get_var(x86)}}, - DidSpill}. + do_bbs(hipe_x86_cfg:labels(CFG), TempMap, Strategy, CFG, false). + +do_bbs([], _, _, CFG, DidSpill) -> {CFG, DidSpill}; +do_bbs([Lbl|Lbls], TempMap, Strategy, CFG0, DidSpill0) -> + Code0 = hipe_bb:code(BB = hipe_x86_cfg:bb(CFG0, Lbl)), + {Code, DidSpill} = do_insns(Code0, TempMap, Strategy, [], DidSpill0), + CFG = hipe_x86_cfg:bb_add(CFG0, Lbl, hipe_bb:code_update(BB, Code)), + do_bbs(Lbls, TempMap, Strategy, CFG, DidSpill). do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), @@ -77,8 +74,12 @@ do_insn(I, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill} do_movx(I, TempMap, Strategy); #fmove{} -> do_fmove(I, TempMap, Strategy); + #pseudo_spill_move{} -> + do_pseudo_spill_move(I, TempMap, Strategy); #shift{} -> do_shift(I, TempMap, Strategy); + #test{} -> + do_test(I, TempMap, Strategy); _ -> %% comment, jmp*, label, pseudo_call, pseudo_jcc, pseudo_tailcall, %% pseudo_tailcall_prepare, push, ret @@ -169,24 +170,41 @@ do_jmp_switch(I, TempMap, Strategy) -> %%% Fix a lea op. do_lea(I, TempMap, Strategy) -> - #lea{temp=Temp} = I, - case is_spilled(Temp, TempMap) of - false -> - {[I], false}; - true -> - NewTmp = spill_temp('untagged', Strategy), - {[I#lea{temp=NewTmp}, hipe_x86:mk_move(NewTmp, Temp)], - true} + #lea{mem=Mem0,temp=Temp0} = I, + {FixMem, Mem, DidSpill1} = fix_mem_operand(Mem0, TempMap, temp1(Strategy)), + case Mem of + #x86_mem{base=Base, off=#x86_imm{value=0}} -> + %% We've decayed into a move due to both operands being memory (there's an + %% 'add' in FixMem). + {FixMem ++ [hipe_x86:mk_move(Base, Temp0)], DidSpill1}; + #x86_mem{} -> + {StoreTemp, Temp, DidSpill2} = + case is_mem_opnd(Temp0, TempMap) of + false -> {[], Temp0, false}; + true -> + Temp1 = clone2(Temp0, temp0(Strategy)), + {[hipe_x86:mk_move(Temp1, Temp0)], Temp1, true} + end, + {FixMem ++ [I#lea{mem=Mem,temp=Temp} | StoreTemp], DidSpill1 or DidSpill2} end. %%% Fix a move op. do_move(I, TempMap, Strategy) -> #move{src=Src0,dst=Dst0} = I, - {FixSrc, Src, FixDst, Dst, DidSpill} = - do_check_byte_move(Src0, Dst0, TempMap, Strategy), - {FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}], - DidSpill}. + case + is_record(Src0, x86_temp) andalso is_record(Dst0, x86_temp) + andalso is_spilled(Src0, TempMap) andalso is_spilled(Dst0, TempMap) + of + true -> + Tmp = clone(Src0, Strategy), + {[hipe_x86:mk_pseudo_spill_move(Src0, Tmp, Dst0)], true}; + false -> + {FixSrc, Src, FixDst, Dst, DidSpill} = + do_check_byte_move(Src0, Dst0, TempMap, Strategy), + {FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}], + DidSpill} + end. -ifdef(HIPE_AMD64). @@ -280,6 +298,13 @@ do_fmove(I, TempMap, Strategy) -> {FixSrc ++ FixDst ++ [I#fmove{src=Src,dst=Dst}], DidSpill1 or DidSpill2}. +%%% Fix an pseudo_spill_move op. + +do_pseudo_spill_move(I = #pseudo_spill_move{temp=Temp}, TempMap, _Strategy) -> + %% Temp is above the low water mark and must not have been spilled + false = is_spilled(Temp, TempMap), + {[I], false}. % nothing to do + %%% Fix a shift operation. %%% 1. remove pseudos from any explicit memory operands %%% 2. if the source is a register or memory position @@ -296,6 +321,14 @@ do_shift(I, TempMap, Strategy) -> {FixDst ++ [I#shift{dst=Dst}], DidSpill} end. +%%% Fix a test op. + +do_test(I, TempMap, Strategy) -> + #test{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst, DidSpill} = + do_binary(Src0, Dst0, TempMap, Strategy), + {FixSrc ++ FixDst ++ [I#test{src=Src,dst=Dst}], DidSpill}. + %%% Fix the operands of a binary op. %%% 1. remove pseudos from any explicit memory operands %%% 2. if both operands are (implicit or explicit) memory operands, @@ -377,19 +410,12 @@ is_mem_opnd(Opnd, TempMap) -> Reg = hipe_x86:temp_reg(Opnd), case hipe_x86:temp_is_allocatable(Opnd) of true -> - case tuple_size(TempMap) > Reg of + case + hipe_temp_map:is_spilled(Reg, TempMap) of true -> - case - hipe_temp_map:is_spilled(Reg, TempMap) of - true -> - ?count_temp(Reg), - true; - false -> false - end; - _ -> - %% impossible, but was true in ls post and false in normal post - exit({?MODULE,is_mem_opnd,Reg}), - false + ?count_temp(Reg), + true; + false -> false end; false -> true end; @@ -404,15 +430,10 @@ is_spilled(Temp, TempMap) -> case hipe_x86:temp_is_allocatable(Temp) of true -> Reg = hipe_x86:temp_reg(Temp), - case tuple_size(TempMap) > Reg of + case hipe_temp_map:is_spilled(Reg, TempMap) of true -> - case hipe_temp_map:is_spilled(Reg, TempMap) of - true -> - ?count_temp(Reg), - true; - false -> - false - end; + ?count_temp(Reg), + true; false -> false end; @@ -429,14 +450,14 @@ clone(Dst, Strategy) -> end, spill_temp(Type, Strategy). -spill_temp0(Type, 'normal') -> +spill_temp0(Type, 'normal') when Type =/= double -> hipe_x86:mk_new_temp(Type); -spill_temp0(Type, 'linearscan') -> +spill_temp0(Type, 'linearscan') when Type =/= double -> hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), Type). -spill_temp(Type, 'normal') -> +spill_temp(Type, 'normal') when Type =/= double -> hipe_x86:mk_new_temp(Type); -spill_temp(Type, 'linearscan') -> +spill_temp(Type, 'linearscan') when Type =/= double -> hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), Type). %%% Make a certain reg into a clone of Dst @@ -448,6 +469,6 @@ clone2(Dst, RegOpt) -> #x86_temp{} -> hipe_x86:temp_type(Dst) end, case RegOpt of - [] -> hipe_x86:mk_new_temp(Type); + [] when Type =/= double -> hipe_x86:mk_new_temp(Type); Reg -> hipe_x86:mk_temp(Reg, Type) end. diff --git a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl b/lib/hipe/x86/hipe_x86_ra_x87_ls.erl deleted file mode 100644 index 1ee76e5948..0000000000 --- a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl +++ /dev/null @@ -1,64 +0,0 @@ -%% $Id$ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2006-2016. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% - -%% Linear Scan register allocator for x87 - --ifdef(HIPE_AMD64). --define(HIPE_X86_RA_X87_LS, hipe_amd64_ra_x87_ls). --define(HIPE_X86_SPECIFIC_X87, hipe_amd64_specific_x87). --define(HIPE_X86_PP, hipe_amd64_pp). --define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). --else. --define(HIPE_X86_RA_X87_LS, hipe_x86_ra_x87_ls). --define(HIPE_X86_SPECIFIC_X87, hipe_x86_specific_x87). --define(HIPE_X86_PP, hipe_x86_pp). --define(HIPE_X86_RA_LS, hipe_x86_ra_ls). --endif. - --module(?HIPE_X86_RA_X87_LS). --export([ra/2]). - -%%-define(DEBUG,1). - --define(HIPE_INSTRUMENT_COMPILER, false). %% Turn off instrumentation. --include("../main/hipe.hrl"). - -ra(Defun, Options) -> - ?inc_counter(ra_calls_counter,1), - CFG = hipe_x86_cfg:init(Defun), - %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), - SpillIndex = 0, - SpillLimit = ?HIPE_X86_SPECIFIC_X87:number_of_temporaries(CFG), - ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), - - ?inc_counter(ra_iteration_counter,1), - %% ?HIPE_X86_PP:pp(Defun), - Cfg = hipe_x86_cfg:init(Defun), % XXX: didn't we just compute this above? - - {Coloring,NewSpillIndex} = - ?HIPE_X86_RA_LS:regalloc(Cfg, - ?HIPE_X86_SPECIFIC_X87:allocatable(), - [hipe_x86_cfg:start_label(Cfg)], - SpillIndex, SpillLimit, Options, - ?HIPE_X86_SPECIFIC_X87), - - ?add_spills(Options, NewSpillIndex), - {Defun, Coloring, NewSpillIndex}. diff --git a/lib/hipe/x86/hipe_x86_registers.erl b/lib/hipe/x86/hipe_x86_registers.erl index 179d734501..dbff68ad28 100644 --- a/lib/hipe/x86/hipe_x86_registers.erl +++ b/lib/hipe/x86/hipe_x86_registers.erl @@ -1,8 +1,3 @@ -%%% -%%% %CopyrightBegin% -%%% -%%% Copyright Ericsson AB 2001-2016. All Rights Reserved. -%%% %%% Licensed under the Apache License, Version 2.0 (the "License"); %%% you may not use this file except in compliance with the License. %%% You may obtain a copy of the License at @@ -14,9 +9,6 @@ %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %%% See the License for the specific language governing permissions and %%% limitations under the License. -%%% -%%% %CopyrightEnd% -%%% %%% %%% TODO: %%% - Do we need a pseudo reg for the condition codes? @@ -224,6 +216,8 @@ ret(N) -> exit({?MODULE, ret, N}) end. +%% Note: the fact that (allocatable() UNION allocatable_x87()) is a subset of +%% call_clobbered() is hard-coded in hipe_x86_defuse:insn_defs_all/1 call_clobbered() -> [{?EAX,tagged},{?EAX,untagged}, % does the RA strip the type or not? {?EDX,tagged},{?EDX,untagged}, diff --git a/lib/hipe/x86/hipe_x86_spill_restore.erl b/lib/hipe/x86/hipe_x86_spill_restore.erl index f17b91f33b..90edef31f3 100644 --- a/lib/hipe/x86/hipe_x86_spill_restore.erl +++ b/lib/hipe/x86/hipe_x86_spill_restore.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2008-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -16,8 +12,6 @@ %% See the License for the specific language governing permissions and %% limitations under the License. %% -%% %CopyrightEnd% -%% %% ==================================================================== %% Authors : Dogan Yazar and Erdem Aksu (KT2 project of 2008) %% ==================================================================== @@ -25,13 +19,11 @@ -ifdef(HIPE_AMD64). -define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore). -define(HIPE_X86_LIVENESS, hipe_amd64_liveness). --define(HIPE_X86_SPECIFIC, hipe_amd64_specific). -define(HIPE_X86_REGISTERS, hipe_amd64_registers). -define(X86STR, "amd64"). -else. -define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore). -define(HIPE_X86_LIVENESS, hipe_x86_liveness). --define(HIPE_X86_SPECIFIC, hipe_x86_specific). -define(HIPE_X86_REGISTERS, hipe_x86_registers). -define(X86STR, "x86"). -endif. @@ -51,15 +43,13 @@ -include("../flow/cfg.hrl"). % Added for the definition of #cfg{} %% Main function -spill_restore(Defun, Options) -> - CFG = ?option_time(firstPass(Defun), ?X86STR" First Pass", Options), - CFGFinal = ?option_time(secondPass(CFG), ?X86STR" Second Pass", Options), - hipe_x86_cfg:linearise(CFGFinal). +spill_restore(CFG0, Options) -> + CFG1 = ?option_time(firstPass(CFG0), ?X86STR" First Pass", Options), + ?option_time(secondPass(CFG1), ?X86STR" Second Pass", Options). %% Performs the first pass of the algorithm. %% By working bottom up, introduce the pseudo_spills. -firstPass(Defun) -> - CFG0 = ?HIPE_X86_SPECIFIC:defun_to_cfg(Defun), +firstPass(CFG0) -> %% get the labels bottom up Labels = hipe_x86_cfg:postorder(CFG0), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), diff --git a/lib/hipe/x86/hipe_x86_subst.erl b/lib/hipe/x86/hipe_x86_subst.erl new file mode 100644 index 0000000000..7db3b23d92 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_subst.erl @@ -0,0 +1,112 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_SUBST, hipe_amd64_subst). +-else. +-define(HIPE_X86_SUBST, hipe_x86_subst). +-endif. + +-module(?HIPE_X86_SUBST). +-export([insn_temps/2, insn_lbls/2]). +-include("../x86/hipe_x86.hrl"). + +%% These should be moved to hipe_x86 and exported +-type temp() :: #x86_temp{}. +-type oper() :: temp() | #x86_imm{} | #x86_mem{}. +-type mfarec() :: #x86_mfa{}. +-type prim() :: #x86_prim{}. +-type funv() :: mfarec() | prim() | temp(). +-type label() :: non_neg_integer(). +-type insn() :: tuple(). % for now + +-type subst_fun() :: fun((temp()) -> temp()). + +%% @doc Maps over the temporaries in an instruction +-spec insn_temps(subst_fun(), insn()) -> insn(). +insn_temps(SubstTemp, I) -> + O = fun(O) -> oper_temps(SubstTemp, O) end, + case I of + #alu {src=S, dst=D} -> I#alu {src=O(S), dst=O(D)}; + #cmovcc {src=S, dst=D} -> I#cmovcc {src=O(S), dst=O(D)}; + #cmp {src=S, dst=D} -> I#cmp {src=O(S), dst=O(D)}; + #fmove {src=S, dst=D} -> I#fmove {src=O(S), dst=O(D)}; + #fp_binop{src=S, dst=D} -> I#fp_binop{src=O(S), dst=O(D)}; + #imul {src=S, temp=T} -> I#imul {src=O(S), temp=O(T)}; + #lea {mem=M, temp=T} -> I#lea {mem=O(M), temp=O(T)}; + #move {src=S, dst=D} -> I#move {src=O(S), dst=O(D)}; + #movsx {src=S, dst=D} -> I#movsx {src=O(S), dst=O(D)}; + #movzx {src=S, dst=D} -> I#movzx {src=O(S), dst=O(D)}; + #shift {src=S, dst=D} -> I#shift {src=O(S), dst=O(D)}; + #test {src=S, dst=D} -> I#test {src=O(S), dst=O(D)}; + #fp_unop{arg=[]} -> I; + #fp_unop{arg=A} -> I#fp_unop{arg=O(A)}; + #move64 {dst=D} -> I#move64 {dst=O(D)}; + #push {src=S} -> I#push {src=O(S)}; + #pop {dst=D} -> I#pop {dst=O(D)}; + #jmp_switch{temp=T, jtab=J} -> + I#jmp_switch{temp=O(T), jtab=jtab_temps(SubstTemp, J)}; + #pseudo_call{'fun'=F} -> + I#pseudo_call{'fun'=funv_temps(SubstTemp, F)}; + #pseudo_spill_fmove{src=S, temp=T, dst=D} -> + I#pseudo_spill_fmove{src=O(S), temp=O(T), dst=O(D)}; + #pseudo_spill_move{src=S, temp=T, dst=D} -> + I#pseudo_spill_move{src=O(S), temp=O(T), dst=O(D)}; + #pseudo_tailcall{'fun'=F, stkargs=Stk} -> + I#pseudo_tailcall{'fun'=funv_temps(SubstTemp, F), + stkargs=lists:map(O, Stk)}; + #comment{} -> I; + #jmp_label{} -> I; + #pseudo_tailcall_prepare{} -> I; + #pseudo_jcc{} -> I; + #ret{} -> I + end. + +-spec oper_temps(subst_fun(), oper()) -> oper(). +oper_temps(_SubstTemp, I=#x86_imm{}) -> I; +oper_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T); +oper_temps(SubstTemp, M=#x86_mem{base=Base,off=Off}) -> + M#x86_mem{base=oper_temps(SubstTemp, Base), + off =oper_temps(SubstTemp, Off)}. + +-spec funv_temps(subst_fun(), funv()) -> funv(). +funv_temps(_SubstTemp, MFA=#x86_mfa{}) -> MFA; +funv_temps(_SubstTemp, P=#x86_prim{}) -> P; +funv_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T). + +%% TODO: Undo this ifdeffery at the source (make jtab an #x86_imm{} on x86) +-ifdef(HIPE_AMD64). +jtab_temps(SubstTemp, T=#x86_temp{}) -> SubstTemp(T). +-else. +jtab_temps(_SubstTemp, DataLbl) when is_integer(DataLbl) -> DataLbl. +-endif. + +-type lbl_subst_fun() :: fun((label()) -> label()). + +%% @doc Maps over the branch targets in an instruction +-spec insn_lbls(lbl_subst_fun(), insn()) -> insn(). +insn_lbls(SubstLbl, I) -> + case I of + #jmp_label{label=Label} -> + I#jmp_label{label=SubstLbl(Label)}; + #pseudo_call{sdesc=Sdesc, contlab=Contlab} -> + I#pseudo_call{sdesc=sdesc_lbls(SubstLbl, Sdesc), + contlab=SubstLbl(Contlab)}; + #pseudo_jcc{true_label=T, false_label=F} -> + I#pseudo_jcc{true_label=SubstLbl(T), false_label=SubstLbl(F)} + end. + +sdesc_lbls(_SubstLbl, Sdesc=#x86_sdesc{exnlab=[]}) -> Sdesc; +sdesc_lbls(SubstLbl, Sdesc=#x86_sdesc{exnlab=Exnlab}) -> + Sdesc#x86_sdesc{exnlab=SubstLbl(Exnlab)}. diff --git a/lib/hipe/x86/hipe_x86_x87.erl b/lib/hipe/x86/hipe_x86_x87.erl index e874490252..85268ab85a 100644 --- a/lib/hipe/x86/hipe_x86_x87.erl +++ b/lib/hipe/x86/hipe_x86_x87.erl @@ -1,9 +1,5 @@ %% -*- erlang-indent-level: 2 -*- %% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2005-2016. All Rights Reserved. -%% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at @@ -15,8 +11,6 @@ %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. -%% -%% %CopyrightEnd% %% %% Floating point handling. @@ -41,13 +35,12 @@ %%---------------------------------------------------------------------- -map(Defun) -> - CFG0 = hipe_x86_cfg:init(Defun), +map(CFG0) -> %% hipe_x86_cfg:pp(CFG0), Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), StartLabel = hipe_x86_cfg:start_label(CFG0), {CFG1,_} = do_blocks([], [StartLabel], CFG0, Liveness, [], gb_trees:empty()), - hipe_x86_cfg:linearise(CFG1). + CFG1. do_blocks(Pred, [Lbl|Lbls], CFG, Liveness, Map, BlockMap) -> case gb_trees:lookup(Lbl, BlockMap) of |