diff options
author | Sverker Eriksson <[email protected]> | 2016-11-22 12:02:07 +0100 |
---|---|---|
committer | Sverker Eriksson <[email protected]> | 2016-11-22 12:02:07 +0100 |
commit | 3d7b55f946162b5a129241dbe67397784a1ba1a5 (patch) | |
tree | 8a3809296bdfcdd16ebbf78975ea18034b22d62c | |
parent | 9491f6727f12e37241863bd5becbd1f336ff7659 (diff) | |
parent | fda3c9575d77bed0250f76f17e92d18836e15d0c (diff) | |
download | otp-3d7b55f946162b5a129241dbe67397784a1ba1a5.tar.gz otp-3d7b55f946162b5a129241dbe67397784a1ba1a5.tar.bz2 otp-3d7b55f946162b5a129241dbe67397784a1ba1a5.zip |
Merge branch 'margnus1/hipe/refactor-rtl/PR-1243'
* margnus1/hipe/refactor-rtl/PR-1243:
hipe_x86: Fix encoding of test instr w/ neg imm
hipe_tagscheme: Simplify test_two_fixnums with imm
hipe_icode: Always const-propagate if&call args
hipe_tagscheme: x86 lea+test for mask_and_compare
hipe_tagscheme: Improve fixnum_addsub with imm
hipe: Make realloc_binary fast case true branch
hipe_x86_postpass: Negate conditions in goto elim
hipe_tagscheme: Improve fixnum add on x86
hipe_rtl_to_x86: Use LEA only for immediate adds
hipe_x86: LeaToAdd peephole rule
hipe_sparc: move coalescing
hipe_ppc: better rlwinm pp
hipe_ppc: move coalescing
hipe_rtl: drop alub dest when unused
hipe_rtl: unify branch and alub
hipe_x86: Fix&activate ElimCmp0 peephole rule
hipe_{x86,amd64}: Finish test instr implementation
30 files changed, 598 insertions, 537 deletions
diff --git a/lib/hipe/amd64/hipe_amd64_encode.erl b/lib/hipe/amd64/hipe_amd64_encode.erl index df15732cea..16bd705055 100644 --- a/lib/hipe/amd64/hipe_amd64_encode.erl +++ b/lib/hipe/amd64/hipe_amd64_encode.erl @@ -63,7 +63,7 @@ -export([% condition codes cc/1, % 8-bit registers - %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0, + %% al/0, cl/0, dl/0, bl/0, % 32-bit registers %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0, % operands @@ -127,19 +127,15 @@ cc(g) -> ?CC_G. -define(CL, 2#001). -define(DL, 2#010). -define(BL, 2#011). --define(AH, 2#100). --define(CH, 2#101). --define(DH, 2#110). --define(BH, 2#111). +-define(SPL, 2#100). +-define(BPL, 2#101). +-define(SIL, 2#110). +-define(DIL, 2#111). %% al() -> ?AL. %% cl() -> ?CL. %% dl() -> ?DL. %% bl() -> ?BL. -%% ah() -> ?AH. -%% ch() -> ?CH. -%% dh() -> ?DH. -%% bh() -> ?BH. %%% 32-bit registers @@ -208,6 +204,7 @@ rex_([]) -> 0; rex_([{r8, Reg8}| Rest]) -> % 8 bit registers case Reg8 of {rm_mem, _} -> rex_(Rest); + {rm_reg, R} -> rex_([{r8, R} | Rest]); 4 -> (1 bsl 8) bor rex_(Rest); 5 -> (1 bsl 8) bor rex_(Rest); 6 -> (1 bsl 8) bor rex_(Rest); @@ -825,12 +822,26 @@ shd_op_encode(Opcode, Opnds) -> test_encode(Opnds) -> case Opnds of + {al, {imm8,Imm8}} -> + [16#A8, Imm8]; + {ax, {imm16,Imm16}} -> + [?PFX_OPND_16BITS, 16#A9 | le16(Imm16, [])]; {eax, {imm32,Imm32}} -> [16#A9 | le32(Imm32, [])]; + {rax, {imm32,Imm32}} -> + [rex([{w,1}]), 16#A9 | le32(Imm32, [])]; + {{rm8,RM8}, {imm8,Imm8}} -> + [rex([{r8,RM8}]), 16#F6 | encode_rm(RM8, 2#000, [Imm8])]; + {{rm16,RM16}, {imm16,Imm16}} -> + [?PFX_OPND_16BITS, 16#F7 | encode_rm(RM16, 2#000, le16(Imm16, []))]; {{rm32,RM32}, {imm32,Imm32}} -> [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))]; + {{rm64,RM64}, {imm32,Imm32}} -> + [rex([{w,1}]), 16#F7 | encode_rm(RM64, 2#000, le32(Imm32, []))]; {{rm32,RM32}, {reg32,Reg32}} -> - [16#85 | encode_rm(RM32, Reg32, [])] + [16#85 | encode_rm(RM32, Reg32, [])]; + {{rm64,RM64}, {reg64,Reg64}} -> + [rex([{w,1}]), 16#85 | encode_rm(RM64, Reg64, [])] end. %% test_sizeof(Opnds) -> @@ -1309,18 +1320,21 @@ dotest1(OS) -> Imm32 = {imm32,Word32}, Imm16 = {imm16,Word16}, Imm8 = {imm8,Word8}, + RM64 = {rm64,rm_reg(?EDX)}, RM32 = {rm32,rm_reg(?EDX)}, RM16 = {rm16,rm_reg(?EDX)}, RM8 = {rm8,rm_reg(?EDX)}, + RM8REX = {rm8,rm_reg(?SIL)}, Rel32 = {rel32,Word32}, Rel8 = {rel8,Word8}, Moffs32 = {moffs32,Word32}, Moffs16 = {moffs16,Word32}, Moffs8 = {moffs8,Word32}, CC = {cc,?CC_G}, + Reg64 = {reg64,?EAX}, Reg32 = {reg32,?EAX}, Reg16 = {reg16,?EAX}, - Reg8 = {reg8,?AH}, + Reg8 = {reg8,?SPL}, EA = {ea,ea_base(?ECX)}, % exercise each instruction definition t(OS,'adc',{eax,Imm32}), @@ -1465,9 +1479,17 @@ dotest1(OS) -> t(OS,'sub',{RM32,Imm8}), t(OS,'sub',{RM32,Reg32}), t(OS,'sub',{Reg32,RM32}), + t(OS,'test',{al,Imm8}), + t(OS,'test',{ax,Imm16}), t(OS,'test',{eax,Imm32}), + t(OS,'test',{rax,Imm32}), + t(OS,'test',{RM8,Imm8}), + t(OS,'test',{RM8REX,Imm8}), + t(OS,'test',{RM16,Imm16}), t(OS,'test',{RM32,Imm32}), + t(OS,'test',{RM64,Imm32}), t(OS,'test',{RM32,Reg32}), + t(OS,'test',{RM64,Reg64}), t(OS,'xor',{eax,Imm32}), t(OS,'xor',{RM32,Imm32}), t(OS,'xor',{RM32,Imm8}), diff --git a/lib/hipe/arm/hipe_rtl_to_arm.erl b/lib/hipe/arm/hipe_rtl_to_arm.erl index 2f9181d517..c964c222aa 100644 --- a/lib/hipe/arm/hipe_rtl_to_arm.erl +++ b/lib/hipe/arm/hipe_rtl_to_arm.erl @@ -62,7 +62,6 @@ conv_insn(I, Map, Data) -> case I of #alu{} -> conv_alu(I, Map, Data); #alub{} -> conv_alub(I, Map, Data); - #branch{} -> conv_branch(I, Map, Data); #call{} -> conv_call(I, Map, Data); #comment{} -> conv_comment(I, Map, Data); #enter{} -> conv_enter(I, Map, Data); @@ -111,6 +110,17 @@ commute_arithop(ArithOp) -> _ -> ArithOp end. +conv_cmpop('add') -> 'cmn'; +conv_cmpop('sub') -> 'cmp'; +conv_cmpop('and') -> 'tst'; +conv_cmpop('xor') -> 'teq'; +conv_cmpop(_) -> none. + +cmpop_commutes('cmp') -> false; +cmpop_commutes('cmn') -> true; +cmpop_commutes('tst') -> true; +cmpop_commutes('teq') -> true. + mk_alu(S, Dst, Src1, RtlAluOp, Src2) -> case hipe_rtl:is_shift_op(RtlAluOp) of true -> @@ -223,71 +233,77 @@ fix_aluop_imm(AluOp, Imm) -> % {FixAm1,NewAluOp,Am1} conv_alub(I, Map, Data) -> %% dst = src1 aluop src2; if COND goto label - {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), - {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), - {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), + {Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map), + {Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0), RtlAluOp = hipe_rtl:alub_op(I), - Cond0 = conv_alub_cond(RtlAluOp, hipe_rtl:alub_cond(I)), - Cond = - case {RtlAluOp,Cond0} of - {'mul','vs'} -> 'ne'; % overflow becomes not-equal - {'mul','vc'} -> 'eq'; % no-overflow becomes equal - {'mul',_} -> exit({?MODULE,I}); - {_,_} -> Cond0 - end, - I2 = mk_pseudo_bc( - Cond, - hipe_rtl:alub_true_label(I), - hipe_rtl:alub_false_label(I), - hipe_rtl:alub_pred(I)), - S = true, - I1 = mk_alu(S, Dst, Src1, RtlAluOp, Src2), - {I1 ++ I2, Map2, Data}. - -conv_branch(I, Map, Data) -> - %% <unused> = src1 - src2; if COND goto label - {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), - {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), - Cond = conv_branch_cond(hipe_rtl:branch_cond(I)), - I2 = mk_branch(Src1, Cond, Src2, - hipe_rtl:branch_true_label(I), - hipe_rtl:branch_false_label(I), - hipe_rtl:branch_pred(I)), - {I2, Map1, Data}. + RtlCond = hipe_rtl:alub_cond(I), + HasDst = hipe_rtl:alub_has_dst(I), + CmpOp = conv_cmpop(RtlAluOp), + Cond0 = conv_alub_cond(RtlAluOp, RtlCond), + case (not HasDst) andalso CmpOp =/= none of + true -> + I1 = mk_branch(Src1, CmpOp, Src2, Cond0, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I)), + {I1, Map1, Data}; + false -> + {Dst, Map2} = + case HasDst of + false -> {new_untagged_temp(), Map1}; + true -> conv_dst(hipe_rtl:alub_dst(I), Map1) + end, + Cond = + case {RtlAluOp,Cond0} of + {'mul','vs'} -> 'ne'; % overflow becomes not-equal + {'mul','vc'} -> 'eq'; % no-overflow becomes equal + {'mul',_} -> exit({?MODULE,I}); + {_,_} -> Cond0 + end, + I2 = mk_pseudo_bc( + Cond, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I)), + S = true, + I1 = mk_alu(S, Dst, Src1, RtlAluOp, Src2), + {I1 ++ I2, Map2, Data} + end. -mk_branch(Src1, Cond, Src2, TrueLab, FalseLab, Pred) -> +mk_branch(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred) -> case hipe_arm:is_temp(Src1) of true -> case hipe_arm:is_temp(Src2) of true -> - mk_branch_rr(Src1, Src2, Cond, TrueLab, FalseLab, Pred); + mk_branch_rr(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred); _ -> - mk_branch_ri(Src1, Cond, Src2, TrueLab, FalseLab, Pred) + mk_branch_ri(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred) end; _ -> case hipe_arm:is_temp(Src2) of true -> - NewCond = commute_cond(Cond), - mk_branch_ri(Src2, NewCond, Src1, TrueLab, FalseLab, Pred); + NewCond = + case cmpop_commutes(CmpOp) of + true -> Cond; + false -> commute_cond(Cond) + end, + mk_branch_ri(Src2, CmpOp, Src1, NewCond, TrueLab, FalseLab, Pred); _ -> - mk_branch_ii(Src1, Cond, Src2, TrueLab, FalseLab, Pred) + mk_branch_ii(Src1, CmpOp, Src2, Cond, TrueLab, FalseLab, Pred) end end. -mk_branch_ii(Imm1, Cond, Imm2, TrueLab, FalseLab, Pred) -> +mk_branch_ii(Imm1, CmpOp, Imm2, Cond, TrueLab, FalseLab, Pred) -> Tmp = new_untagged_temp(), mk_li(Tmp, Imm1, - mk_branch_ri(Tmp, Cond, Imm2, + mk_branch_ri(Tmp, CmpOp, Imm2, Cond, TrueLab, FalseLab, Pred)). -mk_branch_ri(Src, Cond, Imm, TrueLab, FalseLab, Pred) -> - {FixAm1,NewCmpOp,Am1} = fix_aluop_imm('cmp', Imm), - FixAm1 ++ mk_cmp_bc(NewCmpOp, Src, Am1, Cond, TrueLab, FalseLab, Pred). - -mk_branch_rr(Src1, Src2, Cond, TrueLab, FalseLab, Pred) -> - mk_cmp_bc('cmp', Src1, Src2, Cond, TrueLab, FalseLab, Pred). +mk_branch_ri(Src, CmpOp, Imm, Cond, TrueLab, FalseLab, Pred) -> + {FixAm1,NewCmpOp,Am1} = fix_aluop_imm(CmpOp, Imm), + FixAm1 ++ mk_branch_rr(Src, NewCmpOp, Am1, Cond, TrueLab, FalseLab, Pred). -mk_cmp_bc(CmpOp, Src, Am1, Cond, TrueLab, FalseLab, Pred) -> +mk_branch_rr(Src, CmpOp, Am1, Cond, TrueLab, FalseLab, Pred) -> [hipe_arm:mk_cmp(CmpOp, Src, Am1) | mk_pseudo_bc(Cond, TrueLab, FalseLab, Pred)]. @@ -637,6 +653,7 @@ conv_alub_cond(RtlAluOp, Cond) -> % may be unsigned, depends on aluop case {RtlAluOp, Cond} of % handle allowed alub unsigned conditions {'add', 'ltu'} -> 'hs'; % add+ltu == unsigned overflow == carry set == hs %% add more cases when needed + {'sub', _} -> conv_branch_cond(Cond); _ -> conv_cond(Cond) end. diff --git a/lib/hipe/icode/hipe_icode.erl b/lib/hipe/icode/hipe_icode.erl index 78508dff22..d2d08e0253 100644 --- a/lib/hipe/icode/hipe_icode.erl +++ b/lib/hipe/icode/hipe_icode.erl @@ -438,6 +438,7 @@ if_true_label/1, if_false_label/1, if_args/1, + if_args_update/2, if_pred/1, %% is_if/1, @@ -594,6 +595,7 @@ uses/1, defines/1, is_safe/1, + reduce_unused/1, strip_comments/1, subst/2, subst_uses/2, @@ -713,6 +715,9 @@ if_op_update(IF, NewOp) -> IF#icode_if{op=NewOp}. -spec if_args(#icode_if{}) -> [icode_term_arg()]. if_args(#icode_if{args=Args}) -> Args. +-spec if_args_update(#icode_if{}, [icode_term_arg()]) -> #icode_if{}. +if_args_update(IF, Args) -> IF#icode_if{args=Args}. + -spec if_true_label(#icode_if{}) -> icode_lbl(). if_true_label(#icode_if{true_label=TrueLbl}) -> TrueLbl. @@ -1765,6 +1770,18 @@ is_safe(Instr) -> #icode_end_try{} -> false end. +%% @doc Produces a simplified instruction sequence that is equivalent to [Instr] +%% under the assumption that all results of Instr are unused, or 'false' if +%% there is no such sequence (other than [Instr] itself). + +-spec reduce_unused(icode_instr()) -> false | [icode_instr()]. + +reduce_unused(Instr) -> + case is_safe(Instr) of + true -> []; + false -> false + end. + %%----------------------------------------------------------------------- -spec highest_var(icode_instrs()) -> non_neg_integer(). diff --git a/lib/hipe/icode/hipe_icode_ssa_const_prop.erl b/lib/hipe/icode/hipe_icode_ssa_const_prop.erl index 4ab4d7e95d..5d3d5413bc 100644 --- a/lib/hipe/icode/hipe_icode_ssa_const_prop.erl +++ b/lib/hipe/icode/hipe_icode_ssa_const_prop.erl @@ -97,11 +97,13 @@ visit_expression(Instruction, Environment) -> visit_begin_handler (Instruction, EvaluatedArguments, Environment); #icode_begin_try{} -> visit_begin_try (Instruction, EvaluatedArguments, Environment); - #icode_fail{} -> + #icode_fail{} -> visit_fail (Instruction, EvaluatedArguments, Environment); - _ -> - %% label, end_try, comment, return, - {[], [], Environment} + #icode_comment{} -> {[], [], Environment}; + #icode_end_try{} -> {[], [], Environment}; + #icode_enter{} -> {[], [], Environment}; + #icode_label{} -> {[], [], Environment}; + #icode_return{} -> {[], [], Environment} end. %%----------------------------------------------------------------------------- @@ -463,11 +465,15 @@ update_instruction(Instruction, Environment) -> update_type(Instruction, Environment); #icode_switch_tuple_arity{} -> update_switch_tuple_arity(Instruction, Environment); - _ -> - %% goto, comment, label, return, begin_handler, end_try, - %% begin_try, fail - %% We could but don't handle: catch?, fail? - [Instruction] + %% We could but don't handle: catch?, fail? + #icode_begin_handler{} -> [Instruction]; + #icode_begin_try{} -> [Instruction]; + #icode_comment{} -> [Instruction]; + #icode_end_try{} -> [Instruction]; + #icode_fail{} -> [Instruction]; + #icode_goto{} -> [Instruction]; + #icode_label{} -> [Instruction]; + #icode_return{} -> [Instruction] end. %%----------------------------------------------------------------------------- @@ -502,14 +508,12 @@ update_call(Instruction, Environment) -> [Instruction, NewInstructions]), NewInstructions end; -%% %% [] -> %% No destination; we don't touch this -%% [] -> -%% NewArguments = update_arguments(hipe_icode:call_args(Instruction), -%% Environment), -%% [hipe_icode:call_args_update(Instruction, NewArguments)]; + %% [] -> %% No destination; we don't touch this %% List-> %% Means register allocation; not implemented at this point _ -> - [Instruction] + NewArguments = update_arguments(hipe_icode:call_args(Instruction), + Environment), + [hipe_icode:call_args_update(Instruction, NewArguments)] end. %%----------------------------------------------------------------------------- @@ -574,7 +578,9 @@ update_if(Instruction, Environment) -> %% Convert the if-test to a type test if possible. Op = hipe_icode:if_op(Instruction), case Op =:= '=:=' orelse Op =:= '=/=' of - false -> [Instruction]; + false -> + [hipe_icode:if_args_update( + Instruction, update_arguments(Args, Environment))]; true -> [Arg1, Arg2] = Args, case EvaluatedArguments of @@ -604,8 +610,9 @@ conv_if_to_type(I, Const, Arg) when is_atom(Const); NewI = hipe_icode:mk_type([Arg], Test, T, F), ?CONST_PROP_MSG("if: ~w ---> type ~w\n", [I, NewI]), [NewI]; -conv_if_to_type(I, _, _) -> - [I]. +conv_if_to_type(I, Const, Arg) -> + %% Note: we are potentially commuting the (equality) comparison here + [hipe_icode:if_args_update(I, [Arg, hipe_icode:mk_const(Const)])]. %%----------------------------------------------------------------------------- diff --git a/lib/hipe/llvm/hipe_rtl_to_llvm.erl b/lib/hipe/llvm/hipe_rtl_to_llvm.erl index 20813f8bd7..f8911c1909 100644 --- a/lib/hipe/llvm/hipe_rtl_to_llvm.erl +++ b/lib/hipe/llvm/hipe_rtl_to_llvm.erl @@ -156,9 +156,6 @@ translate_instr(I, Relocs, Data) -> #alub{} -> {I2, Relocs2} = trans_alub(I, Relocs), {I2, Relocs2, Data}; - #branch{} -> - {I2, Relocs2} = trans_branch(I, Relocs), - {I2, Relocs2, Data}; #call{} -> {I2, Relocs2} = case hipe_rtl:call_fun(I) of @@ -255,7 +252,6 @@ trans_alub(I, Relocs) -> trans_alub_overflow(I, Sign, Relocs) -> {Src1, I1} = trans_src(hipe_rtl:alub_src1(I)), {Src2, I2} = trans_src(hipe_rtl:alub_src2(I)), - RtlDst = hipe_rtl:alub_dst(I), TmpDst = mk_temp(), Name = trans_alub_op(I, Sign), NewRelocs = relocs_store(Name, {call, remote, {llvm, Name, 2}}, Relocs), @@ -266,7 +262,10 @@ trans_alub_overflow(I, Sign, Relocs) -> [{WordTy, Src1}, {WordTy, Src2}], []), %% T1{0}: result of the operation I4 = hipe_llvm:mk_extractvalue(TmpDst, ReturnType, T1 , "0", []), - I5 = store_stack_dst(TmpDst, RtlDst), + I5 = case hipe_rtl:alub_has_dst(I) of + false -> []; + true -> store_stack_dst(TmpDst, hipe_rtl:alub_dst(I)) + end, T2 = mk_temp(), %% T1{1}: Boolean variable indicating overflow I6 = hipe_llvm:mk_extractvalue(T2, ReturnType, T1, "1", []), @@ -311,42 +310,35 @@ trans_alub_op(I, Sign) -> Name ++ Type. trans_alub_no_overflow(I, Relocs) -> + {Src1, I1} = trans_src(hipe_rtl:alub_src1(I)), + {Src2, I2} = trans_src(hipe_rtl:alub_src2(I)), + WordTy = hipe_llvm:mk_int(?BITS_IN_WORD), %% alu - T = hipe_rtl:mk_alu(hipe_rtl:alub_dst(I), hipe_rtl:alub_src1(I), - hipe_rtl:alub_op(I), hipe_rtl:alub_src2(I)), - %% A trans_alu instruction cannot change relocations - {I1, _} = trans_alu(T, Relocs), + {CmpLhs, CmpRhs, I5, Cond} = + case {hipe_rtl:alub_has_dst(I), hipe_rtl:alub_op(I)} of + {false, 'sub'} -> + Cond0 = trans_branch_rel_op(hipe_rtl:alub_cond(I)), + {Src1, Src2, [], Cond0}; + {HasDst, AlubOp} -> + TmpDst = mk_temp(), + Op = trans_op(AlubOp), + I3 = hipe_llvm:mk_operation(TmpDst, Op, WordTy, Src1, Src2, []), + I4 = case HasDst of + false -> []; + true -> store_stack_dst(TmpDst, hipe_rtl:alub_dst(I)) + end, + Cond0 = trans_alub_rel_op(hipe_rtl:alub_cond(I)), + {TmpDst, "0", [I4, I3], Cond0} + end, %% icmp - %% Translate destination as src, to match with the semantics of instruction - {Dst, I2} = trans_src(hipe_rtl:alub_dst(I)), - Cond = trans_rel_op(hipe_rtl:alub_cond(I)), T3 = mk_temp(), - WordTy = hipe_llvm:mk_int(?BITS_IN_WORD), - I5 = hipe_llvm:mk_icmp(T3, Cond, WordTy, Dst, "0"), + I6 = hipe_llvm:mk_icmp(T3, Cond, WordTy, CmpLhs, CmpRhs), %% br Metadata = branch_metadata(hipe_rtl:alub_pred(I)), True_label = mk_jump_label(hipe_rtl:alub_true_label(I)), False_label = mk_jump_label(hipe_rtl:alub_false_label(I)), - I6 = hipe_llvm:mk_br_cond(T3, True_label, False_label, Metadata), - {[I6, I5, I2, I1], Relocs}. - -%% -%% branch -%% -trans_branch(I, Relocs) -> - {Src1, I1} = trans_src(hipe_rtl:branch_src1(I)), - {Src2, I2} = trans_src(hipe_rtl:branch_src2(I)), - Cond = trans_rel_op(hipe_rtl:branch_cond(I)), - %% icmp - T1 = mk_temp(), - WordTy = hipe_llvm:mk_int(?BITS_IN_WORD), - I3 = hipe_llvm:mk_icmp(T1, Cond, WordTy, Src1, Src2), - %% br - True_label = mk_jump_label(hipe_rtl:branch_true_label(I)), - False_label = mk_jump_label(hipe_rtl:branch_false_label(I)), - Metadata = branch_metadata(hipe_rtl:branch_pred(I)), - I4 = hipe_llvm:mk_br_cond(T1, True_label, False_label, Metadata), - {[I4, I3, I2, I1], Relocs}. + I7 = hipe_llvm:mk_br_cond(T3, True_label, False_label, Metadata), + {[I7, I6, I5, I2, I1], Relocs}. branch_metadata(X) when X =:= 0.5 -> []; branch_metadata(X) when X > 0.5 -> ?BRANCH_META_TAKEN; @@ -1162,7 +1154,7 @@ trans_dst(A) -> true -> "%DL" ++ integer_to_list(hipe_rtl:const_label_label(A)) ++ "_var"; false -> - exit({?MODULE, trans_dst, {"Bad RTL argument",A}}) + error(badarg, [A]) end end end, @@ -1260,14 +1252,19 @@ trans_op(Op) -> Other -> exit({?MODULE, trans_op, {"Unknown RTL operator", Other}}) end. -trans_rel_op(Op) -> +trans_branch_rel_op(Op) -> case Op of - eq -> eq; - ne -> ne; gtu -> ugt; geu -> uge; ltu -> ult; leu -> ule; + _ -> trans_alub_rel_op(Op) + end. + +trans_alub_rel_op(Op) -> + case Op of + eq -> eq; + ne -> ne; gt -> sgt; ge -> sge; lt -> slt; @@ -1300,7 +1297,10 @@ insn_dst(I) -> #alu{} -> [hipe_rtl:alu_dst(I)]; #alub{} -> - [hipe_rtl:alub_dst(I)]; + case hipe_rtl:alub_has_dst(I) of + true -> [hipe_rtl:alub_dst(I)]; + false -> [] + end; #call{} -> case hipe_rtl:call_dstlist(I) of [] -> []; diff --git a/lib/hipe/ppc/hipe_ppc_frame.erl b/lib/hipe/ppc/hipe_ppc_frame.erl index 8d37159ad8..58924409a8 100644 --- a/lib/hipe/ppc/hipe_ppc_frame.erl +++ b/lib/hipe/ppc/hipe_ppc_frame.erl @@ -98,7 +98,10 @@ do_pseudo_move(I, Context, FPoff) -> Offset = pseudo_offset(Src, FPoff, Context), mk_load(hipe_ppc:ldop_word(), Dst, Offset, mk_sp(), []); _ -> - [hipe_ppc:mk_alu('or', Dst, Src, Src)] + case hipe_ppc:temp_reg(Dst) =:= hipe_ppc:temp_reg(Src) of + true -> []; + false -> [hipe_ppc:mk_alu('or', Dst, Src, Src)] + end end end. diff --git a/lib/hipe/ppc/hipe_ppc_pp.erl b/lib/hipe/ppc/hipe_ppc_pp.erl index e69e6b64a2..0ff7a76bce 100644 --- a/lib/hipe/ppc/hipe_ppc_pp.erl +++ b/lib/hipe/ppc/hipe_ppc_pp.erl @@ -170,6 +170,12 @@ pp_insn(Dev, I, Pre) -> io:format(Dev, ", ", []), pp_temp(Dev, Base2), io:format(Dev, "\n", []); + #unary{unop={UnOp,I1,I2,I3}, dst=Dst, src=Src} -> + io:format(Dev, "\t~s ", [UnOp]), + pp_temp(Dev, Dst), + io:format(Dev, ", ", []), + pp_temp(Dev, Src), + io:format(Dev, ", ~s, ~s, ~s\n", [to_hex(I1),to_hex(I2),to_hex(I3)]); #unary{unop=UnOp, dst=Dst, src=Src} -> io:format(Dev, "\t~w ", [unop_name(UnOp)]), pp_temp(Dev, Dst), diff --git a/lib/hipe/ppc/hipe_rtl_to_ppc.erl b/lib/hipe/ppc/hipe_rtl_to_ppc.erl index a01e67a789..09f1ce5a49 100644 --- a/lib/hipe/ppc/hipe_rtl_to_ppc.erl +++ b/lib/hipe/ppc/hipe_rtl_to_ppc.erl @@ -80,7 +80,6 @@ conv_insn(I, Map, Data) -> case I of #alu{} -> conv_alu(I, Map, Data); #alub{} -> conv_alub(I, Map, Data); - #branch{} -> conv_branch(I, Map, Data); #call{} -> conv_call(I, Map, Data); #comment{} -> conv_comment(I, Map, Data); #enter{} -> conv_enter(I, Map, Data); @@ -441,36 +440,53 @@ mk_alu_rr(Dst, Src1, RtlAluOp, Src2) -> conv_alub(I, Map, Data) -> %% dst = src1 aluop src2; if COND goto label - {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), - {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), - {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), - {AluOp, BCond} = - case {hipe_rtl:alub_op(I), hipe_rtl:alub_cond(I)} of - {'add', 'ltu'} -> - {'addc', 'eq'}; - {RtlAlubOp, RtlAlubCond} -> - {conv_alub_op(RtlAlubOp), conv_alub_cond(RtlAlubCond)} - end, - BC = mk_pseudo_bc(BCond, - hipe_rtl:alub_true_label(I), - hipe_rtl:alub_false_label(I), - hipe_rtl:alub_pred(I)), - I2 = - case {AluOp, BCond} of - {'addc', 'eq'} -> % copy XER[CA] to CR0[EQ] before the BC - TmpR = new_untagged_temp(), - [hipe_ppc:mk_mfspr(TmpR, 'xer'), - hipe_ppc:mk_mtcr(TmpR) | - BC]; - _ -> BC - end, - {NewSrc1, NewSrc2} = - case AluOp of - 'subf' -> {Src2, Src1}; - _ -> {Src1, Src2} - end, - I1 = mk_alub(Dst, NewSrc1, AluOp, NewSrc2, BCond), - {I1 ++ I2, Map2, Data}. + HasDst = hipe_rtl:alub_has_dst(I), + {Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map), + {Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0), + RtlAlubOp = hipe_rtl:alub_op(I), + RtlAlubCond = hipe_rtl:alub_cond(I), + case {HasDst, RtlAlubOp} of + {false, sub} -> + {BCond,Sign} = conv_branch_cond(RtlAlubCond), + I2 = mk_branch(Src1, BCond, Sign, Src2, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I)), + {I2, Map1, Data}; + _ -> + {Dst, Map2} = + case HasDst of + false -> {new_untagged_temp(), Map1}; + true -> conv_dst(hipe_rtl:alub_dst(I), Map1) + end, + {AluOp, BCond} = + case {RtlAlubOp, RtlAlubCond} of + {'add', 'ltu'} -> + {'addc', 'eq'}; + {_, _} -> + {conv_alub_op(RtlAlubOp), conv_alub_cond(RtlAlubCond)} + end, + BC = mk_pseudo_bc(BCond, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I)), + I2 = + case {AluOp, BCond} of + {'addc', 'eq'} -> % copy XER[CA] to CR0[EQ] before the BC + TmpR = new_untagged_temp(), + [hipe_ppc:mk_mfspr(TmpR, 'xer'), + hipe_ppc:mk_mtcr(TmpR) | + BC]; + _ -> BC + end, + {NewSrc1, NewSrc2} = + case AluOp of + 'subf' -> {Src2, Src1}; + _ -> {Src1, Src2} + end, + I1 = mk_alub(Dst, NewSrc1, AluOp, NewSrc2, BCond), + {I1 ++ I2, Map2, Data} + end. conv_alub_op(RtlAluOp) -> case {get(hipe_target_arch), RtlAluOp} of @@ -689,17 +705,6 @@ mk_alub_rr_Rc(Dst, Src1, AluOp, Src2) -> end, [hipe_ppc:mk_alu(AluOpDot, Dst, Src1, Src2)]. -conv_branch(I, Map, Data) -> - %% <unused> = src1 - src2; if COND goto label - {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), - {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), - {BCond,Sign} = conv_branch_cond(hipe_rtl:branch_cond(I)), - I2 = mk_branch(Src1, BCond, Sign, Src2, - hipe_rtl:branch_true_label(I), - hipe_rtl:branch_false_label(I), - hipe_rtl:branch_pred(I)), - {I2, Map1, Data}. - conv_branch_cond(Cond) -> % may be unsigned case Cond of gtu -> {'gt', 'unsigned'}; diff --git a/lib/hipe/rtl/hipe_rtl.erl b/lib/hipe/rtl/hipe_rtl.erl index 0726827299..d39969a0ed 100644 --- a/lib/hipe/rtl/hipe_rtl.erl +++ b/lib/hipe/rtl/hipe_rtl.erl @@ -187,18 +187,14 @@ mk_branch/5, mk_branch/6, - branch_src1/1, - branch_src2/1, - branch_cond/1, - branch_true_label/1, - branch_false_label/1, - branch_pred/1, + mk_branch/7, %% is_branch/1, %% branch_true_label_update/2, %% branch_false_label_update/2, mk_alub/7, mk_alub/8, + alub_has_dst/1, alub_dst/1, alub_src1/1, alub_op/1, @@ -338,6 +334,7 @@ defines/1, redirect_jmp/3, is_safe/1, + reduce_unused/1, %% highest_var/1, pp/1, pp/2, @@ -588,37 +585,25 @@ is_label(#label{}) -> true; is_label(_) -> false. %% -%% branch -%% - -mk_branch(Src1, Op, Src2, True, False) -> - mk_branch(Src1, Op, Src2, True, False, 0.5). -mk_branch(Src1, Op, Src2, True, False, P) -> - #branch{src1=Src1, 'cond'=Op, src2=Src2, true_label=True, - false_label=False, p=P}. -branch_src1(#branch{src1=Src1}) -> Src1. -branch_src1_update(Br, NewSrc) -> Br#branch{src1=NewSrc}. -branch_src2(#branch{src2=Src2}) -> Src2. -branch_src2_update(Br, NewSrc) -> Br#branch{src2=NewSrc}. -branch_cond(#branch{'cond'=Cond}) -> Cond. -branch_true_label(#branch{true_label=TrueLbl}) -> TrueLbl. -branch_true_label_update(Br, NewTrue) -> Br#branch{true_label=NewTrue}. -branch_false_label(#branch{false_label=FalseLbl}) -> FalseLbl. -branch_false_label_update(Br, NewFalse) -> Br#branch{false_label=NewFalse}. -branch_pred(#branch{p=P}) -> P. - -%% %% alub %% -type alub_cond() :: 'eq' | 'ne' | 'ge' | 'geu' | 'gt' | 'gtu' | 'le' | 'leu' | 'lt' | 'ltu' | 'overflow' | 'not_overflow'. +mk_branch(Src1, Cond, Src2, True, False) -> + mk_branch(Src1, Cond, Src2, True, False, 0.5). +mk_branch(Src1, Cond, Src2, True, False, P) -> + mk_branch(Src1, 'sub', Src2, Cond, True, False, P). +mk_branch(Src1, Op, Src2, Cond, True, False, P) -> + mk_alub([], Src1, Op, Src2, Cond, True, False, P). + mk_alub(Dst, Src1, Op, Src2, Cond, True, False) -> mk_alub(Dst, Src1, Op, Src2, Cond, True, False, 0.5). mk_alub(Dst, Src1, Op, Src2, Cond, True, False, P) -> #alub{dst=Dst, src1=Src1, op=Op, src2=Src2, 'cond'=Cond, true_label=True, false_label=False, p=P}. +alub_has_dst(#alub{dst=Dst}) -> Dst =/= []. alub_dst(#alub{dst=Dst}) -> Dst. alub_dst_update(A, NewDst) -> A#alub{dst=NewDst}. alub_src1(#alub{src1=Src1}) -> Src1. @@ -943,8 +928,7 @@ args(I) -> case I of #alu{} -> [alu_src1(I), alu_src2(I)]; #alub{} -> [alub_src1(I), alub_src2(I)]; - #branch{} -> [branch_src1(I), branch_src2(I)]; - #call{} -> + #call{} -> Args = call_arglist(I) ++ hipe_rtl_arch:call_used(), case call_is_known(I) of false -> [call_fun(I) | Args]; @@ -987,8 +971,8 @@ args(I) -> defines(Instr) -> Defs = case Instr of #alu{} -> [alu_dst(Instr)]; + #alub{dst=[]} -> []; #alub{} -> [alub_dst(Instr)]; - #branch{} -> []; #call{} -> call_dstlist(Instr) ++ hipe_rtl_arch:call_defined(); #comment{} -> []; #enter{} -> []; @@ -1042,9 +1026,6 @@ subst_uses(Subst, I) -> #alub{} -> I0 = alub_src1_update(I, subst1(Subst, alub_src1(I))), alub_src2_update(I0, subst1(Subst, alub_src2(I))); - #branch{} -> - I0 = branch_src1_update(I, subst1(Subst, branch_src1(I))), - branch_src2_update(I0, subst1(Subst, branch_src2(I))); #call{} -> case call_is_known(I) of false -> @@ -1126,11 +1107,6 @@ subst_uses_llvm(Subst, I) -> {NewSrc1, _ } = subst1_llvm(Subst1, alub_src1(I)), I0 = alub_src1_update(I, NewSrc1), alub_src2_update(I0, NewSrc2); - #branch{} -> - {NewSrc2, Subst1} = subst1_llvm(Subst, branch_src2(I)), - {NewSrc1, _ } = subst1_llvm(Subst1, branch_src1(I)), - I0 = branch_src1_update(I, NewSrc1), - branch_src2_update(I0, NewSrc2); #call{} -> case call_is_known(I) of false -> @@ -1243,10 +1219,10 @@ subst_defines(Subst, I)-> case I of #alu{} -> alu_dst_update(I, subst1(Subst, alu_dst(I))); + #alub{dst=[]} -> + I; #alub{} -> alub_dst_update(I, subst1(Subst, alub_dst(I))); - #branch{} -> - I; #call{} -> call_dstlist_update(I, subst_list(Subst, call_dstlist(I))); #comment{} -> @@ -1313,7 +1289,6 @@ is_safe(Instr) -> case Instr of #alu{} -> true; #alub{} -> false; - #branch{} -> false; #call{} -> false; #comment{} -> false; #enter{} -> false; @@ -1340,6 +1315,24 @@ is_safe(Instr) -> #switch{} -> false %% Maybe this is safe... end. +%% @spec reduce_unused(rtl_instruction()) +%% -> false | [rtl_instruction()]. +%% +%% @doc Produces a simplified instruction sequence that is equivalent to [Instr] +%% under the assumption that all results of Instr are unused, or 'false' if +%% there is no such sequence (other than [Instr] itself). + +reduce_unused(Instr) -> + case Instr of + #alub{dst=Dst} when Dst =/= [] -> + [Instr#alub{dst=[]}]; + _ -> + case is_safe(Instr) of + true -> []; + false -> false + end + end. + %% %% True if argument is an alu-operator %% @@ -1386,17 +1379,6 @@ redirect_jmp(Jmp, ToOld, ToNew) -> %% OBS: In a jmp instruction more than one labels may be identical %% and thus need redirection! case Jmp of - #branch{} -> - TmpJmp = case branch_true_label(Jmp) of - ToOld -> branch_true_label_update(Jmp, ToNew); - _ -> Jmp - end, - case branch_false_label(TmpJmp) of - ToOld -> - branch_false_label_update(TmpJmp, ToNew); - _ -> - TmpJmp - end; #switch{} -> NewLbls = [case Lbl =:= ToOld of true -> ToNew; @@ -1591,13 +1573,6 @@ pp_instr(Dev, I) -> io:format(Dev, "~n", []); #label{} -> io:format(Dev, "L~w:~n", [label_name(I)]); - #branch{} -> - io:format(Dev, " if (", []), - pp_arg(Dev, branch_src1(I)), - io:format(Dev, " ~w ", [branch_cond(I)]), - pp_arg(Dev, branch_src2(I)), - io:format(Dev, ") then L~w (~.2f) else L~w~n", - [branch_true_label(I), branch_pred(I), branch_false_label(I)]); #switch{} -> io:format(Dev, " switch (", []), pp_arg(Dev, switch_src(I)), @@ -1606,7 +1581,10 @@ pp_instr(Dev, I) -> io:format(Dev, ">\n", []); #alub{} -> io:format(Dev, " ", []), - pp_arg(Dev, alub_dst(I)), + case alub_has_dst(I) of + true -> pp_arg(Dev, alub_dst(I)); + false -> io:format(Dev, "_", []) + end, io:format(Dev, " <- ", []), pp_arg(Dev, alub_src1(I)), io:format(Dev, " ~w ", [alub_op(I)]), diff --git a/lib/hipe/rtl/hipe_rtl.hrl b/lib/hipe/rtl/hipe_rtl.hrl index cc76e7e5c4..74020c6045 100644 --- a/lib/hipe/rtl/hipe_rtl.hrl +++ b/lib/hipe/rtl/hipe_rtl.hrl @@ -28,7 +28,6 @@ -record(alu, {dst, src1, op, src2}). -record(alub, {dst, src1, op, src2, 'cond', true_label, false_label, p}). --record(branch, {src1, src2, 'cond', true_label, false_label, p}). -record(call, {dstlist, 'fun', arglist, type, continuation, failcontinuation, normalcontinuation = []}). -record(comment, {text}). diff --git a/lib/hipe/rtl/hipe_rtl_binary_construct.erl b/lib/hipe/rtl/hipe_rtl_binary_construct.erl index 367d76b24d..baf5f7d27a 100644 --- a/lib/hipe/rtl/hipe_rtl_binary_construct.erl +++ b/lib/hipe/rtl/hipe_rtl_binary_construct.erl @@ -429,8 +429,8 @@ realloc_binary(SizeReg, ProcBin, Base) -> hipe_tagscheme:set_field_from_term(ProcBinFlagsTag, ProcBin, Flags), hipe_tagscheme:get_field_from_term(ProcBinValTag, ProcBin, BinPointer), hipe_tagscheme:get_field_from_pointer(BinOrigSizeTag, BinPointer, OrigSize), - hipe_rtl:mk_branch(OrigSize, 'ltu', ResultingSize, - ReallocLblName, NoReallocLblName), + hipe_rtl:mk_branch(OrigSize, 'geu', ResultingSize, NoReallocLblName, + ReallocLblName), NoReallocLbl, hipe_tagscheme:get_field_from_term(ProcBinBytesTag, ProcBin, Base), hipe_rtl:mk_goto(ContLblName), @@ -757,9 +757,9 @@ test_alignment(SrcOffset, NumBits, Offset, AlignedCode, CCode) -> [AlignedLbl, CLbl] = create_lbls(2), [hipe_rtl:mk_alu(Tmp, SrcOffset, 'or', NumBits), hipe_rtl:mk_alu(Tmp, Tmp, 'or', Offset), - hipe_rtl:mk_alub(Tmp, Tmp, 'and', ?LOW_BITS, 'eq', - hipe_rtl:label_name(AlignedLbl), - hipe_rtl:label_name(CLbl)), + hipe_rtl:mk_branch(Tmp, 'and', ?LOW_BITS, 'eq', + hipe_rtl:label_name(AlignedLbl), + hipe_rtl:label_name(CLbl), 0.5), AlignedLbl, AlignedCode, CLbl, @@ -1284,8 +1284,7 @@ is_divisible(Dividend, Divisor, SuccLbl, FailLbl) -> true -> %% Divisor is a power of 2 %% Test that the Log2-1 lowest bits are clear Mask = hipe_rtl:mk_imm(Divisor - 1), - [Tmp] = create_regs(1), - [hipe_rtl:mk_alub(Tmp, Dividend, 'and', Mask, eq, SuccLbl, FailLbl, 0.99)]; + [hipe_rtl:mk_branch(Dividend, 'and', Mask, eq, SuccLbl, FailLbl, 0.99)]; false -> %% We need division, fall back to a primop [hipe_rtl:mk_call([], is_divisible, [Dividend, hipe_rtl:mk_imm(Divisor)], diff --git a/lib/hipe/rtl/hipe_rtl_binary_match.erl b/lib/hipe/rtl/hipe_rtl_binary_match.erl index d999cd2743..520b055ba7 100644 --- a/lib/hipe/rtl/hipe_rtl_binary_match.erl +++ b/lib/hipe/rtl/hipe_rtl_binary_match.erl @@ -659,9 +659,8 @@ test_alignment_code(Size, Unit, SLblName, FalseLblName) -> end. get_fast_test_code(Size, AndTest, SLblName, FalseLblName) -> - [Tmp] = create_gcsafe_regs(1), - [hipe_rtl:mk_alub(Tmp, Size, 'and', hipe_rtl:mk_imm(AndTest), - 'eq', SLblName, FalseLblName)]. + [hipe_rtl:mk_branch(Size, 'and', hipe_rtl:mk_imm(AndTest), 'eq', + SLblName, FalseLblName, 0.5)]. %% This is really slow get_slow_test_code(Size, Unit, SLblName, FalseLblName) -> diff --git a/lib/hipe/rtl/hipe_rtl_cfg.erl b/lib/hipe/rtl/hipe_rtl_cfg.erl index f49e8f815f..e802b320c2 100644 --- a/lib/hipe/rtl/hipe_rtl_cfg.erl +++ b/lib/hipe/rtl/hipe_rtl_cfg.erl @@ -83,9 +83,7 @@ mk_goto(Name) -> branch_successors(Instr) -> case Instr of - #branch{} -> [hipe_rtl:branch_true_label(Instr), - hipe_rtl:branch_false_label(Instr)]; - #alub{} -> [hipe_rtl:alub_true_label(Instr), + #alub{} -> [hipe_rtl:alub_true_label(Instr), hipe_rtl:alub_false_label(Instr)]; #switch{} -> hipe_rtl:switch_labels(Instr); #call{} -> @@ -106,7 +104,6 @@ fails_to(Instr) -> is_branch(Instr) -> case Instr of - #branch{} -> true; #alub{} -> true; #switch{} -> true; #goto{} -> true; @@ -127,7 +124,7 @@ is_branch(Instr) -> is_pure_branch(Instr) -> case Instr of - #branch{} -> true; + #alub{} -> not hipe_rtl:alub_has_dst(Instr); #switch{} -> true; #goto{} -> true; _ -> false diff --git a/lib/hipe/rtl/hipe_rtl_lcm.erl b/lib/hipe/rtl/hipe_rtl_lcm.erl index 71bd06c0df..67ddd0f649 100644 --- a/lib/hipe/rtl/hipe_rtl_lcm.erl +++ b/lib/hipe/rtl/hipe_rtl_lcm.erl @@ -378,7 +378,6 @@ is_expr(I) -> %% end; #alub{} -> false; %% TODO: Split instruction to consider alu expression? - #branch{} -> false; #call{} -> false; %% We cannot prove that a call has no side-effects #comment{} -> false; #enter{} -> false; diff --git a/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl index 7158383010..f887eeab66 100644 --- a/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl +++ b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl @@ -110,8 +110,6 @@ visit_expression(Instruction, Environment) -> visit_alu(Instruction, Environment); #alub{} -> visit_alub(Instruction, Environment); - #branch{} -> - visit_branch(Instruction, Environment); #call{} -> visit_call(Instruction, Environment); %% #comment{} -> @@ -184,42 +182,6 @@ set_to(Dst, Val, Env) -> {[], SSAWork, Env1}. %%----------------------------------------------------------------------------- -%% Procedure : visit_branch/2 -%% Purpose : do symbolic exection of branch instructions. -%% Arguments : Inst - The instruction -%% Env - The environment -%% Returns : { FlowWorkList, SSAWorkList, NewEnvironment} -%%----------------------------------------------------------------------------- - -visit_branch(Inst, Env) -> %% Titta också på exekverbarflagga - Val1 = lookup_lattice_value(hipe_rtl:branch_src1(Inst), Env), - Val2 = lookup_lattice_value(hipe_rtl:branch_src2(Inst), Env), - CFGWL = case evaluate_relop(Val1, hipe_rtl:branch_cond(Inst), Val2) of - true -> [hipe_rtl:branch_true_label(Inst)]; - false -> [hipe_rtl:branch_false_label(Inst)]; - bottom -> [hipe_rtl:branch_true_label(Inst), - hipe_rtl:branch_false_label(Inst)]; - top -> [] - end, - {CFGWL, [], Env}. - -%%----------------------------------------------------------------------------- -%% Procedure : evaluate_relop/3 -%% Purpose : evaluate the given relop. While taking care to handle top & -%% bottom in some sane way. -%% Arguments : Val1, Val2 - The operands Integers or top or bottom -%% RelOp - some relop atom from rtl. -%% Returns : bottom, top, true or false -%%----------------------------------------------------------------------------- - -evaluate_relop(Val1, RelOp, Val2) -> - if - (Val1==bottom) or (Val2==bottom) -> bottom ; - (Val1==top) or (Val2==top) -> top; - true -> hipe_rtl_arch:eval_cond(RelOp, Val1, Val2) - end. - -%%----------------------------------------------------------------------------- %% Procedure : evaluate_fixnumop/2 %% Purpose : try to evaluate a fixnumop. %% Arguments : Val1 - operand (an integer, 'top' or 'bottom') @@ -408,6 +370,7 @@ partial_eval_branch(Cond, N0, Z0, V0, C0) -> Cond =:= 'ne' -> {true, Z0, true, true}; Cond =:= 'gt'; Cond =:= 'le' -> {N0, Z0, V0, true}; + Cond =:= 'leu'; Cond =:= 'gtu' -> {true, Z0, true, C0 }; Cond =:= 'lt'; Cond =:= 'ge' -> {N0, true, V0, true}; @@ -450,7 +413,11 @@ visit_alub(Inst, Env) -> false -> [hipe_rtl:alub_false_label(Inst)] end end, - {[], NewSSA, NewEnv} = set_to(hipe_rtl:alub_dst(Inst), NewVal, Env), + {[], NewSSA, NewEnv} = + case hipe_rtl:alub_has_dst(Inst) of + false -> {[], [], Env}; + true -> set_to(hipe_rtl:alub_dst(Inst), NewVal, Env) + end, {Labels, NewSSA, NewEnv}. %%----------------------------------------------------------------------------- @@ -688,8 +655,6 @@ update_instruction(Inst, Env) -> update_alu(Inst, Env); #alub{} -> update_alub(Inst, Env); - #branch{} -> - update_branch(Inst, Env); #call{} -> subst_all_uses(Inst, Env); %% #comment{} -> @@ -902,33 +867,6 @@ update_alu(Inst, Env) -> {Val,_,_,_,_} = evaluate_alu(Val1, hipe_rtl:alu_op(Inst), Val2), [hipe_rtl:mk_move(hipe_rtl:alu_dst(Inst), hipe_rtl:mk_imm(Val))] end. - -%%----------------------------------------------------------------------------- -%% Procedure : update_branch/2 -%% Purpose : update an branch-instruction -%% Arguments : Inst - the instruction. -%% Env - in which everything happens. -%% Returns : list of new instruction -%%----------------------------------------------------------------------------- - -update_branch(Inst, Env) -> - Src1 = hipe_rtl:branch_src1(Inst), - Src2 = hipe_rtl:branch_src2(Inst), - Val1 = lookup_lattice_value(Src1, Env), - Val2 = lookup_lattice_value(Src2, Env), - if - (Val1 =:= bottom) and (Val2 =:= bottom) -> - [Inst]; - Val1 =:= bottom -> - [hipe_rtl:subst_uses([{Src2, hipe_rtl:mk_imm(Val2)}], Inst)]; - Val2 =:= bottom -> - [hipe_rtl:subst_uses([{Src1, hipe_rtl:mk_imm(Val1)}], Inst)]; - true -> - case hipe_rtl_arch:eval_cond(hipe_rtl:branch_cond(Inst), Val1, Val2) of - true -> [hipe_rtl:mk_goto(hipe_rtl:branch_true_label(Inst))]; - false -> [hipe_rtl:mk_goto(hipe_rtl:branch_false_label(Inst))] - end - end. %%----------------------------------------------------------------------------- %% Procedure : update_alub/2 @@ -943,8 +881,12 @@ update_branch(Inst, Env) -> %% some small helpers. alub_to_move(Inst, Res, Lab) -> - [hipe_rtl:mk_move(hipe_rtl:alub_dst(Inst), Res), - hipe_rtl:mk_goto(Lab)]. + Goto = [hipe_rtl:mk_goto(Lab)], + case hipe_rtl:alub_has_dst(Inst) of + false -> Goto; + true -> + [hipe_rtl:mk_move(hipe_rtl:alub_dst(Inst), Res) | Goto] + end. make_alub_subst_list(bottom, _, Tail) -> Tail; make_alub_subst_list(top, Src, _) -> diff --git a/lib/hipe/rtl/hipe_tagscheme.erl b/lib/hipe/rtl/hipe_tagscheme.erl index 8cf45772b5..5d11b9b82e 100644 --- a/lib/hipe/rtl/hipe_tagscheme.erl +++ b/lib/hipe/rtl/hipe_tagscheme.erl @@ -171,22 +171,21 @@ test_nil(X, TrueLab, FalseLab, Pred) -> hipe_rtl:mk_branch(X, eq, hipe_rtl:mk_imm(?NIL), TrueLab, FalseLab, Pred). test_cons(X, TrueLab, FalseLab, Pred) -> - Tmp = hipe_rtl:mk_new_reg_gcsafe(), Mask = hipe_rtl:mk_imm(?TAG_PRIMARY_MASK - ?TAG_PRIMARY_LIST), - hipe_rtl:mk_alub(Tmp, X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred). + hipe_rtl:mk_branch(X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred). test_is_boxed(X, TrueLab, FalseLab, Pred) -> - Tmp = hipe_rtl:mk_new_reg_gcsafe(), Mask = hipe_rtl:mk_imm(?TAG_PRIMARY_MASK - ?TAG_PRIMARY_BOXED), - hipe_rtl:mk_alub(Tmp, X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred). + hipe_rtl:mk_branch(X, 'and', Mask, 'eq', TrueLab, FalseLab, Pred). get_header(Res, X) -> hipe_rtl:mk_load(Res, X, hipe_rtl:mk_imm(-(?TAG_PRIMARY_BOXED))). mask_and_compare(X, Mask, Value, TrueLab, FalseLab, Pred) -> Tmp = hipe_rtl:mk_new_reg_gcsafe(), - [hipe_rtl:mk_alu(Tmp, X, 'and', hipe_rtl:mk_imm(Mask)), - hipe_rtl:mk_branch(Tmp, 'eq', hipe_rtl:mk_imm(Value), TrueLab, FalseLab, Pred)]. + [hipe_rtl:mk_alu(Tmp, X, 'sub', hipe_rtl:mk_imm(Value)), + hipe_rtl:mk_branch(Tmp, 'and', hipe_rtl:mk_imm(Mask), + eq, TrueLab, FalseLab, Pred)]. test_immed1(X, Value, TrueLab, FalseLab, Pred) -> mask_and_compare(X, ?TAG_IMMED1_MASK, Value, TrueLab, FalseLab, Pred). @@ -238,13 +237,12 @@ test_atom(X, TrueLab, FalseLab, Pred) -> test_tuple(X, TrueLab, FalseLab, Pred) -> Tmp = hipe_rtl:mk_new_reg_gcsafe(), - Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), HalfTrueLab = hipe_rtl:mk_new_label(), [test_is_boxed(X, hipe_rtl:label_name(HalfTrueLab), FalseLab, Pred), HalfTrueLab, get_header(Tmp, X), - hipe_rtl:mk_alub(Tmp2, Tmp, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', - TrueLab, FalseLab, Pred)]. + hipe_rtl:mk_branch(Tmp, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', + TrueLab, FalseLab, Pred)]. test_tuple_N(X, N, TrueLab, FalseLab, Pred) -> Tmp = hipe_rtl:mk_new_reg_gcsafe(), @@ -466,12 +464,17 @@ test_fixnums_1([Arg1, Arg2|Args], Acc) -> test_two_fixnums(Arg1, Arg2, FalseLab) -> TrueLab = hipe_rtl:mk_new_label(), - case hipe_rtl:is_imm(Arg2) of + case hipe_rtl:is_imm(Arg1) orelse hipe_rtl:is_imm(Arg2) of true -> - Value = hipe_rtl:imm_value(Arg2), + {Imm, Var} = + case hipe_rtl:is_imm(Arg1) of + true -> {Arg1, Arg2}; + false -> {Arg2, Arg1} + end, + Value = hipe_rtl:imm_value(Imm), case Value band ?TAG_IMMED1_MASK of ?TAG_IMMED1_SMALL -> - [test_fixnum(Arg1, hipe_rtl:label_name(TrueLab), FalseLab, 0.99), + [test_fixnum(Var, hipe_rtl:label_name(TrueLab), FalseLab, 0.99), TrueLab]; _ -> [hipe_rtl:mk_goto(FalseLab)] @@ -512,28 +515,48 @@ unsafe_fixnum_sub(Arg1, Arg2, Res) -> %%% (16X+tag)+((16Y+tag)-tag) = 16X+tag+16Y = 16(X+Y)+tag %%% (16X+tag)-((16Y+tag)-tag) = 16X+tag-16Y = 16(X-Y)+tag -fixnum_addsub(AluOp, Arg1, Arg2, Res, OtherLab) -> - Tmp = hipe_rtl:mk_new_reg_gcsafe(), +fixnum_addsub(AluOp, Arg1, Arg2, FinalRes, OtherLab) -> + NoOverflowLab = hipe_rtl:mk_new_label(), %% XXX: Consider moving this test to the users of fixnum_addsub. - case Arg1 =/= Res andalso Arg2 =/= Res of - true -> - %% Args differ from res. - NoOverflowLab = hipe_rtl:mk_new_label(), - [hipe_rtl:mk_alu(Tmp, Arg2, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), - hipe_rtl:mk_alub(Res, Arg1, AluOp, Tmp, not_overflow, - hipe_rtl:label_name(NoOverflowLab), - hipe_rtl:label_name(OtherLab), 0.99), - NoOverflowLab]; + {Res, Tail} = + case Arg1 =/= FinalRes andalso Arg2 =/= FinalRes of + true -> + %% Args differ from res. + {FinalRes, [NoOverflowLab]}; + false -> + %% At least one of the arguments is the same as Res. + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + {Tmp, [NoOverflowLab, hipe_rtl:mk_move(FinalRes, Tmp)]} + end, + case (hipe_rtl:is_imm(Arg1) andalso AluOp =:= 'add') + orelse hipe_rtl:is_imm(Arg2) + of + true -> + %% Pre-compute the untagged immediate. The optimisers won't do this for us + %% since they don't know that the untag never underflows. + {Var, Imm0} = + case hipe_rtl:is_imm(Arg2) of + true -> {Arg1, Arg2}; + false -> {Arg2, Arg1} + end, + Imm = hipe_rtl:mk_imm(hipe_rtl:imm_value(Imm0) - ?TAG_IMMED1_SMALL), + [hipe_rtl:mk_alub(Res, Var, AluOp, Imm, not_overflow, + hipe_rtl:label_name(NoOverflowLab), + hipe_rtl:label_name(OtherLab), 0.99) + |Tail]; false -> - %% At least one of the arguments is the same as Res. - Tmp2 = hipe_rtl:mk_new_var(), % XXX: shouldn't this var be a reg? - NoOverflowLab = hipe_rtl:mk_new_label(), - [hipe_rtl:mk_alu(Tmp, Arg2, sub, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), - hipe_rtl:mk_alub(Tmp2, Arg1, AluOp, Tmp, not_overflow, + %% Commute add to save a move on x86 + {UntagFirst, Lhs, Rhs} = + case AluOp of + 'add' -> {Arg1, Res, Arg2}; + 'sub' -> {Arg2, Arg1, Res} + end, + [hipe_rtl:mk_alu(Res, UntagFirst, sub, + hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)), + hipe_rtl:mk_alub(Res, Lhs, AluOp, Rhs, not_overflow, hipe_rtl:label_name(NoOverflowLab), - hipe_rtl:label_name(OtherLab), 0.99), - NoOverflowLab, - hipe_rtl:mk_move(Res, Tmp2)] + hipe_rtl:label_name(OtherLab), 0.99) + |Tail] end. %%% ((16X+tag) div 16) * ((16Y+tag)-tag) + tag = X*16Y+tag = 16(XY)+tag @@ -687,7 +710,6 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) -> IndexOkLab = hipe_rtl:mk_new_label(), Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple Header = hipe_rtl:mk_new_reg_gcsafe(), - Tmp = hipe_rtl:mk_new_reg_gcsafe(), UIndex = hipe_rtl:mk_new_reg_gcsafe(), Arity = hipe_rtl:mk_new_reg_gcsafe(), InvIndex = hipe_rtl:mk_new_reg_gcsafe(), @@ -700,9 +722,9 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) -> BoxedOkLab, hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), - hipe_rtl:mk_alub(Tmp, Header, 'and', - hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', - hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), + hipe_rtl:mk_branch(Header, 'and', + hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', + hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), TupleOkLab, untag_fixnum(UIndex, Index), hipe_rtl:mk_alu(Arity, Header, 'srl', @@ -716,9 +738,9 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) -> BoxedOkLab, hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), - hipe_rtl:mk_alub(Tmp, Header, 'and', - hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', - hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), + hipe_rtl:mk_branch(Header, 'and', + hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', + hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), TupleOkLab, hipe_rtl:mk_alu(Arity, Header, 'srl', hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))| @@ -734,9 +756,9 @@ element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) -> BoxedOkLab, hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)), hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)), - hipe_rtl:mk_alub(Tmp, Header, 'and', - hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', - hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), + hipe_rtl:mk_branch(Header, 'and', + hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq', + hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99), TupleOkLab, untag_fixnum(UIndex, Index), hipe_rtl:mk_alu(Arity, Header, 'srl', @@ -870,12 +892,10 @@ heap_arch_spec(HP) -> hipe_rtl_arch:pcb_store(?P_OFF_HEAP_FIRST, HP)]. test_heap_binary(Binary, TrueLblName, FalseLblName) -> - Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), - Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), - [get_header(Tmp1, Binary), - hipe_rtl:mk_alu(Tmp2, Tmp1, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK)), - hipe_rtl:mk_branch(Tmp2, eq, hipe_rtl:mk_imm(?TAG_HEADER_HEAP_BIN), - TrueLblName, FalseLblName)]. + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + [get_header(Tmp, Binary), + mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_HEAP_BIN, + TrueLblName, FalseLblName, 0.5)]. mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, Orig) -> mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, @@ -903,11 +923,10 @@ build_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, set_field_from_term({sub_binary, orig}, Dst, Orig)]. test_subbinary(Binary, TrueLblName, FalseLblName) -> - Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), - Tmp2 = hipe_rtl:mk_new_reg_gcsafe(), - [get_header(Tmp1, Binary), - hipe_rtl:mk_alu(Tmp2, Tmp1, 'and', hipe_rtl:mk_imm(?TAG_HEADER_MASK)), - hipe_rtl:mk_branch(Tmp2, eq, hipe_rtl:mk_imm(?TAG_HEADER_SUB_BIN), TrueLblName, FalseLblName)]. + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + [get_header(Tmp, Binary), + mask_and_compare(Tmp, ?TAG_HEADER_MASK, ?TAG_HEADER_SUB_BIN, + TrueLblName, FalseLblName, 0.5)]. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% diff --git a/lib/hipe/sparc/hipe_rtl_to_sparc.erl b/lib/hipe/sparc/hipe_rtl_to_sparc.erl index e170fec3d6..7fab0d95c7 100644 --- a/lib/hipe/sparc/hipe_rtl_to_sparc.erl +++ b/lib/hipe/sparc/hipe_rtl_to_sparc.erl @@ -63,7 +63,6 @@ conv_insn(I, Map, Data) -> case I of #alu{} -> conv_alu(I, Map, Data); #alub{} -> conv_alub(I, Map, Data); - #branch{} -> conv_branch(I, Map, Data); #call{} -> conv_call(I, Map, Data); #comment{} -> conv_comment(I, Map, Data); #enter{} -> conv_enter(I, Map, Data); @@ -281,7 +280,12 @@ mk_alu_rs(XAluOp, Src1, Src2, Dst) -> conv_alub(I, Map, Data) -> %% dst = src1 aluop src2; if COND goto label - {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), + HasDst = hipe_rtl:alub_has_dst(I), + {Dst, Map0} = + case HasDst of + false -> {hipe_sparc:mk_g0(), Map}; + true -> conv_dst(hipe_rtl:alub_dst(I), Map) + end, {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), Cond = conv_cond(hipe_rtl:alub_cond(I)), @@ -307,67 +311,33 @@ conv_alub(I, Map, Data) -> I1 ++ [hipe_sparc:mk_rdy(TmpHi), hipe_sparc:mk_alu('sra', Dst, hipe_sparc:mk_uimm5(31), TmpSign) | - conv_alub2(G0, TmpSign, 'sub', NewCond, TmpHi, I)]; + conv_alub2(G0, TmpSign, 'cmpcc', NewCond, TmpHi, I)]; _ -> - conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I) + XAluOp = + case (not HasDst) andalso RtlAlubOp =:= 'sub' of + true -> 'cmpcc'; % == a subcc that commutes + false -> conv_alubop_cc(RtlAlubOp) + end, + conv_alub2(Dst, Src1, XAluOp, Cond, Src2, I) end, {I2, Map2, Data}. --ifdef(notdef). % XXX: only for sparc64, alas -conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I) -> - case conv_cond_rcond(Cond) of - [] -> - conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I); - RCond -> - conv_alub_br(Dst, Src1, RtlAlubOp, RCond, Src2, I) - end. +conv_alub2(Dst, Src1, XAluOp, Cond, Src2, I) -> + conv_alub_bp(Dst, Src1, XAluOp, Cond, Src2, I). -conv_alub_br(Dst, Src1, RtlAlubOp, RCond, Src2, I) -> - TrueLab = hipe_rtl:alub_true_label(I), - FalseLab = hipe_rtl:alub_false_label(I), - Pred = hipe_rtl:alub_pred(I), - %% "Dst = Src1 AluOp Src2; if COND" becomes - %% "Dst = Src1 AluOp Src2; if-COND(Dst)" - {I2, _DidCommute} = mk_alu(conv_alubop_nocc(RtlAlubOp), Src1, Src2, Dst), - I2 ++ mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred). - -conv_cond_rcond(Cond) -> - case Cond of - 'e' -> 'z'; - 'ne' -> 'nz'; - 'g' -> 'gz'; - 'ge' -> 'gez'; - 'l' -> 'lz'; - 'le' -> 'lez'; - _ -> [] % vs, vc, gu, geu, lu, leu - end. - -conv_alubop_nocc(RtlAlubOp) -> - case RtlAlubOp of - 'add' -> 'add'; - 'sub' -> 'sub'; - %% mul: handled elsewhere - 'or' -> 'or'; - 'and' -> 'and'; - 'xor' -> 'xor' - %% no shift ops - end. - -mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred) -> - [hipe_sparc:mk_pseudo_br(RCond, Dst, TrueLab, FalseLab, Pred)]. --else. -conv_alub2(Dst, Src1, RtlAlubOp, Cond, Src2, I) -> - conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I). --endif. - -conv_alub_bp(Dst, Src1, RtlAlubOp, Cond, Src2, I) -> +conv_alub_bp(Dst, Src1, XAluOp, Cond, Src2, I) -> TrueLab = hipe_rtl:alub_true_label(I), FalseLab = hipe_rtl:alub_false_label(I), Pred = hipe_rtl:alub_pred(I), %% "Dst = Src1 AluOp Src2; if COND" becomes %% "Dst = Src1 AluOpCC Src22; if-COND(CC)" - {I2, _DidCommute} = mk_alu(conv_alubop_cc(RtlAlubOp), Src1, Src2, Dst), - I2 ++ mk_pseudo_bp(Cond, TrueLab, FalseLab, Pred). + {I2, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst), + NewCond = + case DidCommute andalso XAluOp =:= 'cmpcc' of + true -> commute_cond(Cond); % subcc does not commute; its conditions do + false -> Cond + end, + I2 ++ mk_pseudo_bp(NewCond, TrueLab, FalseLab, Pred). conv_alubop_cc(RtlAlubOp) -> case RtlAlubOp of @@ -380,69 +350,6 @@ conv_alubop_cc(RtlAlubOp) -> %% no shift ops end. -conv_branch(I, Map, Data) -> - %% <unused> = src1 - src2; if COND goto label - {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), - {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), - Cond = conv_cond(hipe_rtl:branch_cond(I)), - I2 = conv_branch2(Src1, Cond, Src2, I), - {I2, Map1, Data}. - --ifdef(notdef). % XXX: only for sparc64, alas -conv_branch2(Src1, Cond, Src2, I) -> - case conv_cond_rcond(Cond) of - [] -> - conv_branch_bp(Src1, Cond, Src2, I); - RCond -> - conv_branch_br(Src1, RCond, Src2, I) - end. - -conv_branch_br(Src1, RCond, Src2, I) -> - TrueLab = hipe_rtl:branch_true_label(I), - FalseLab = hipe_rtl:branch_false_label(I), - Pred = hipe_rtl:branch_pred(I), - %% "if src1-COND-src2" becomes - %% "sub src1,src2,tmp; if-COND(tmp)" - Dst = hipe_sparc:mk_new_temp('untagged'), - XAluOp = 'cmp', % == a sub that commutes - {I1, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst), - NewRCond = - case DidCommute of - true -> commute_rcond(RCond); - false -> RCond - end, - I1 ++ mk_pseudo_br(NewRCond, Dst, TrueLab, FalseLab, Pred). - -commute_rcond(RCond) -> % if x RCond y, then y commute_rcond(RCond) x - case RCond of - 'z' -> 'z'; % ==, == - 'nz' -> 'nz'; % !=, != - 'gz' -> 'lz'; % >, < - 'gez' -> 'lez'; % >=, <= - 'lz' -> 'gz'; % <, > - 'lez' -> 'gez' % <=, >= - end. --else. -conv_branch2(Src1, Cond, Src2, I) -> - conv_branch_bp(Src1, Cond, Src2, I). --endif. - -conv_branch_bp(Src1, Cond, Src2, I) -> - TrueLab = hipe_rtl:branch_true_label(I), - FalseLab = hipe_rtl:branch_false_label(I), - Pred = hipe_rtl:branch_pred(I), - %% "if src1-COND-src2" becomes - %% "subcc src1,src2,%g0; if-COND(CC)" - Dst = hipe_sparc:mk_g0(), - XAluOp = 'cmpcc', % == a subcc that commutes - {I1, DidCommute} = mk_alu(XAluOp, Src1, Src2, Dst), - NewCond = - case DidCommute of - true -> commute_cond(Cond); - false -> Cond - end, - I1 ++ mk_pseudo_bp(NewCond, TrueLab, FalseLab, Pred). - conv_call(I, Map, Data) -> {Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map), {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0), diff --git a/lib/hipe/sparc/hipe_sparc_frame.erl b/lib/hipe/sparc/hipe_sparc_frame.erl index 37f29e660a..bd94d3318c 100644 --- a/lib/hipe/sparc/hipe_sparc_frame.erl +++ b/lib/hipe/sparc/hipe_sparc_frame.erl @@ -110,7 +110,10 @@ do_pseudo_move(I, Context, FPoff) -> Offset = pseudo_offset(Src, FPoff, Context), mk_load(hipe_sparc:mk_sp(), Offset, Dst, []); _ -> - [hipe_sparc:mk_mov(Src, Dst)] + case hipe_sparc:temp_reg(Dst) =:= hipe_sparc:temp_reg(Src) of + true -> []; + false -> [hipe_sparc:mk_mov(Src, Dst)] + end end end. diff --git a/lib/hipe/ssa/hipe_ssa.inc b/lib/hipe/ssa/hipe_ssa.inc index 83ab320306..b511bb6f25 100644 --- a/lib/hipe/ssa/hipe_ssa.inc +++ b/lib/hipe/ssa/hipe_ssa.inc @@ -1,4 +1,4 @@ -%% -*- erlang-indent-level: 2 -*- +%% -*- mode: erlang; erlang-indent-level: 2 -*- %% %% %CopyrightBegin% %% @@ -943,9 +943,9 @@ do_code([Instr|Instrs], LiveOut, Changed, Acc) -> false -> do_code(Instrs, LiveIn, Changed, [Instr|Acc]); true -> - case ?CODE:is_safe(Instr) of + case ?CODE:is_call(Instr) of true -> - case ?CODE:is_call(Instr) of + case ?CODE:is_safe(Instr) of true -> case ?CODE:call_continuation(Instr) of [] -> @@ -955,11 +955,6 @@ do_code([Instr|Instrs], LiveOut, Changed, Acc) -> do_code(Instrs, LiveOut, true, [NewInstr|Acc]) end; false -> - do_code(Instrs, LiveOut, true, Acc) - end; - false -> %% not a safe instruction - cannot be removed - case ?CODE:is_call(Instr) of - true -> case ?CODE:call_dstlist(Instr) of [] -> %% result was not used anyway; no change do_code(Instrs, LiveIn, Changed, [Instr|Acc]); @@ -968,9 +963,14 @@ do_code([Instr|Instrs], LiveOut, Changed, Acc) -> do_code(Instrs, LiveIn, true, [NewInstr|Acc]); [_|_] -> %% calls with multiple dests are left untouched do_code(Instrs, LiveIn, Changed, [Instr|Acc]) - end; - false -> - do_code(Instrs, LiveIn, Changed, [Instr|Acc]) + end + end; + false -> + case ?CODE:reduce_unused(Instr) of + false -> % not a safe instruction - cannot be removed + do_code(Instrs, LiveIn, Changed, [Instr|Acc]); + Replacement -> + do_code(lists:reverse(Replacement, Instrs), LiveOut, true, Acc) end end end; diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl index 4c8c98551c..851b7da2dd 100644 --- a/lib/hipe/x86/hipe_rtl_to_x86.erl +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -91,26 +91,31 @@ conv_insn(I, Map, Data) -> #alub{} -> %% dst = src1 op src2; if COND goto label BinOp = conv_binop(hipe_rtl:alub_op(I)), - {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), - {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), - {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), + {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:alub_src1(I), Map), + {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:alub_src2(I), Map0), Cc = conv_cond(hipe_rtl:alub_cond(I)), - I1 = [hipe_x86:mk_pseudo_jcc(Cc, - hipe_rtl:alub_true_label(I), - hipe_rtl:alub_false_label(I), - hipe_rtl:alub_pred(I))], - I2 = conv_alu(Dst, Src1, BinOp, Src2, I1), - {FixSrc1++FixSrc2++I2, Map2, Data}; - #branch{} -> - %% <unused> = src1 - src2; if COND goto label - {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), - {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), - Cc = conv_cond(hipe_rtl:branch_cond(I)), - I2 = conv_branch(Src1, Cc, Src2, - hipe_rtl:branch_true_label(I), - hipe_rtl:branch_false_label(I), - hipe_rtl:branch_pred(I)), - {FixSrc1++FixSrc2++I2, Map1, Data}; + BranchOp = conv_branchop(BinOp), + HasDst = hipe_rtl:alub_has_dst(I), + {I2, Map3} = + case (not HasDst) andalso BranchOp =/= none of + true -> + {conv_branch(Src1, BranchOp, Src2, Cc, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I)), Map1}; + false -> + {Dst, Map2} = + case HasDst of + false -> {new_untagged_temp(), Map1}; + true -> conv_dst(hipe_rtl:alub_dst(I), Map1) + end, + I1 = [hipe_x86:mk_pseudo_jcc(Cc, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I))], + {conv_alu(Dst, Src1, BinOp, Src2, I1), Map2} + end, + {FixSrc1++FixSrc2++I2, Map3, Data}; #call{} -> %% push <arg1> %% ... @@ -252,7 +257,9 @@ conv_insn(I, Map, Data) -> conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) -> case (not same_opnd(Dst, Src1)) andalso (not same_opnd(Dst, Src2)) - andalso (hipe_x86:is_temp(Src1) orelse hipe_x86:is_temp(Src2)) + %% We could use orelse instead of xor here to generate lea T1(T2), T3, but + %% they seem to move coalesce so well that move+add is better for them. + andalso (hipe_x86:is_temp(Src1) xor hipe_x86:is_temp(Src2)) of false -> conv_alu(Dst, Src1, 'add', Src2, Tail); true -> % Use LEA @@ -263,6 +270,16 @@ conv_alu_nocc(Dst, Src1, 'add', Src2, Tail) -> end, [hipe_x86:mk_lea(Mem, Dst) | Tail] end; +conv_alu_nocc(Dst, Src1, 'sub', Src2, Tail) -> + case (not same_opnd(Dst, Src1)) andalso hipe_x86:is_temp(Src1) + andalso (not hipe_x86:is_temp(Src2)) + of + false -> conv_alu(Dst, Src1, 'sub', Src2, Tail); + true -> % Use LEA + Imm = hipe_x86:mk_imm(-hipe_x86:imm_value(Src2)), + Mem = hipe_x86:mk_mem(Src1, Imm, typeof_dst(Dst)), + [hipe_x86:mk_lea(Mem, Dst) | Tail] + end; conv_alu_nocc(Dst, Src1, BinOp, Src2, Tail) -> conv_alu(Dst, Src1, BinOp, Src2, Tail). @@ -360,28 +377,41 @@ conv_shift(Dst, Src1, BinOp, Src2) -> %%% Finalise the conversion of a conditional branch operation, taking %%% care to not introduce more temps and moves than necessary. -conv_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) -> +conv_branchop('sub') -> 'cmp'; +conv_branchop('and') -> 'test'; +conv_branchop(_) -> none. + +branchop_commutes('cmp') -> false; +branchop_commutes('test') -> true. + +conv_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) -> case hipe_x86:is_imm(Src1) of false -> - mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred); + mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred); true -> case hipe_x86:is_imm(Src2) of false -> - NewCc = commute_cc(Cc), - mk_branch(Src2, NewCc, Src1, TrueLab, FalseLab, Pred); + NewCc = case branchop_commutes(Op) of + true -> Cc; + false -> commute_cc(Cc) + end, + mk_branch(Src2, Op, Src1, NewCc, TrueLab, FalseLab, Pred); true -> %% two immediates, let the optimiser clean it up Tmp = new_untagged_temp(), [hipe_x86:mk_move(Src1, Tmp) | - mk_branch(Tmp, Cc, Src2, TrueLab, FalseLab, Pred)] + mk_branch(Tmp, Op, Src2, Cc, TrueLab, FalseLab, Pred)] end end. -mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) -> +mk_branch(Src1, Op, Src2, Cc, TrueLab, FalseLab, Pred) -> %% PRE: not(is_imm(Src1)) - [hipe_x86:mk_cmp(Src2, Src1), + [mk_branchtest(Src1, Op, Src2), hipe_x86:mk_pseudo_jcc(Cc, TrueLab, FalseLab, Pred)]. +mk_branchtest(Src1, cmp, Src2) -> hipe_x86:mk_cmp(Src2, Src1); +mk_branchtest(Src1, test, Src2) -> hipe_x86:mk_test(Src2, Src1). + %%% Convert an RTL ALU or ALUB binary operator. conv_binop(BinOp) -> diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl index 33d7f77cf1..45bf1ad736 100644 --- a/lib/hipe/x86/hipe_x86.erl +++ b/lib/hipe/x86/hipe_x86.erl @@ -37,7 +37,7 @@ mk_imm_from_addr/2, mk_imm_from_atom/1, is_imm/1, - %% imm_value/1, + imm_value/1, mk_mem/3, %% is_mem/1, @@ -201,7 +201,7 @@ shift_src/1, shift_dst/1, - %% mk_test/2, + mk_test/2, test_src/1, test_dst/1, @@ -218,6 +218,10 @@ %% highest_temp/1 ]). +%% Other utilities +-export([neg_cc/1 + ]). + %%% %%% Low-level accessors. %%% @@ -241,7 +245,7 @@ mk_imm_from_addr(Addr, Type) -> mk_imm_from_atom(Atom) -> mk_imm(Atom). is_imm(X) -> case X of #x86_imm{} -> true; _ -> false end. -%% imm_value(#x86_imm{value=Value}) -> Value. +imm_value(#x86_imm{value=Value}) -> Value. mk_mem(Base, Off, Type) -> #x86_mem{base=Base, off=Off, type=Type}. %% is_mem(X) -> case X of #x86_mem{} -> true; _ -> false end. @@ -305,7 +309,7 @@ mk_cmp(Src, Dst) -> #cmp{src=Src, dst=Dst}. cmp_src(#cmp{src=Src}) -> Src. cmp_dst(#cmp{dst=Dst}) -> Dst. -%% mk_test(Src, Dst) -> #test{src=Src, dst=Dst}. +mk_test(Src, Dst) -> #test{src=Src, dst=Dst}. test_src(#test{src=Src}) -> Src. test_dst(#test{dst=Dst}) -> Dst. diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl index e21223a5b1..e692ff0ebb 100644 --- a/lib/hipe/x86/hipe_x86_assemble.erl +++ b/lib/hipe/x86/hipe_x86_assemble.erl @@ -599,10 +599,20 @@ temp_to_xmm(#x86_temp{reg=Reg}) -> {xmm, Reg}. -ifdef(HIPE_AMD64). +temp_to_rm8(#x86_temp{reg=Reg}) -> + {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}. temp_to_rm64(#x86_temp{reg=Reg}) -> {rm64, hipe_amd64_encode:rm_reg(Reg)}. +-else. +temp_to_rm8(#x86_temp{reg=Reg}) -> + true = ?HIPE_X86_ENCODE:reg_has_8bit(Reg), + {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}. +temp_to_rm16(#x86_temp{reg=Reg}) -> + {rm16, ?HIPE_X86_ENCODE:rm_reg(Reg)}. -endif. +temp_to_rm32(#x86_temp{reg=Reg}) -> + {rm32, ?HIPE_X86_ENCODE:rm_reg(Reg)}. temp_to_rmArch(#x86_temp{reg=Reg}) -> {?RMArch, ?HIPE_X86_ENCODE:rm_reg(Reg)}. temp_to_rm64fp(#x86_temp{reg=Reg}) -> @@ -878,15 +888,29 @@ resolve_alu_args(Src, Dst, Context) -> %%% test resolve_test_args(Src, Dst, Context) -> case Src of - #x86_imm{} -> % imm8 not allowed - {_ImmSize,ImmValue} = translate_imm(Src, Context, false), - NewDst = - case Dst of - #x86_temp{reg=0} -> ?EAX; - #x86_temp{} -> temp_to_rmArch(Dst); - #x86_mem{} -> mem_to_rmArch(Dst) - end, - {NewDst, {imm32,ImmValue}}; + %% Since we're using an 8-bit instruction, the immediate is not sign + %% extended. Thus, we can use immediates up to 255. + #x86_imm{value=ImmVal} + when is_integer(ImmVal), ImmVal >= 0, ImmVal =< 255 -> + Imm = {imm8, ImmVal}, + case Dst of + #x86_temp{reg=0} -> {al, Imm}; + #x86_temp{} -> resolve_test_imm8_reg(Imm, Dst); + #x86_mem{} -> {mem_to_rm8(Dst), Imm} + end; + #x86_imm{value=ImmVal} when is_integer(ImmVal), ImmVal >= 0 -> + {case Dst of + #x86_temp{reg=0} -> eax; + #x86_temp{} -> temp_to_rm32(Dst); + #x86_mem{} -> mem_to_rm32(Dst) + end, {imm32, ImmVal}}; + #x86_imm{} -> % Negative ImmVal; use word-sized instr, imm32 + {_, ImmVal} = translate_imm(Src, Context, false), + {case Dst of + #x86_temp{reg=0} -> ?EAX; + #x86_temp{} -> temp_to_rmArch(Dst); + #x86_mem{} -> mem_to_rmArch(Dst) + end, {imm32, ImmVal}}; #x86_temp{} -> NewDst = case Dst of @@ -896,6 +920,18 @@ resolve_test_args(Src, Dst, Context) -> {NewDst, temp_to_regArch(Src)} end. +-ifdef(HIPE_AMD64). +resolve_test_imm8_reg(Imm, Dst) -> {temp_to_rm8(Dst), Imm}. +-else. +resolve_test_imm8_reg(Imm = {imm8, ImmVal}, Dst = #x86_temp{reg=Reg}) -> + case ?HIPE_X86_ENCODE:reg_has_8bit(Reg) of + true -> {temp_to_rm8(Dst), Imm}; + false -> + %% Register does not exist in 8-bit version; use 16-bit instead + {temp_to_rm16(Dst), {imm16, ImmVal}} + end. +-endif. + %%% shifts resolve_shift_args(Src, Dst, Context) -> RM32 = diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl index 4455def74e..ab26370a80 100644 --- a/lib/hipe/x86/hipe_x86_defuse.erl +++ b/lib/hipe/x86/hipe_x86_defuse.erl @@ -60,7 +60,7 @@ insn_def(I) -> #pseudo_tailcall_prepare{} -> tailcall_clobbered(); #shift{dst=Dst} -> dst_def(Dst); %% call, cmp, comment, jcc, jmp_fun, jmp_label, jmp_switch, label - %% pseudo_jcc, pseudo_tailcall, push, ret + %% pseudo_jcc, pseudo_tailcall, push, ret, test _ -> [] end. @@ -120,6 +120,7 @@ insn_use(I) -> #push{src=Src} -> addtemp(Src, []); #ret{} -> [hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?RV(), 'tagged')]; #shift{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, [])); + #test{src=Src, dst=Dst} -> addtemp(Src, addtemp(Dst, [])); %% comment, jcc, jmp_label, label, pseudo_jcc, pseudo_tailcall_prepare _ -> [] end. diff --git a/lib/hipe/x86/hipe_x86_encode.erl b/lib/hipe/x86/hipe_x86_encode.erl index 3b7be86608..2d1663d0d6 100644 --- a/lib/hipe/x86/hipe_x86_encode.erl +++ b/lib/hipe/x86/hipe_x86_encode.erl @@ -65,6 +65,7 @@ cc/1, % 8-bit registers %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0, + reg_has_8bit/1, % 32-bit registers %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0, % operands @@ -143,6 +144,8 @@ cc(g) -> ?CC_G. %% dh() -> ?DH. %% bh() -> ?BH. +reg_has_8bit(Reg) -> Reg =< ?BL. + %%% 32-bit registers -define(EAX, 2#000). @@ -700,8 +703,16 @@ shd_op_sizeof(Opnds) -> test_encode(Opnds) -> case Opnds of + {al, {imm8,Imm8}} -> + [16#A8, Imm8]; + {ax, {imm16,Imm16}} -> + [?PFX_OPND, 16#A9 | le16(Imm16, [])]; {eax, {imm32,Imm32}} -> [16#A9 | le32(Imm32, [])]; + {{rm8,RM8}, {imm8,Imm8}} -> + [16#F6 | encode_rm(RM8, 2#000, [Imm8])]; + {{rm16,RM16}, {imm16,Imm16}} -> + [?PFX_OPND, 16#F7 | encode_rm(RM16, 2#000, le16(Imm16, []))]; {{rm32,RM32}, {imm32,Imm32}} -> [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))]; {{rm32,RM32}, {reg32,Reg32}} -> @@ -710,8 +721,16 @@ test_encode(Opnds) -> test_sizeof(Opnds) -> case Opnds of + {al, {imm8,_}} -> + 1 + 1; + {ax, {imm16,_}} -> + 2 + 2; {eax, {imm32,_}} -> 1 + 4; + {{rm8,RM8}, {imm8,_}} -> + 1 + sizeof_rm(RM8) + 1; + {{rm16,RM16}, {imm16,_}} -> + 2 + sizeof_rm(RM16) + 2; {{rm32,RM32}, {imm32,_}} -> 1 + sizeof_rm(RM32) + 4; {{rm32,RM32}, {reg32,_}} -> @@ -1283,7 +1302,11 @@ dotest1(OS) -> t(OS,'sub',{RM32,Imm8}), t(OS,'sub',{RM32,Reg32}), t(OS,'sub',{Reg32,RM32}), + t(OS,'test',{al,Imm8}), + t(OS,'test',{ax,Imm16}), t(OS,'test',{eax,Imm32}), + t(OS,'test',{RM8,Imm8}), + t(OS,'test',{RM16,Imm16}), t(OS,'test',{RM32,Imm32}), t(OS,'test',{RM32,Reg32}), t(OS,'xor',{eax,Imm32}), diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl index fc782571bf..17253ad46f 100644 --- a/lib/hipe/x86/hipe_x86_frame.erl +++ b/lib/hipe/x86/hipe_x86_frame.erl @@ -116,6 +116,8 @@ do_insn(I, LiveOut, Context, FPoff) -> {do_ret(I, Context, FPoff), context_framesize(Context)}; #shift{} -> {[do_shift(I, Context, FPoff)], FPoff}; + #test{} -> + {[do_test(I, Context, FPoff)], FPoff}; _ -> % comment, jmp, label, pseudo_jcc, pseudo_tailcall_prepare {[I], FPoff} end. @@ -188,6 +190,12 @@ do_shift(I, Context, FPoff) -> Dst = conv_opnd(Dst0, FPoff, Context), I#shift{src=Src,dst=Dst}. +do_test(I, Context, FPoff) -> + #test{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#test{src=Src,dst=Dst}. + conv_opnd(Opnd, FPoff, Context) -> case opnd_is_pseudo(Opnd) of false -> diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl index 4515822a34..f88a841cca 100644 --- a/lib/hipe/x86/hipe_x86_postpass.erl +++ b/lib/hipe/x86/hipe_x86_postpass.erl @@ -120,19 +120,15 @@ peep([#move{src=Src1, dst=Dst}, %% ElimCmp0 %% -------- -peep([C=#cmp{src=Src, dst=Dst},J=#jcc{cc=Cond, label=Lab}|Insns],Res,Lst) -> - case (((Src =:= #x86_imm{value=0}) or (Dst =:= #x86_imm{value=0})) and - ((Cond =:= 'eq') or (Cond =:= 'neq'))) of - true -> - Src2 = case Src of #x86_imm{value=0} -> Src; _ -> Dst end, - Cond2 = case Cond of 'eq' -> 'z'; 'neq' -> 'nz' end, - Test = #test{src=Src2, dst=#x86_imm{value=0}}, - Jump = #jcc{cc=Cond2, label=Lab}, - peep(Insns, [Jump, Test|Res], [elimCmp0|Lst]); - _ -> - peep(Insns, [J,C|Res], Lst) - end; - +peep([#cmp{src=#x86_imm{value=0}, dst=Dst=#x86_temp{}}|Insns],Res,Lst) -> + %% TEST leaves the adjust flag undefined, whereas CMP sets it properly (in + %% this case to 0). However, since HiPE does not use any instructions that + %% read the adjust flag, we can do this transform safely. + peep(Insns, [#test{src=Dst, dst=Dst} | Res], [elimCmp0_1|Lst]); +peep([#cmp{src=Src=#x86_temp{}, dst=#x86_imm{value=0}}, + J=#jcc{cc=Cond}|Insns],Res,Lst) + when Cond =:= 'e'; Cond =:= 'ne' -> % We're commuting the comparison + peep(Insns, [J, #test{src=Src, dst=Src} | Res], [elimCmp0_2|Lst]); %% ElimCmpTest %% ----------- @@ -187,6 +183,18 @@ peep([B = #alu{aluop=Op,src=#x86_imm{value=Val},dst=Dst}|Insns], Res, Lst) -> peep(Insns, [B|Res], Lst) end; +%% LeaToAdd +%% This rule transforms lea into add when the destination is the same as one of +%% the operands. Sound because lea is never used where the condition codes are +%% live (and would be clobbered by add). +%% ---------- +peep([#lea{mem=#x86_mem{base=#x86_temp{reg=DstR},off=Src}, + temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) -> + peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]); +peep([#lea{mem=#x86_mem{base=Src,off=#x86_temp{reg=DstR}}, + temp=Dst=#x86_temp{reg=DstR}}|Insns], Res, Lst) -> + peep(Insns, [#alu{aluop='add',src=Src,dst=Dst}|Res], [leaToAdd|Lst]); + %% SubToDec %% This rule turns "subl $1,Dst; jl Lab" into "decl Dst; jl Lab", which %% changes reduction counter tests to use decl instead of subl. @@ -209,6 +217,11 @@ trivial_goto_elimination(Insns) -> goto_elim(Insns, []). goto_elim([#jmp_label{label=Label}, I = #label{label=Label}|Insns], Res) -> goto_elim([I|Insns], Res); +goto_elim([#jcc{cc=CC, label=Label} = IJCC, + #jmp_label{label=BranchTgt}, + #label{label=Label} = ILBL|Insns], Res) -> + goto_elim([IJCC#jcc{cc=hipe_x86:neg_cc(CC), label=BranchTgt}, + ILBL|Insns], Res); goto_elim([I | Insns], Res) -> goto_elim(Insns, [I|Res]); goto_elim([], Res) -> diff --git a/lib/hipe/x86/hipe_x86_pp.erl b/lib/hipe/x86/hipe_x86_pp.erl index ff26a31877..942201a051 100644 --- a/lib/hipe/x86/hipe_x86_pp.erl +++ b/lib/hipe/x86/hipe_x86_pp.erl @@ -188,6 +188,12 @@ pp_insn(Dev, I, Pre) -> io:format(Dev, ", ", []), pp_dst(Dev, Dst), io:format(Dev, "\n", []); + #test{src=Src, dst=Dst} -> + io:format(Dev, "\ttest ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); #fp_binop{src=Src, dst=Dst, op=Op} -> io:format(Dev, "\t~s ", [Op]), pp_dst(Dev, Dst), diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl index edfd7b332c..1fd617570a 100644 --- a/lib/hipe/x86/hipe_x86_ra_finalise.erl +++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl @@ -162,6 +162,10 @@ ra_insn(I, Map, FpMap) -> Src = ra_opnd(Src0, Map), Dst = ra_opnd(Dst0, Map), I#shift{src=Src,dst=Dst}; + #test{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#test{src=Src,dst=Dst}; _ -> exit({?MODULE,ra_insn,I}) end. diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl index 35de692e07..9371e4b1a5 100644 --- a/lib/hipe/x86/hipe_x86_ra_naive.erl +++ b/lib/hipe/x86/hipe_x86_ra_naive.erl @@ -100,6 +100,8 @@ do_insn(I) -> % Insn -> Insn list do_fp_binop(I); #shift{} -> do_shift(I); + #test{} -> + do_test(I); #label{} -> [I]; #pseudo_jcc{} -> @@ -310,6 +312,11 @@ do_shift(I) -> FixDst ++ [I#shift{dst=Dst}] end. +do_test(I) -> + #test{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0), + FixSrc ++ FixDst ++ [I#test{src=Src,dst=Dst}]. + %%% Fix the operands of a binary op. %%% 1. remove pseudos from any explicit memory operands %%% 2. if both operands are (implicit or explicit) memory operands, diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl index f496b71828..e7c397b5b7 100644 --- a/lib/hipe/x86/hipe_x86_ra_postconditions.erl +++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl @@ -83,6 +83,8 @@ do_insn(I, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill} do_fmove(I, TempMap, Strategy); #shift{} -> do_shift(I, TempMap, Strategy); + #test{} -> + do_test(I, TempMap, Strategy); _ -> %% comment, jmp*, label, pseudo_call, pseudo_jcc, pseudo_tailcall, %% pseudo_tailcall_prepare, push, ret @@ -308,6 +310,14 @@ do_shift(I, TempMap, Strategy) -> {FixDst ++ [I#shift{dst=Dst}], DidSpill} end. +%%% Fix a test op. + +do_test(I, TempMap, Strategy) -> + #test{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst, DidSpill} = + do_binary(Src0, Dst0, TempMap, Strategy), + {FixSrc ++ FixDst ++ [I#test{src=Src,dst=Dst}], DidSpill}. + %%% Fix the operands of a binary op. %%% 1. remove pseudos from any explicit memory operands %%% 2. if both operands are (implicit or explicit) memory operands, |