diff options
Diffstat (limited to 'lib/hipe/x86/hipe_rtl_to_x86.erl')
-rw-r--r-- | lib/hipe/x86/hipe_rtl_to_x86.erl | 865 |
1 file changed, 865 insertions, 0 deletions
diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl new file mode 100644 index 0000000000..d77e4fed3b --- /dev/null +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -0,0 +1,865 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% +%%% Translate 3-address RTL code to 2-address pseudo-x86 code. + +-ifdef(HIPE_AMD64). +-define(HIPE_RTL_TO_X86, hipe_rtl_to_amd64). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(ECX, rcx). +-define(EAX, rax). +-else. +-define(HIPE_RTL_TO_X86, hipe_rtl_to_x86). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(ECX, ecx). +-define(EAX, eax). +-endif. + +-module(?HIPE_RTL_TO_X86). +-export([translate/1]). + +-include("../rtl/hipe_rtl.hrl"). 
%% Convert one RTL function into an x86 (or amd64) defun.
%% Initialises the x86 gensym counters from the RTL ones, maps the RTL
%% formals to x86 temps, translates the instruction list, and prepends a
%% fresh entry label plus formal-moving code when any formal arrives in a
%% register.
translate(RTL) ->
  hipe_gensym:init(x86),
  hipe_gensym:set_var(x86, ?HIPE_X86_REGISTERS:first_virtual()),
  hipe_gensym:set_label(x86, hipe_gensym:get_label(rtl)),
  {Formals, VMap} = conv_formals(hipe_rtl:rtl_params(RTL), vmap_empty()),
  {Code0, NewData} =
    conv_insn_list(hipe_rtl:rtl_code(RTL), VMap, hipe_rtl:rtl_data(RTL)),
  {RegFormals, _StkFormals} = split_args(Formals),
  Code =
    if RegFormals =:= [] ->
	Code0;
       true ->
	%% Some formals arrive in argument registers: move them to their
	%% temps behind a fresh entry label.
	[hipe_x86:mk_label(hipe_gensym:get_next_label(x86)) |
	 move_formals(RegFormals, Code0)]
    end,
  hipe_x86:mk_defun(hipe_rtl:rtl_fun(RTL),
		    Formals,
		    hipe_rtl:rtl_is_closure(RTL),
		    hipe_rtl:rtl_is_leaf(RTL),
		    Code,
		    NewData,
		    [],
		    []).

%% Translate a list of RTL instructions, threading the var map and the
%% constant table through each step.
conv_insn_list([], _VMap, Data) ->
  {[], Data};
conv_insn_list([I | Is], VMap0, Data0) ->
  {Insns, VMap1, Data1} = conv_insn(I, VMap0, Data0),
  %% io:format("~w \n ==>\n ~w\n- - - - - - - - -\n",[I,Insns]),
  {Rest, Data2} = conv_insn_list(Is, VMap1, Data1),
  {Insns ++ Rest, Data2}.
%% Translate a single RTL instruction into a list of pseudo-x86
%% instructions.  Returns {Insns, NewMap, NewData}, where Map carries the
%% RTL var/reg -> x86 temp bindings and Data is the constant table (only
%% the #switch{} case updates it).
conv_insn(I, Map, Data) ->
  case I of
    #alu{} ->
      %% dst = src1 binop src2
      BinOp = conv_binop(hipe_rtl:alu_op(I)),
      {Dst, Map0} = conv_dst(hipe_rtl:alu_dst(I), Map),
      {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alu_src1(I), Map0),
      {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alu_src2(I), Map1),
      I2 =
	case hipe_rtl:is_shift_op(hipe_rtl:alu_op(I)) of
	  true ->
	    %% shifts need the count in ECX/RCX or an immediate
	    conv_shift(Dst, Src1, BinOp, Src2);
	  false ->
	    conv_alu(Dst, Src1, BinOp, Src2, [])
	end,
      {FixSrc1++FixSrc2++I2, Map2, Data};
    #alub{} ->
      %% dst = src1 op src2; if COND goto label
      BinOp = conv_binop(hipe_rtl:alub_op(I)),
      {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map),
      {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0),
      {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1),
      Cc = conv_cond(hipe_rtl:alub_cond(I)),
      %% The jcc tests the condition codes set by the ALU op below.
      I1 = [hipe_x86:mk_pseudo_jcc(Cc,
				   hipe_rtl:alub_true_label(I),
				   hipe_rtl:alub_false_label(I),
				   hipe_rtl:alub_pred(I))],
      I2 = conv_alu(Dst, Src1, BinOp, Src2, I1),
      {FixSrc1++FixSrc2++I2, Map2, Data};
    #branch{} ->
      %% <unused> = src1 - src2; if COND goto label
      {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map),
      {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0),
      Cc = conv_cond(hipe_rtl:branch_cond(I)),
      I2 = conv_branch(Src1, Cc, Src2,
		       hipe_rtl:branch_true_label(I),
		       hipe_rtl:branch_false_label(I),
		       hipe_rtl:branch_pred(I)),
      {FixSrc1++FixSrc2++I2, Map1, Data};
    #call{} ->
      %% push <arg1>
      %% ...
      %% push <argn>
      %% eax := call <Fun>; if exn goto <Fail> else goto Next
      %% Next:
      %% <Dst> := eax
      %% goto <Cont>
      {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map),
      {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0),
      {Fun, Map2} = conv_fun(hipe_rtl:call_fun(I), Map1),
      I2 = conv_call(Dsts, Fun, Args,
		     hipe_rtl:call_continuation(I),
		     hipe_rtl:call_fail(I),
		     hipe_rtl:call_type(I)),
      %% XXX Fixme: this ++ is probably inefficient.
      {FixArgs++I2, Map2, Data};
    #comment{} ->
      I2 = [hipe_x86:mk_comment(hipe_rtl:comment_text(I))],
      {I2, Map, Data};
    #enter{} ->
      %% tailcall: reuse the caller's frame
      {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:enter_arglist(I), Map),
      {Fun, Map1} = conv_fun(hipe_rtl:enter_fun(I), Map0),
      I2 = conv_tailcall(Fun, Args, hipe_rtl:enter_type(I)),
      {FixArgs++I2, Map1, Data};
    #goto{} ->
      I2 = [hipe_x86:mk_jmp_label(hipe_rtl:goto_label(I))],
      {I2, Map, Data};
    #label{} ->
      I2 = [hipe_x86:mk_label(hipe_rtl:label_name(I))],
      {I2, Map, Data};
    #load{} ->
      {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map),
      {FixSrc, Src, Map1} = conv_src(hipe_rtl:load_src(I), Map0),
      {FixOff, Off, Map2} = conv_src(hipe_rtl:load_offset(I), Map1),
      %% Sub-word loads use explicit sign/zero extension; word-sized (and
      %% on amd64, 32-bit) loads are handled by mk_load/5.
      I2 = case {hipe_rtl:load_size(I), hipe_rtl:load_sign(I)} of
	     {byte, signed} ->
	       [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'byte'), Dst)];
	     {byte, unsigned} ->
	       [hipe_x86:mk_movzx(hipe_x86:mk_mem(Src, Off, 'byte'), Dst)];
	     {int16, signed} ->
	       [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'int16'), Dst)];
	     {int16, unsigned} ->
	       [hipe_x86:mk_movzx(hipe_x86:mk_mem(Src, Off, 'int16'), Dst)];
	     {LoadSize, LoadSign} ->
	       mk_load(LoadSize, LoadSign, Src, Off, Dst)
	   end,
      {FixSrc++FixOff++I2, Map2, Data};
    #load_address{} ->
      {Dst, Map0} = conv_dst(hipe_rtl:load_address_dst(I), Map),
      Addr = hipe_rtl:load_address_addr(I),
      Type = hipe_rtl:load_address_type(I),
      Src = hipe_x86:mk_imm_from_addr(Addr, Type),
      I2 = mk_load_address(Type, Src, Dst),
      {I2, Map0, Data};
    #load_atom{} ->
      {Dst, Map0} = conv_dst(hipe_rtl:load_atom_dst(I), Map),
      Src = hipe_x86:mk_imm_from_atom(hipe_rtl:load_atom_atom(I)),
      I2 = [hipe_x86:mk_move(Src, Dst)],
      {I2, Map0, Data};
    #move{} ->
      {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map),
      {FixSrc, Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0),
      I2 = [hipe_x86:mk_move(Src, Dst)],
      {FixSrc++I2, Map1, Data};
    #return{} ->
      {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:return_varlist(I), Map),
      %% frame will fill in npop later, hence the "mk_ret(-1)"
      I2 = move_retvals(Args, [hipe_x86:mk_ret(-1)]),
      {FixArgs++I2, Map0, Data};
    #store{} ->
      {Ptr, Map0} = conv_dst(hipe_rtl:store_base(I), Map),
      {FixSrc, Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0),
      {FixOff, Off, Map2} = conv_src(hipe_rtl:store_offset(I), Map1),
      I2 = mk_store(hipe_rtl:store_size(I), Src, Ptr, Off),
      {FixSrc++FixOff++I2, Map2, Data};
    #switch{} ->	% this one also updates Data :-(
      %% from hipe_rtl2sparc, but we use a hairy addressing mode
      %% instead of doing the arithmetic manually
      Labels = hipe_rtl:switch_labels(I),
      LMap = [{label,L} || L <- Labels],
      %% the jump table goes into the constant table, sorted if a sort
      %% order was recorded on the RTL instruction
      {NewData, JTabLab} =
	case hipe_rtl:switch_sort_order(I) of
	  [] ->
	    hipe_consttab:insert_block(Data, word, LMap);
	  SortOrder ->
	    hipe_consttab:insert_sorted_block(
	      Data, word, LMap, SortOrder)
	end,
      %% no immediates allowed here
      {Index, Map1} = conv_dst(hipe_rtl:switch_src(I), Map),
      I2 = mk_jmp_switch(Index, JTabLab, Labels),
      {I2, Map1, NewData};
    #fload{} ->
      %% Note: FP sources must not need fixup code, hence the [] matches.
      {Dst, Map0} = conv_dst(hipe_rtl:fload_dst(I), Map),
      {[], Src, Map1} = conv_src(hipe_rtl:fload_src(I), Map0),
      {[], Off, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1),
      I2 = [hipe_x86:mk_fmove(hipe_x86:mk_mem(Src, Off, 'double'),Dst)],
      {I2, Map2, Data};
    #fstore{} ->
      {Dst, Map0} = conv_dst(hipe_rtl:fstore_base(I), Map),
      {[], Src, Map1} = conv_src(hipe_rtl:fstore_src(I), Map0),
      {[], Off, Map2} = conv_src(hipe_rtl:fstore_offset(I), Map1),
      I2 = [hipe_x86:mk_fmove(Src, hipe_x86:mk_mem(Dst, Off, 'double'))],
      {I2, Map2, Data};
    #fp{} ->
      {Dst, Map0} = conv_dst(hipe_rtl:fp_dst(I), Map),
      {[], Src1, Map1} = conv_src(hipe_rtl:fp_src1(I), Map0),
      {[], Src2, Map2} = conv_src(hipe_rtl:fp_src2(I), Map1),
      FpBinOp = conv_fp_binop(hipe_rtl:fp_op(I)),
      I2 = conv_fp_binary(Dst, Src1, FpBinOp, Src2),
      {I2, Map2, Data};
    #fp_unop{} ->
      {Dst, Map0} = conv_dst(hipe_rtl:fp_unop_dst(I), Map),
      {[], Src, Map1} = conv_src(hipe_rtl:fp_unop_src(I), Map0),
      FpUnOp = conv_fp_unop(hipe_rtl:fp_unop_op(I)),
      I2 = conv_fp_unary(Dst, Src, FpUnOp),
      {I2, Map1, Data};
    #fmove{} ->
      {Dst, Map0} = conv_dst(hipe_rtl:fmove_dst(I), Map),
      {[], Src, Map1} = conv_src(hipe_rtl:fmove_src(I), Map0),
      I2 = [hipe_x86:mk_fmove(Src, Dst)],
      {I2, Map1, Data};
    #fconv{} ->
      %% NOTE(review): emitted as a plain fmove here; presumably the
      %% int->float conversion itself is realised later -- confirm.
      {Dst, Map0} = conv_dst(hipe_rtl:fconv_dst(I), Map),
      {[], Src, Map1} = conv_src(hipe_rtl:fconv_src(I), Map0),
      I2 = [hipe_x86:mk_fmove(Src, Dst)],
      {I2, Map1, Data};
    X ->
      %% gctest??
      %% jmp, jmp_link, jsr, esr, multimove,
      %% stackneed, pop_frame, restore_frame, save_frame
      throw({?MODULE, {"unknown RTL instruction", X}})
  end.

%%% Finalise the conversion of a 3-address ALU operation, taking
%%% care to not introduce more temps and moves than necessary.
%%% Tail is appended after the generated instructions (used by #alub{}
%%% to place the jcc right after the flag-setting op).

conv_alu(Dst, Src1, 'imul', Src2, Tail) ->
  %% imul has its own 2- and 3-operand forms; handled separately
  mk_imul(Src1, Src2, Dst, Tail);
conv_alu(Dst, Src1, BinOp, Src2, Tail) ->
  case same_opnd(Dst, Src1) of
    true ->			% x = x op y
      [hipe_x86:mk_alu(BinOp, Src2, Dst) | Tail];	% x op= y
    false ->			% z = x op y, where z != x
      case same_opnd(Dst, Src2) of
	false ->		% z = x op y, where z != x && z != y
	  [hipe_x86:mk_move(Src1, Dst),			% z = x
	   hipe_x86:mk_alu(BinOp, Src2, Dst) | Tail];	% z op= y
	true ->			% y = x op y, where y != x
	  case binop_commutes(BinOp) of
	    true ->		% y = y op x
	      [hipe_x86:mk_alu(BinOp, Src1, Dst) | Tail]; % y op= x
	    false ->		% y = x op y, where op doesn't commute
	      %% go through a scratch temp to avoid clobbering Src2
	      Tmp = clone_dst(Dst),
	      [hipe_x86:mk_move(Src1, Tmp),		% t = x
	       hipe_x86:mk_alu(BinOp, Src2, Tmp),	% t op= y
	       hipe_x86:mk_move(Tmp, Dst) | Tail]	% y = t
	  end
      end
  end.

%% Dispatch an RTL 'mul' on the immediate-ness of its operands:
%% iit = two immediates, itt = one immediate, ttt = no immediates.
mk_imul(Src1, Src2, Dst, Tail) ->
  case hipe_x86:is_imm(Src1) of
    true ->
      case hipe_x86:is_imm(Src2) of
	true ->
	  mk_imul_iit(Src1, Src2, Dst, Tail);
	_ ->
	  mk_imul_itt(Src1, Src2, Dst, Tail)
      end;
    _ ->
      case hipe_x86:is_imm(Src2) of
	true ->
	  %% imul is commutative, so swap to put the immediate first
	  mk_imul_itt(Src2, Src1, Dst, Tail);
	_ ->
	  mk_imul_ttt(Src1, Src2, Dst, Tail)
      end
  end.
%% Multiply two immediates.  This should have been constant-folded
%% upstream (hence the io:format notice); lower one immediate into a
%% temp and fall through to the immediate*temp case.
mk_imul_iit(Src1, Src2, Dst, Tail) ->
  io:format("~w: RTL mul with two immediates\n", [?MODULE]),
  Tmp2 = new_untagged_temp(),
  [hipe_x86:mk_move(Src2, Tmp2) |
   mk_imul_itt(Src1, Tmp2, Dst, Tail)].

%% Multiply an immediate with a temp: 3-operand imul form.
mk_imul_itt(Src1, Src2, Dst, Tail) ->
  [hipe_x86:mk_imul(Src1, Src2, Dst) | Tail].

%% Multiply two temps: 2-operand imul form ([] as first operand).
%% Reuses Dst as the accumulator when it aliases either source.
mk_imul_ttt(Src1, Src2, Dst, Tail) ->
  case same_opnd(Dst, Src1) of
    true ->
      [hipe_x86:mk_imul([], Src2, Dst) | Tail];
    false ->
      case same_opnd(Dst, Src2) of
	true ->
	  %% imul commutes, so dst *= src1 is fine
	  [hipe_x86:mk_imul([], Src1, Dst) | Tail];
	false ->
	  [hipe_x86:mk_move(Src1, Dst),
	   hipe_x86:mk_imul([], Src2, Dst) | Tail]
      end
  end.

%% Finalise a shift (shl/shr/sar).  A non-immediate shift count must be
%% in ECX/RCX, so move it there first.
conv_shift(Dst, Src1, BinOp, Src2) ->
  {NewSrc2,I1} =
    case hipe_x86:is_imm(Src2) of
      true ->
	{Src2, []};
      false ->
	NewSrc = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?ECX(), 'untagged'),
	{NewSrc, [hipe_x86:mk_move(Src2, NewSrc)]}
    end,
  I2 = case same_opnd(Dst, Src1) of
	 true ->		% x = x op y
	   [hipe_x86:mk_shift(BinOp, NewSrc2, Dst)];	% x op= y
	 false ->		% z = x op y, where z != x
	   case same_opnd(Dst, Src2) of
	     false ->		% z = x op y, where z != x && z != y
	       [hipe_x86:mk_move(Src1, Dst),		% z = x
		hipe_x86:mk_shift(BinOp, NewSrc2, Dst)];% z op= y
	     true ->		% y = x op y, no shift op commutes
	       %% must go through a scratch temp
	       Tmp = clone_dst(Dst),
	       [hipe_x86:mk_move(Src1, Tmp),		% t = x
		hipe_x86:mk_shift(BinOp, NewSrc2, Tmp),	% t op= y
		hipe_x86:mk_move(Tmp, Dst)]		% y = t
	   end
       end,
  I1 ++ I2.

%%% Finalise the conversion of a conditional branch operation, taking
%%% care to not introduce more temps and moves than necessary.
%% Translate an RTL conditional branch.  x86 'cmp' cannot take an
%% immediate as its second (left in AT&T syntax) operand, so when Src1 is
%% an immediate either swap the operands (commuting the condition) or
%% spill Src1 into a temp.
conv_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) ->
  case {hipe_x86:is_imm(Src1), hipe_x86:is_imm(Src2)} of
    {false, _} ->
      mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred);
    {true, false} ->
      mk_branch(Src2, commute_cc(Cc), Src1, TrueLab, FalseLab, Pred);
    {true, true} ->
      %% two immediates, let the optimiser clean it up
      Tmp = new_untagged_temp(),
      [hipe_x86:mk_move(Src1, Tmp) |
       mk_branch(Tmp, Cc, Src2, TrueLab, FalseLab, Pred)]
  end.

%% Emit cmp + pseudo_jcc.  PRE: Src1 is not an immediate.
mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) ->
  Cmp = hipe_x86:mk_cmp(Src2, Src1),
  Jcc = hipe_x86:mk_pseudo_jcc(Cc, TrueLab, FalseLab, Pred),
  [Cmp, Jcc].

%%% Convert an RTL ALU or ALUB binary operator.

conv_binop('add') -> 'add';
conv_binop('sub') -> 'sub';
conv_binop('or')  -> 'or';
conv_binop('and') -> 'and';
conv_binop('xor') -> 'xor';
conv_binop('sll') -> 'shl';
conv_binop('srl') -> 'shr';
conv_binop('sra') -> 'sar';
conv_binop('mul') -> 'imul';
%% andnot ???
conv_binop(BinOp) -> exit({?MODULE, {"unknown binop", BinOp}}).

%% True for the operators where "y = x op y" may be rewritten "y op= x".
binop_commutes('add') -> true;
binop_commutes('or')  -> true;
binop_commutes('and') -> true;
binop_commutes('xor') -> true;
binop_commutes(_)     -> false.

%%% Convert an RTL conditional operator.

conv_cond(eq)  -> 'e';
conv_cond(ne)  -> 'ne';
conv_cond(gt)  -> 'g';
conv_cond(gtu) -> 'a';
conv_cond(ge)  -> 'ge';
conv_cond(geu) -> 'ae';
conv_cond(lt)  -> 'l';
conv_cond(ltu) -> 'b';
conv_cond(le)  -> 'le';
conv_cond(leu) -> 'be';
conv_cond(overflow)     -> 'o';
conv_cond(not_overflow) -> 'no';
conv_cond(Cond) -> exit({?MODULE, {"unknown rtl cond", Cond}}).

%% if x Cc y, then y commute_cc(Cc) x
commute_cc('e')  -> 'e';	% ==, ==
commute_cc('ne') -> 'ne';	% !=, !=
commute_cc('g')  -> 'l';	% >, <
commute_cc('a')  -> 'b';	% >u, <u
commute_cc('ge') -> 'le';	% >=, <=
commute_cc('ae') -> 'be';	% >=u, <=u
commute_cc('l')  -> 'g';	% <, >
commute_cc('b')  -> 'a';	% <u, >u
commute_cc('le') -> 'ge';	% <=, >=
commute_cc('be') -> 'ae';	% <=u, >=u
%% overflow/not_overflow: n/a
commute_cc(Cc) -> exit({?MODULE, {"unknown cc", Cc}}).

%%% Test if Dst and Src are the same operand.

same_opnd(Opnd, Opnd) -> true;
same_opnd(_, _) -> false.

%%% Finalise the conversion of a tailcall instruction.
%% Translate a tailcall: move register args into the argument registers,
%% then emit the tailcall pseudo-ops (stack args are carried on the
%% pseudo_tailcall itself).
conv_tailcall(Fun, Args, Linkage) ->
  Arity = length(Args),
  {RegArgs,StkArgs} = split_args(Args),
  move_actuals(RegArgs,
	       [hipe_x86:mk_pseudo_tailcall_prepare(),
		hipe_x86:mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage)]).

%% Split an argument list into {RegArgs, StkArgs}: the first nr_args()
%% arguments are paired with their calling-convention registers, the rest
%% go on the stack.  Note: RegArgs comes back in reverse order.
split_args(Args) ->
  split_args(0, ?HIPE_X86_REGISTERS:nr_args(), Args, []).
split_args(I, N, [Arg|Args], RegArgs) when I < N ->
  Reg = ?HIPE_X86_REGISTERS:arg(I),
  Temp = hipe_x86:mk_temp(Reg, 'tagged'),
  split_args(I+1, N, Args, [{Arg,Temp}|RegArgs]);
split_args(_, _, StkArgs, RegArgs) ->
  {RegArgs, StkArgs}.

%% Prepend moves of actual arguments into their argument registers.
move_actuals([], Rest) -> Rest;
move_actuals([{Src,Dst}|Actuals], Rest) ->
  move_actuals(Actuals, [hipe_x86:mk_move(Src, Dst) | Rest]).

%% Prepend moves of incoming register formals into their temps.
move_formals([], Rest) -> Rest;
move_formals([{Dst,Src}|Formals], Rest) ->
  move_formals(Formals, [hipe_x86:mk_move(Src, Dst) | Rest]).

%%% Finalise the conversion of a call instruction.

conv_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) ->
  case hipe_x86:is_prim(Fun) of
    true ->
      conv_primop_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage);
    false ->
      conv_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage)
  end.

%% Primops are general calls except for 'fwait', which becomes a single
%% fp_unop.
conv_primop_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) ->
  case hipe_x86:prim_prim(Prim) of
    'fwait' ->
      conv_fwait_call(Dsts, Args, ContLab, ExnLab, Linkage);
    _ ->
      conv_general_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage)
  end.

%% 'fwait' takes no args, produces no results, and cannot fail -- the
%% head match asserts exactly that.
conv_fwait_call([], [], [], [], not_remote) ->
  [hipe_x86:mk_fp_unop('fwait', [])].

conv_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) ->
  %% The backend does not support pseudo_calls without a
  %% continuation label, so we make sure each call has one.
  {RealContLab, Tail} =
    case do_call_results(Dsts) of
      [] ->
	%% Avoid consing up a dummy basic block if the moves list
	%% is empty, as is typical for calls to suspend/0.
	%% This should be subsumed by a general "optimise the CFG"
	%% module, and could probably be removed.
	case ContLab of
	  [] ->
	    NewContLab = hipe_gensym:get_next_label(x86),
	    {NewContLab, [hipe_x86:mk_label(NewContLab)]};
	  _ ->
	    {ContLab, []}
	end;
      Moves ->
	%% Change the call to continue at a new basic block.
	%% In this block move the result registers to the Dsts,
	%% then continue at the call's original continuation.
	%%
	%% This should be fixed to propagate "fallthrough calls"
	%% When the rest of the backend supports them.
	NewContLab = hipe_gensym:get_next_label(x86),
	case ContLab of
	  [] ->
	    %% This is just a fallthrough
	    %% No jump back after the moves.
	    {NewContLab,
	     [hipe_x86:mk_label(NewContLab) |
	      Moves]};
	  _ ->
	    %% The call has a continuation
	    %% jump to it.
	    {NewContLab,
	     [hipe_x86:mk_label(NewContLab) |
	      Moves ++
	      [hipe_x86:mk_jmp_label(ContLab)]]}
	end
    end,
  SDesc = hipe_x86:mk_sdesc(ExnLab, 0, length(Args), {}),
  CallInsn = hipe_x86:mk_pseudo_call(Fun, SDesc, RealContLab, Linkage),
  {RegArgs,StkArgs} = split_args(Args),
  %% push stack args, move register args, then the call itself
  do_push_args(StkArgs, move_actuals(RegArgs, [CallInsn | Tail])).

%% Prepend a push for each stack-passed argument.
do_push_args([Arg|Args], Tail) ->
  [hipe_x86:mk_push(Arg) | do_push_args(Args, Tail)];
do_push_args([], Tail) ->
  Tail.

%%% Move return values from the return value registers.

do_call_results(DstList) ->
  do_call_results(DstList, 0, []).

do_call_results([Dst|DstList], I, Rest) ->
  Src = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:ret(I), 'tagged'),
  Move = hipe_x86:mk_move(Src, Dst),
  do_call_results(DstList, I+1, [Move|Rest]);
do_call_results([], _, Insns) -> Insns.

%%% Move return values to the return value registers.

move_retvals(SrcLst, Rest) ->
  move_retvals(SrcLst, 0, Rest).

move_retvals([Src|SrcLst], I, Rest) ->
  Dst = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:ret(I), 'tagged'),
  Move = hipe_x86:mk_move(Src, Dst),
  move_retvals(SrcLst, I+1, [Move|Rest]);
move_retvals([], _, Insns) -> Insns.
%%% Convert a 'fun' operand (MFA, prim, or temp)

%% Convert the callee operand of a call/tailcall.  An RTL var or reg
%% denotes an indirect call through a temp; a bare atom denotes a primop;
%% an {M,F,A} triple denotes a direct call.  Returns {X86Fun, NewMap}.
conv_fun(Fun, Map) ->
  %% flattened from three nested cases; orelse short-circuits
  case hipe_rtl:is_var(Fun) orelse hipe_rtl:is_reg(Fun) of
    true ->
      conv_dst(Fun, Map);
    false ->
      case Fun of
	Prim when is_atom(Prim) ->
	  {hipe_x86:mk_prim(Prim), Map};
	{M,F,A} when is_atom(M), is_atom(F), is_integer(A) ->
	  {hipe_x86:mk_mfa(M, F, A), Map};
	_ ->
	  exit({?MODULE, conv_fun, Fun})
      end
  end.

%%% Convert an RTL source operand (imm/var/reg).

%% Returns {FixupInsns, X86Operand, NewMap}.  FixupInsns is a (usually
%% empty) list of instructions that must be emitted before any use of the
%% operand -- needed on AMD64 when a 64-bit immediate must be materialised
%% in a temp first.
conv_src(Opnd, Map) ->
  case hipe_rtl:is_imm(Opnd) of
    true ->
      conv_imm(Opnd, Map);
    false ->
      {NewOpnd, NewMap} = conv_dst(Opnd, Map),
      {[], NewOpnd, NewMap}
  end.

-ifdef(HIPE_AMD64).
%% On AMD64 most instructions only accept sign-extended 32-bit
%% immediates; a genuinely 64-bit immediate is loaded into a fresh
%% untagged temp via move64.
conv_imm(Opnd, Map) ->
  ImmVal = hipe_rtl:imm_value(Opnd),
  case is_imm64(ImmVal) of
    true ->
      Temp = hipe_x86:mk_new_temp('untagged'),
      {[hipe_x86:mk_move64(hipe_x86:mk_imm(ImmVal), Temp)], Temp, Map};
    false ->
      {[], hipe_x86:mk_imm(ImmVal), Map}
  end.

%% True iff the immediate does not fit in a signed 32-bit word.
%% (Uses orelse for short-circuit evaluation; the original 'or' always
%% evaluated both operands.)
is_imm64(Value) when is_integer(Value) ->
  (Value < -(1 bsl (32 - 1))) orelse (Value > (1 bsl (32 - 1)) - 1);
is_imm64({_,atom})    -> false;	% Atoms are 32 bits.
is_imm64({_,c_const}) -> false;	% c_consts are 32 bits.
is_imm64({_,_})       -> true.	% Other relocs are 64 bits.
-else.
%% On x86 every immediate fits in a register-sized operand.
conv_imm(Opnd, Map) ->
  {[], hipe_x86:mk_imm(hipe_rtl:imm_value(Opnd)), Map}.
-endif.

%% Convert a list of RTL sources.  Note the accumulation order: the
%% fixups of later operands end up *before* those of earlier ones
%% (Instrs++NewInstr), preserved here exactly as in the original.
conv_src_list([O|Os], Map) ->
  {NewInstr, V, Map1} = conv_src(O, Map),
  {Instrs, Vs, Map2} = conv_src_list(Os, Map1),
  {Instrs++NewInstr, [V|Vs], Map2};
conv_src_list([], Map) ->
  {[], [], Map}.

%%% Convert an RTL destination operand (var/reg).
%% Convert an RTL var/reg/fpreg destination to an x86 temp (or, for
%% precoloured registers kept in the process struct, a memory operand off
%% the proc pointer).  Returns {X86Opnd, NewMap}.
conv_dst(Opnd, Map) ->
  %% RTL vars are tagged Erlang terms, fpregs are doubles, regs are raw
  %% untagged words.
  {Name, Type} =
    case hipe_rtl:is_var(Opnd) of
      true ->
	{hipe_rtl:var_index(Opnd), 'tagged'};
      false ->
	case hipe_rtl:is_fpreg(Opnd) of
	  true ->
	    {hipe_rtl:fpreg_index(Opnd), 'double'};
	  false ->
	    {hipe_rtl:reg_index(Opnd), 'untagged'}
	end
    end,
  case ?HIPE_X86_REGISTERS:is_precoloured(Name) of
    true ->
      case ?HIPE_X86_REGISTERS:proc_offset(Name) of
	false ->
	  %% a real machine register
	  {hipe_x86:mk_temp(Name, Type), Map};
	Offset ->
	  %% "register" actually lives at an offset in the process struct
	  Preg = ?HIPE_X86_REGISTERS:proc_pointer(),
	  Pbase = hipe_x86:mk_temp(Preg, 'untagged'),
	  Poff = hipe_x86:mk_imm(Offset),
	  {hipe_x86:mk_mem(Pbase, Poff, Type), Map}
      end;
    false ->
      %% ordinary operand: reuse its temp or bind a fresh one
      case vmap_lookup(Map, Opnd) of
	{value, NewTemp} ->
	  {NewTemp, Map};
	_ ->
	  NewTemp = hipe_x86:mk_new_temp(Type),
	  {NewTemp, vmap_bind(Map, Opnd, NewTemp)}
      end
  end.

%% Convert a list of RTL destinations, threading the map.
conv_dst_list([O|Os], Map) ->
  {Dst, Map1} = conv_dst(O, Map),
  {Dsts, Map2} = conv_dst_list(Os, Map1),
  {[Dst|Dsts], Map2};
conv_dst_list([], Map) ->
  {[], Map}.

conv_formals(Os, Map) ->
  conv_formals(?HIPE_X86_REGISTERS:nr_args(), Os, Map, []).

%% Bind each formal to a fresh temp.  The first nr_args() formals arrive
%% in registers and get allocatable temps; the rest live in fixed stack
%% slots and get non-allocatable temps.
conv_formals(N, [O|Os], Map, Res) ->
  Type =
    case hipe_rtl:is_var(O) of
      true -> 'tagged';
      false ->'untagged'
    end,
  Dst =
    if N > 0 -> hipe_x86:mk_new_temp(Type);	% allocatable
       true -> hipe_x86:mk_new_nonallocatable_temp(Type)
    end,
  Map1 = vmap_bind(Map, O, Dst),
  conv_formals(N-1, Os, Map1, [Dst|Res]);
conv_formals(_, [], Map, Res) ->
  {lists:reverse(Res), Map}.

%%% typeof_src -- what's src's type?

%% Immediates are treated as untagged; everything else defers to
%% typeof_dst/1.
typeof_src(Src) ->
  case hipe_x86:is_imm(Src) of
    true ->
      'untagged';
    _ ->
      typeof_dst(Src)
  end.

%%% typeof_dst -- what's dst's type?

%% Dst is either a temp or a mem operand; both carry a type.
typeof_dst(Dst) ->
  case hipe_x86:is_temp(Dst) of
    true ->
      hipe_x86:temp_type(Dst);
    _ ->
      hipe_x86:mem_type(Dst)
  end.

%%% clone_dst -- conjure up a scratch reg with same type as dst

clone_dst(Dst) ->
  hipe_x86:mk_new_temp(typeof_dst(Dst)).
%%% new_untagged_temp -- conjure up an untagged scratch reg

new_untagged_temp() ->
  hipe_x86:mk_new_temp('untagged').

%%% Map from RTL var/reg operands to x86 temps.
%%% Thin wrappers around gb_trees so the representation stays swappable.

vmap_empty() ->
  gb_trees:empty().

%% Returns {value, Temp} or 'none' (gb_trees:lookup/2 semantics).
vmap_lookup(Map, Key) ->
  gb_trees:lookup(Key, Map).

%% Key must not already be bound (gb_trees:insert/3 crashes otherwise).
vmap_bind(Map, Key, Val) ->
  gb_trees:insert(Key, Val, Map).

%%% Finalise the conversion of a 2-address FP operation.

%% Copy Src to Dst first unless they already coincide, then apply the
%% unary op in place.
conv_fp_unary(Dst, Src, FpUnOp) ->
  case same_opnd(Dst, Src) of
    true ->
      [hipe_x86:mk_fp_unop(FpUnOp, Dst)];
    _ ->
      [hipe_x86:mk_fmove(Src, Dst),
       hipe_x86:mk_fp_unop(FpUnOp, Dst)]
  end.

%% 'fchs' (negate) is the only RTL FP unop; anything else is a
%% case_clause crash by design.
conv_fp_unop(RtlFpUnOp) ->
  case RtlFpUnOp of
    'fchs' -> 'fchs'
  end.

%%% Finalise the conversion of a 3-address FP operation.

conv_fp_binary(Dst, Src1, FpBinOp, Src2) ->
  case same_opnd(Dst, Src1) of
    true ->		% x = x op y
      [hipe_x86:mk_fp_binop(FpBinOp, Src2, Dst)];	% x op= y
    false ->		% z = x op y, where z != x
      case same_opnd(Dst, Src2) of
	false ->	% z = x op y, where z != x && z != y
	  [hipe_x86:mk_fmove(Src1, Dst),		% z = x
	   hipe_x86:mk_fp_binop(FpBinOp, Src2, Dst)];	% z op= y
	true ->		% y = x op y, where y != x
	  case fp_binop_commutes(FpBinOp) of
	    true ->	% y = y op x
	      [hipe_x86:mk_fp_binop(FpBinOp, Src1, Dst)]; % y op= x
	    false ->	% y = x op y, where op doesn't commute
	      %% use the x87 reversed form instead of a scratch temp
	      RevFpBinOp = reverse_fp_binop(FpBinOp),
	      [hipe_x86:mk_fp_binop(RevFpBinOp, Src1, Dst)]
	  end
      end
  end.

%%% Convert an RTL FP binary operator.

conv_fp_binop(RtlFpBinOp) ->
  case RtlFpBinOp of
    'fadd' -> 'fadd';
    'fdiv' -> 'fdiv';
    'fmul' -> 'fmul';
    'fsub' -> 'fsub'
  end.

fp_binop_commutes(FpBinOp) ->
  case FpBinOp of
    'fadd' -> true;
    'fmul' -> true;
    _ -> false
  end.

%% Map to the x87 reversed-operand forms (dst = src op dst).
reverse_fp_binop(FpBinOp) ->
  case FpBinOp of
    'fsub' -> 'fsubr';
    'fdiv' -> 'fdivr'
  end.

%%% Create a jmp_switch instruction.

-ifdef(HIPE_AMD64).
%% AMD64: the jump-table address will not fit in a 32-bit displacement,
%% so load it into a register with move64 first.
mk_jmp_switch(Index, JTabLab, Labels) ->
  JTabReg = hipe_x86:mk_new_temp('untagged'),
  JTabImm = hipe_x86:mk_imm_from_addr(JTabLab, constant),
  [hipe_x86:mk_move64(JTabImm, JTabReg),
   hipe_x86:mk_jmp_switch(Index, JTabReg, Labels)].
-else.
mk_jmp_switch(Index, JTabLab, Labels) ->
  %% this is equivalent to "jmp *JTabLab(,Index,4)"
  %% ("r = Index; r *= 4; r += &JTab; jmp *r" isn't as nice)
  [hipe_x86:mk_jmp_switch(Index, JTabLab, Labels)].
-endif.

%%% Finalise the translation of a load_address instruction.

-ifdef(HIPE_AMD64).
%% AMD64: only c_const addresses fit in a plain 32-bit move; everything
%% else needs move64.
mk_load_address(Type, Src, Dst) ->
  case Type of
    c_const -> % 32 bits
      [hipe_x86:mk_move(Src, Dst)];
    _ ->
      [hipe_x86:mk_move64(Src, Dst)]
  end.
-else.
mk_load_address(_Type, Src, Dst) ->
  [hipe_x86:mk_move(Src, Dst)].
-endif.

%%% Translate 32-bit and larger loads.

-ifdef(HIPE_AMD64).
mk_load(LoadSize, LoadSign, Src, Off, Dst) ->
  case {LoadSize, LoadSign} of
    {int32, signed} ->
      [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'int32'), Dst)];
    {int32, unsigned} ->
      %% The processor zero-extends for us. No need for 'movzx'.
      [hipe_x86:mk_move(hipe_x86:mk_mem(Src, Off, 'int32'), Dst)];
    {_, _} ->
      mk_load_word(Src, Off, Dst)
  end.
-else.
%% On x86 everything that reaches here is a word-sized load (sub-word
%% loads were handled in conv_insn's #load{} clause).
mk_load(_LoadSize, _LoadSign, Src, Off, Dst) ->
  mk_load_word(Src, Off, Dst).
-endif.

%% Word-sized load; the mem operand inherits the destination's type.
mk_load_word(Src, Off, Dst) ->
  Type = typeof_dst(Dst),
  [hipe_x86:mk_move(hipe_x86:mk_mem(Src, Off, Type), Dst)].

%%% Finalise the translation of a store instruction.

-ifdef(HIPE_AMD64).
mk_store(RtlStoreSize, Src, Ptr, Off) ->
  Type = case RtlStoreSize of
	   word ->
	     %% word stores take their width from the source operand
	     typeof_src(Src);
	   OtherType ->
	     OtherType
	 end,
  [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))].
-else.
mk_store(RtlStoreSize, Src, Ptr, Off) ->
  case RtlStoreSize of
    word ->
      Type = typeof_src(Src),
      [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))];
    int32 ->
      %% on x86, int32 == word
      Type = typeof_src(Src),
      [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))];
    int16 ->
      Type = 'int16',
      [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))];
    byte ->
      Type = 'byte',
      %% byte stores may need the source in a byte-addressable register
      {NewSrc, I1} = conv_small_store(Src),
      I1 ++ [hipe_x86:mk_move(NewSrc, hipe_x86:mk_mem(Ptr, Off, Type))]
  end.

%% Non-immediate byte-store sources are routed through EAX.  This is the
%% x86-only -else branch, so calling hipe_x86_registers:eax() directly
%% (rather than via the ?EAX macro) is safe here.
conv_small_store(Src) ->
  case hipe_x86:is_imm(Src) of
    true ->
      {Src, []};
    false ->
      NewSrc = hipe_x86:mk_temp(hipe_x86_registers:eax(), 'untagged'),
      {NewSrc, [hipe_x86:mk_move(Src, NewSrc)]}
  end.
-endif.