path: root/lib/hipe/x86/hipe_rtl_to_x86.erl
author    Erlang/OTP <[email protected]>  2009-11-20 14:54:40 +0000
committer Erlang/OTP <[email protected]>  2009-11-20 14:54:40 +0000
commit    84adefa331c4159d432d22840663c38f155cd4c1
tree      bff9a9c66adda4df2106dfd0e5c053ab182a12bd  /lib/hipe/x86/hipe_rtl_to_x86.erl
The R13B03 release. (tag: OTP_R13B03)
Diffstat (limited to 'lib/hipe/x86/hipe_rtl_to_x86.erl')
-rw-r--r--  lib/hipe/x86/hipe_rtl_to_x86.erl  865
1 file changed, 865 insertions, 0 deletions
diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl
new file mode 100644
index 0000000000..d77e4fed3b
--- /dev/null
+++ b/lib/hipe/x86/hipe_rtl_to_x86.erl
@@ -0,0 +1,865 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%%
+%%% Translate 3-address RTL code to 2-address pseudo-x86 code.
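+%%% For example (a sketch of the general pattern): the 3-address RTL
+%%% operation "z := x + y" has no direct x86 counterpart and becomes
+%%% the 2-address pair "mov x,z; add y,z" (source before destination,
+%%% as in the constructors below); conv_alu/5 spells out the cases.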
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_RTL_TO_X86, hipe_rtl_to_amd64).
+-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
+-define(ECX, rcx).
+-define(EAX, rax).
+-else.
+-define(HIPE_RTL_TO_X86, hipe_rtl_to_x86).
+-define(HIPE_X86_REGISTERS, hipe_x86_registers).
+-define(ECX, ecx).
+-define(EAX, eax).
+-endif.
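+
+%%% Note: the amd64 backend is built from this same source; compiling
+%%% with HIPE_AMD64 defined selects the hipe_rtl_to_amd64 module name
+%%% and the amd64 register bindings above.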
+
+-module(?HIPE_RTL_TO_X86).
+-export([translate/1]).
+
+-include("../rtl/hipe_rtl.hrl").
+
+translate(RTL) -> % RTL function -> x86 defun
+ hipe_gensym:init(x86),
+ hipe_gensym:set_var(x86, ?HIPE_X86_REGISTERS:first_virtual()),
+ hipe_gensym:set_label(x86, hipe_gensym:get_label(rtl)),
+ Map0 = vmap_empty(),
+ {Formals, Map1} = conv_formals(hipe_rtl:rtl_params(RTL), Map0),
+ OldData = hipe_rtl:rtl_data(RTL),
+ {Code0, NewData} = conv_insn_list(hipe_rtl:rtl_code(RTL), Map1, OldData),
+ {RegFormals,_} = split_args(Formals),
+ Code =
+ case RegFormals of
+ [] -> Code0;
+ _ -> [hipe_x86:mk_label(hipe_gensym:get_next_label(x86)) |
+ move_formals(RegFormals, Code0)]
+ end,
+ IsClosure = hipe_rtl:rtl_is_closure(RTL),
+ IsLeaf = hipe_rtl:rtl_is_leaf(RTL),
+ hipe_x86:mk_defun(hipe_rtl:rtl_fun(RTL),
+ Formals,
+ IsClosure,
+ IsLeaf,
+ Code,
+ NewData,
+ [],
+ []).
+
+conv_insn_list([H|T], Map, Data) ->
+ {NewH, NewMap, NewData1} = conv_insn(H, Map, Data),
+ %% io:format("~w \n ==>\n ~w\n- - - - - - - - -\n",[H,NewH]),
+ {NewT, NewData2} = conv_insn_list(T, NewMap, NewData1),
+ {NewH ++ NewT, NewData2};
+conv_insn_list([], _, Data) ->
+ {[], Data}.
+
+conv_insn(I, Map, Data) ->
+ case I of
+ #alu{} ->
+ %% dst = src1 binop src2
+ BinOp = conv_binop(hipe_rtl:alu_op(I)),
+ {Dst, Map0} = conv_dst(hipe_rtl:alu_dst(I), Map),
+ {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alu_src1(I), Map0),
+ {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alu_src2(I), Map1),
+ I2 =
+ case hipe_rtl:is_shift_op(hipe_rtl:alu_op(I)) of
+ true ->
+ conv_shift(Dst, Src1, BinOp, Src2);
+ false ->
+ conv_alu(Dst, Src1, BinOp, Src2, [])
+ end,
+ {FixSrc1++FixSrc2++I2, Map2, Data};
+ #alub{} ->
+ %% dst = src1 op src2; if COND goto label
+ BinOp = conv_binop(hipe_rtl:alub_op(I)),
+ {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map),
+ {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0),
+ {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1),
+ Cc = conv_cond(hipe_rtl:alub_cond(I)),
+ I1 = [hipe_x86:mk_pseudo_jcc(Cc,
+ hipe_rtl:alub_true_label(I),
+ hipe_rtl:alub_false_label(I),
+ hipe_rtl:alub_pred(I))],
+ I2 = conv_alu(Dst, Src1, BinOp, Src2, I1),
+ {FixSrc1++FixSrc2++I2, Map2, Data};
+ #branch{} ->
+ %% <unused> = src1 - src2; if COND goto label
+ {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map),
+ {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0),
+ Cc = conv_cond(hipe_rtl:branch_cond(I)),
+ I2 = conv_branch(Src1, Cc, Src2,
+ hipe_rtl:branch_true_label(I),
+ hipe_rtl:branch_false_label(I),
+ hipe_rtl:branch_pred(I)),
+ {FixSrc1++FixSrc2++I2, Map1, Data};
+ #call{} ->
+ %% push <arg1>
+ %% ...
+ %% push <argn>
+ %% eax := call <Fun>; if exn goto <Fail> else goto Next
+ %% Next:
+ %% <Dst> := eax
+ %% goto <Cont>
+ {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map),
+ {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0),
+ {Fun, Map2} = conv_fun(hipe_rtl:call_fun(I), Map1),
+ I2 = conv_call(Dsts, Fun, Args,
+ hipe_rtl:call_continuation(I),
+ hipe_rtl:call_fail(I),
+ hipe_rtl:call_type(I)),
+ %% XXX Fixme: this ++ is probably inefficient.
+ {FixArgs++I2, Map2, Data};
+ #comment{} ->
+ I2 = [hipe_x86:mk_comment(hipe_rtl:comment_text(I))],
+ {I2, Map, Data};
+ #enter{} ->
+ {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:enter_arglist(I), Map),
+ {Fun, Map1} = conv_fun(hipe_rtl:enter_fun(I), Map0),
+ I2 = conv_tailcall(Fun, Args, hipe_rtl:enter_type(I)),
+ {FixArgs++I2, Map1, Data};
+ #goto{} ->
+ I2 = [hipe_x86:mk_jmp_label(hipe_rtl:goto_label(I))],
+ {I2, Map, Data};
+ #label{} ->
+ I2 = [hipe_x86:mk_label(hipe_rtl:label_name(I))],
+ {I2, Map, Data};
+ #load{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map),
+ {FixSrc, Src, Map1} = conv_src(hipe_rtl:load_src(I), Map0),
+ {FixOff, Off, Map2} = conv_src(hipe_rtl:load_offset(I), Map1),
+ I2 = case {hipe_rtl:load_size(I), hipe_rtl:load_sign(I)} of
+ {byte, signed} ->
+ [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'byte'), Dst)];
+ {byte, unsigned} ->
+ [hipe_x86:mk_movzx(hipe_x86:mk_mem(Src, Off, 'byte'), Dst)];
+ {int16, signed} ->
+ [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'int16'), Dst)];
+ {int16, unsigned} ->
+ [hipe_x86:mk_movzx(hipe_x86:mk_mem(Src, Off, 'int16'), Dst)];
+ {LoadSize, LoadSign} ->
+ mk_load(LoadSize, LoadSign, Src, Off, Dst)
+ end,
+ {FixSrc++FixOff++I2, Map2, Data};
+ #load_address{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:load_address_dst(I), Map),
+ Addr = hipe_rtl:load_address_addr(I),
+ Type = hipe_rtl:load_address_type(I),
+ Src = hipe_x86:mk_imm_from_addr(Addr, Type),
+ I2 = mk_load_address(Type, Src, Dst),
+ {I2, Map0, Data};
+ #load_atom{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:load_atom_dst(I), Map),
+ Src = hipe_x86:mk_imm_from_atom(hipe_rtl:load_atom_atom(I)),
+ I2 = [hipe_x86:mk_move(Src, Dst)],
+ {I2, Map0, Data};
+ #move{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map),
+ {FixSrc, Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0),
+ I2 = [hipe_x86:mk_move(Src, Dst)],
+ {FixSrc++I2, Map1, Data};
+ #return{} ->
+ {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:return_varlist(I), Map),
+ %% frame will fill in npop later, hence the "mk_ret(-1)"
+ I2 = move_retvals(Args, [hipe_x86:mk_ret(-1)]),
+ {FixArgs++I2, Map0, Data};
+ #store{} ->
+ {Ptr, Map0} = conv_dst(hipe_rtl:store_base(I), Map),
+ {FixSrc, Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0),
+ {FixOff, Off, Map2} = conv_src(hipe_rtl:store_offset(I), Map1),
+ I2 = mk_store(hipe_rtl:store_size(I), Src, Ptr, Off),
+ {FixSrc++FixOff++I2, Map2, Data};
+ #switch{} -> % this one also updates Data :-(
+ %% from hipe_rtl2sparc, but we use a hairy addressing mode
+ %% instead of doing the arithmetic manually
+ Labels = hipe_rtl:switch_labels(I),
+ LMap = [{label,L} || L <- Labels],
+ {NewData, JTabLab} =
+ case hipe_rtl:switch_sort_order(I) of
+ [] ->
+ hipe_consttab:insert_block(Data, word, LMap);
+ SortOrder ->
+ hipe_consttab:insert_sorted_block(
+ Data, word, LMap, SortOrder)
+ end,
+ %% no immediates allowed here
+ {Index, Map1} = conv_dst(hipe_rtl:switch_src(I), Map),
+ I2 = mk_jmp_switch(Index, JTabLab, Labels),
+ {I2, Map1, NewData};
+ #fload{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:fload_dst(I), Map),
+ {[], Src, Map1} = conv_src(hipe_rtl:fload_src(I), Map0),
+ {[], Off, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1),
+      I2 = [hipe_x86:mk_fmove(hipe_x86:mk_mem(Src, Off, 'double'), Dst)],
+ {I2, Map2, Data};
+ #fstore{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:fstore_base(I), Map),
+ {[], Src, Map1} = conv_src(hipe_rtl:fstore_src(I), Map0),
+ {[], Off, Map2} = conv_src(hipe_rtl:fstore_offset(I), Map1),
+ I2 = [hipe_x86:mk_fmove(Src, hipe_x86:mk_mem(Dst, Off, 'double'))],
+ {I2, Map2, Data};
+ #fp{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:fp_dst(I), Map),
+ {[], Src1, Map1} = conv_src(hipe_rtl:fp_src1(I), Map0),
+ {[], Src2, Map2} = conv_src(hipe_rtl:fp_src2(I), Map1),
+ FpBinOp = conv_fp_binop(hipe_rtl:fp_op(I)),
+ I2 = conv_fp_binary(Dst, Src1, FpBinOp, Src2),
+ {I2, Map2, Data};
+ #fp_unop{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:fp_unop_dst(I), Map),
+ {[], Src, Map1} = conv_src(hipe_rtl:fp_unop_src(I), Map0),
+ FpUnOp = conv_fp_unop(hipe_rtl:fp_unop_op(I)),
+ I2 = conv_fp_unary(Dst, Src, FpUnOp),
+ {I2, Map1, Data};
+ #fmove{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:fmove_dst(I), Map),
+ {[], Src, Map1} = conv_src(hipe_rtl:fmove_src(I), Map0),
+ I2 = [hipe_x86:mk_fmove(Src, Dst)],
+ {I2, Map1, Data};
+ #fconv{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:fconv_dst(I), Map),
+ {[], Src, Map1} = conv_src(hipe_rtl:fconv_src(I), Map0),
+ I2 = [hipe_x86:mk_fmove(Src, Dst)],
+ {I2, Map1, Data};
+ X ->
+ %% gctest??
+ %% jmp, jmp_link, jsr, esr, multimove,
+ %% stackneed, pop_frame, restore_frame, save_frame
+ throw({?MODULE, {"unknown RTL instruction", X}})
+ end.
+
+%%% Finalise the conversion of a 3-address ALU operation, taking
+%%% care to not introduce more temps and moves than necessary.
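+%%%
+%%% E.g. "y := x - y" (Dst =:= Src2, 'sub' does not commute) must go
+%%% through a fresh temp, "t := x; t -= y; y := t", since updating y
+%%% in place would clobber the second source.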
+
+conv_alu(Dst, Src1, 'imul', Src2, Tail) ->
+ mk_imul(Src1, Src2, Dst, Tail);
+conv_alu(Dst, Src1, BinOp, Src2, Tail) ->
+ case same_opnd(Dst, Src1) of
+ true -> % x = x op y
+ [hipe_x86:mk_alu(BinOp, Src2, Dst) | Tail]; % x op= y
+ false -> % z = x op y, where z != x
+ case same_opnd(Dst, Src2) of
+ false -> % z = x op y, where z != x && z != y
+ [hipe_x86:mk_move(Src1, Dst), % z = x
+ hipe_x86:mk_alu(BinOp, Src2, Dst) | Tail]; % z op= y
+ true -> % y = x op y, where y != x
+ case binop_commutes(BinOp) of
+ true -> % y = y op x
+ [hipe_x86:mk_alu(BinOp, Src1, Dst) | Tail]; % y op= x
+ false -> % y = x op y, where op doesn't commute
+ Tmp = clone_dst(Dst),
+ [hipe_x86:mk_move(Src1, Tmp), % t = x
+ hipe_x86:mk_alu(BinOp, Src2, Tmp), % t op= y
+ hipe_x86:mk_move(Tmp, Dst) | Tail] % y = t
+ end
+ end
+ end.
+
+mk_imul(Src1, Src2, Dst, Tail) ->
+ case hipe_x86:is_imm(Src1) of
+ true ->
+ case hipe_x86:is_imm(Src2) of
+ true ->
+ mk_imul_iit(Src1, Src2, Dst, Tail);
+ _ ->
+ mk_imul_itt(Src1, Src2, Dst, Tail)
+ end;
+ _ ->
+ case hipe_x86:is_imm(Src2) of
+ true ->
+ mk_imul_itt(Src2, Src1, Dst, Tail);
+ _ ->
+ mk_imul_ttt(Src1, Src2, Dst, Tail)
+ end
+ end.
+
+mk_imul_iit(Src1, Src2, Dst, Tail) ->
+ io:format("~w: RTL mul with two immediates\n", [?MODULE]),
+ Tmp2 = new_untagged_temp(),
+ [hipe_x86:mk_move(Src2, Tmp2) |
+ mk_imul_itt(Src1, Tmp2, Dst, Tail)].
+
+mk_imul_itt(Src1, Src2, Dst, Tail) ->
+ [hipe_x86:mk_imul(Src1, Src2, Dst) | Tail].
+
+mk_imul_ttt(Src1, Src2, Dst, Tail) ->
+ case same_opnd(Dst, Src1) of
+ true ->
+ [hipe_x86:mk_imul([], Src2, Dst) | Tail];
+ false ->
+ case same_opnd(Dst, Src2) of
+ true ->
+ [hipe_x86:mk_imul([], Src1, Dst) | Tail];
+ false ->
+ [hipe_x86:mk_move(Src1, Dst),
+ hipe_x86:mk_imul([], Src2, Dst) | Tail]
+ end
+ end.
+
+conv_shift(Dst, Src1, BinOp, Src2) ->
+ {NewSrc2,I1} =
+ case hipe_x86:is_imm(Src2) of
+ true ->
+ {Src2, []};
+ false ->
+ NewSrc = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?ECX(), 'untagged'),
+ {NewSrc, [hipe_x86:mk_move(Src2, NewSrc)]}
+ end,
+ I2 = case same_opnd(Dst, Src1) of
+ true -> % x = x op y
+ [hipe_x86:mk_shift(BinOp, NewSrc2, Dst)]; % x op= y
+ false -> % z = x op y, where z != x
+ case same_opnd(Dst, Src2) of
+ false -> % z = x op y, where z != x && z != y
+ [hipe_x86:mk_move(Src1, Dst), % z = x
+ hipe_x86:mk_shift(BinOp, NewSrc2, Dst)];% z op= y
+ true -> % y = x op y, no shift op commutes
+ Tmp = clone_dst(Dst),
+ [hipe_x86:mk_move(Src1, Tmp), % t = x
+ hipe_x86:mk_shift(BinOp, NewSrc2, Tmp), % t op= y
+ hipe_x86:mk_move(Tmp, Dst)] % y = t
+ end
+ end,
+ I1 ++ I2.
+
+%%% Finalise the conversion of a conditional branch operation, taking
+%%% care to not introduce more temps and moves than necessary.
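+%%%
+%%% E.g. the cmp must not get an immediate as its first operand, so a
+%%% branch like "if (5 < x)" is rewritten as "if (x > 5)" via
+%%% commute_cc/1; with two immediates we fall back to a temp and let
+%%% the optimiser clean it up.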
+
+conv_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) ->
+ case hipe_x86:is_imm(Src1) of
+ false ->
+ mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred);
+ true ->
+ case hipe_x86:is_imm(Src2) of
+ false ->
+ NewCc = commute_cc(Cc),
+ mk_branch(Src2, NewCc, Src1, TrueLab, FalseLab, Pred);
+ true ->
+ %% two immediates, let the optimiser clean it up
+ Tmp = new_untagged_temp(),
+ [hipe_x86:mk_move(Src1, Tmp) |
+ mk_branch(Tmp, Cc, Src2, TrueLab, FalseLab, Pred)]
+ end
+ end.
+
+mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) ->
+ %% PRE: not(is_imm(Src1))
+ [hipe_x86:mk_cmp(Src2, Src1),
+ hipe_x86:mk_pseudo_jcc(Cc, TrueLab, FalseLab, Pred)].
+
+%%% Convert an RTL ALU or ALUB binary operator.
+
+conv_binop(BinOp) ->
+ case BinOp of
+ 'add' -> 'add';
+ 'sub' -> 'sub';
+ 'or' -> 'or';
+ 'and' -> 'and';
+ 'xor' -> 'xor';
+ 'sll' -> 'shl';
+ 'srl' -> 'shr';
+ 'sra' -> 'sar';
+ 'mul' -> 'imul';
+ %% andnot ???
+ _ -> exit({?MODULE, {"unknown binop", BinOp}})
+ end.
+
+binop_commutes(BinOp) ->
+ case BinOp of
+ 'add' -> true;
+ 'or' -> true;
+ 'and' -> true;
+ 'xor' -> true;
+ _ -> false
+ end.
+
+%%% Convert an RTL conditional operator.
+
+conv_cond(Cond) ->
+ case Cond of
+ eq -> 'e';
+ ne -> 'ne';
+ gt -> 'g';
+ gtu -> 'a';
+ ge -> 'ge';
+ geu -> 'ae';
+ lt -> 'l';
+ ltu -> 'b';
+ le -> 'le';
+ leu -> 'be';
+ overflow -> 'o';
+ not_overflow -> 'no';
+ _ -> exit({?MODULE, {"unknown rtl cond", Cond}})
+ end.
+
+commute_cc(Cc) -> % if x Cc y, then y commute_cc(Cc) x
+ case Cc of
+ 'e' -> 'e'; % ==, ==
+ 'ne' -> 'ne'; % !=, !=
+ 'g' -> 'l'; % >, <
+ 'a' -> 'b'; % >u, <u
+ 'ge' -> 'le'; % >=, <=
+ 'ae' -> 'be'; % >=u, <=u
+ 'l' -> 'g'; % <, >
+ 'b' -> 'a'; % <u, >u
+ 'le' -> 'ge'; % <=, >=
+ 'be' -> 'ae'; % <=u, >=u
+ %% overflow/not_overflow: n/a
+ _ -> exit({?MODULE, {"unknown cc", Cc}})
+ end.
+
+%%% Test if Dst and Src are the same operand.
+
+same_opnd(Dst, Src) -> Dst =:= Src.
+
+%%% Finalise the conversion of a tailcall instruction.
+
+conv_tailcall(Fun, Args, Linkage) ->
+ Arity = length(Args),
+ {RegArgs,StkArgs} = split_args(Args),
+ move_actuals(RegArgs,
+ [hipe_x86:mk_pseudo_tailcall_prepare(),
+ hipe_x86:mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage)]).
+
+split_args(Args) ->
+ split_args(0, ?HIPE_X86_REGISTERS:nr_args(), Args, []).
+split_args(I, N, [Arg|Args], RegArgs) when I < N ->
+ Reg = ?HIPE_X86_REGISTERS:arg(I),
+ Temp = hipe_x86:mk_temp(Reg, 'tagged'),
+ split_args(I+1, N, Args, [{Arg,Temp}|RegArgs]);
+split_args(_, _, StkArgs, RegArgs) ->
+ {RegArgs, StkArgs}.
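+
+%% A sketch: if nr_args() were 3 (the real count comes from
+%% ?HIPE_X86_REGISTERS), split_args([A,B,C,D,E]) would pair A, B, C
+%% with the arg(0..2) temps (accumulated in reverse) and leave [D,E]
+%% as stacked arguments.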
+
+move_actuals([], Rest) -> Rest;
+move_actuals([{Src,Dst}|Actuals], Rest) ->
+ move_actuals(Actuals, [hipe_x86:mk_move(Src, Dst) | Rest]).
+
+move_formals([], Rest) -> Rest;
+move_formals([{Dst,Src}|Formals], Rest) ->
+ move_formals(Formals, [hipe_x86:mk_move(Src, Dst) | Rest]).
+
+%%% Finalise the conversion of a call instruction.
+
+conv_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) ->
+ case hipe_x86:is_prim(Fun) of
+ true ->
+ conv_primop_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage);
+ false ->
+ conv_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage)
+ end.
+
+conv_primop_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) ->
+ case hipe_x86:prim_prim(Prim) of
+ 'fwait' ->
+ conv_fwait_call(Dsts, Args, ContLab, ExnLab, Linkage);
+ _ ->
+ conv_general_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage)
+ end.
+
+conv_fwait_call([], [], [], [], not_remote) ->
+ [hipe_x86:mk_fp_unop('fwait', [])].
+
+conv_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) ->
+ %% The backend does not support pseudo_calls without a
+ %% continuation label, so we make sure each call has one.
+ {RealContLab, Tail} =
+ case do_call_results(Dsts) of
+ [] ->
+ %% Avoid consing up a dummy basic block if the moves list
+ %% is empty, as is typical for calls to suspend/0.
+ %% This should be subsumed by a general "optimise the CFG"
+ %% module, and could probably be removed.
+ case ContLab of
+ [] ->
+ NewContLab = hipe_gensym:get_next_label(x86),
+ {NewContLab, [hipe_x86:mk_label(NewContLab)]};
+ _ ->
+ {ContLab, []}
+ end;
+ Moves ->
+ %% Change the call to continue at a new basic block.
+ %% In this block move the result registers to the Dsts,
+ %% then continue at the call's original continuation.
+ %%
+      %% This should be fixed to propagate "fallthrough calls"
+      %% when the rest of the backend supports them.
+ NewContLab = hipe_gensym:get_next_label(x86),
+ case ContLab of
+ [] ->
+          %% This is just a fallthrough;
+          %% no jump back after the moves.
+ {NewContLab,
+ [hipe_x86:mk_label(NewContLab) |
+ Moves]};
+ _ ->
+          %% The call has a continuation;
+          %% jump to it.
+ {NewContLab,
+ [hipe_x86:mk_label(NewContLab) |
+ Moves ++
+ [hipe_x86:mk_jmp_label(ContLab)]]}
+ end
+ end,
+ SDesc = hipe_x86:mk_sdesc(ExnLab, 0, length(Args), {}),
+ CallInsn = hipe_x86:mk_pseudo_call(Fun, SDesc, RealContLab, Linkage),
+ {RegArgs,StkArgs} = split_args(Args),
+ do_push_args(StkArgs, move_actuals(RegArgs, [CallInsn | Tail])).
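+
+%% The emitted order is thus: pushes for the stacked arguments, moves
+%% into the argument registers, the pseudo_call itself, and then at
+%% RealContLab the result moves plus an optional jump back to the
+%% original continuation.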
+
+do_push_args([Arg|Args], Tail) ->
+ [hipe_x86:mk_push(Arg) | do_push_args(Args, Tail)];
+do_push_args([], Tail) ->
+ Tail.
+
+%%% Move return values from the return value registers.
+
+do_call_results(DstList) ->
+ do_call_results(DstList, 0, []).
+
+do_call_results([Dst|DstList], I, Rest) ->
+ Src = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:ret(I), 'tagged'),
+ Move = hipe_x86:mk_move(Src, Dst),
+ do_call_results(DstList, I+1, [Move|Rest]);
+do_call_results([], _, Insns) -> Insns.
+
+%%% Move return values to the return value registers.
+
+move_retvals(SrcLst, Rest) ->
+ move_retvals(SrcLst, 0, Rest).
+
+move_retvals([Src|SrcLst], I, Rest) ->
+ Dst = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:ret(I), 'tagged'),
+ Move = hipe_x86:mk_move(Src, Dst),
+ move_retvals(SrcLst, I+1, [Move|Rest]);
+move_retvals([], _, Insns) -> Insns.
+
+%%% Convert a 'fun' operand (MFA, prim, or temp).
+
+conv_fun(Fun, Map) ->
+ case hipe_rtl:is_var(Fun) of
+ true ->
+ conv_dst(Fun, Map);
+ false ->
+ case hipe_rtl:is_reg(Fun) of
+ true ->
+ conv_dst(Fun, Map);
+ false ->
+ case Fun of
+ Prim when is_atom(Prim) ->
+ {hipe_x86:mk_prim(Prim), Map};
+ {M,F,A} when is_atom(M), is_atom(F), is_integer(A) ->
+ {hipe_x86:mk_mfa(M,F,A), Map};
+ _ ->
+ exit({?MODULE,conv_fun,Fun})
+ end
+ end
+ end.
+
+%%% Convert an RTL source operand (imm/var/reg).
+
+conv_src(Opnd, Map) ->
+ case hipe_rtl:is_imm(Opnd) of
+ true ->
+ conv_imm(Opnd, Map);
+ false ->
+ {NewOpnd,NewMap} = conv_dst(Opnd, Map),
+ {[], NewOpnd, NewMap}
+ end.
+
+-ifdef(HIPE_AMD64).
+conv_imm(Opnd, Map) ->
+ ImmVal = hipe_rtl:imm_value(Opnd),
+ case is_imm64(ImmVal) of
+ true ->
+ Temp = hipe_x86:mk_new_temp('untagged'),
+ {[hipe_x86:mk_move64(hipe_x86:mk_imm(ImmVal), Temp)], Temp, Map};
+ false ->
+ {[], hipe_x86:mk_imm(ImmVal), Map}
+ end.
+
+is_imm64(Value) when is_integer(Value) ->
+ (Value < -(1 bsl (32 - 1))) or (Value > (1 bsl (32 - 1)) - 1);
+is_imm64({_,atom}) -> false; % Atoms are 32 bits.
+is_imm64({_,c_const}) -> false; % c_consts are 32 bits.
+is_imm64({_,_}) -> true. % Other relocs are 64 bits.
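+%% Boundary sketch: is_imm64(16#7FFFFFFF) is false, so such a value is
+%% used as an ordinary immediate, while is_imm64(16#80000000) is true
+%% and takes the move64-through-a-temp path above.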
+-else.
+conv_imm(Opnd, Map) ->
+ {[], hipe_x86:mk_imm(hipe_rtl:imm_value(Opnd)), Map}.
+-endif.
+
+conv_src_list([O|Os], Map) ->
+ {NewInstr, V, Map1} = conv_src(O, Map),
+ {Instrs, Vs, Map2} = conv_src_list(Os, Map1),
+ {Instrs++NewInstr, [V|Vs], Map2};
+conv_src_list([], Map) ->
+ {[], [], Map}.
+
+%%% Convert an RTL destination operand (var/reg).
+
+conv_dst(Opnd, Map) ->
+ {Name, Type} =
+ case hipe_rtl:is_var(Opnd) of
+ true ->
+ {hipe_rtl:var_index(Opnd), 'tagged'};
+ false ->
+ case hipe_rtl:is_fpreg(Opnd) of
+ true ->
+ {hipe_rtl:fpreg_index(Opnd), 'double'};
+ false ->
+ {hipe_rtl:reg_index(Opnd), 'untagged'}
+ end
+ end,
+ case ?HIPE_X86_REGISTERS:is_precoloured(Name) of
+ true ->
+ case ?HIPE_X86_REGISTERS:proc_offset(Name) of
+ false ->
+ {hipe_x86:mk_temp(Name, Type), Map};
+ Offset ->
+ Preg = ?HIPE_X86_REGISTERS:proc_pointer(),
+ Pbase = hipe_x86:mk_temp(Preg, 'untagged'),
+ Poff = hipe_x86:mk_imm(Offset),
+ {hipe_x86:mk_mem(Pbase, Poff, Type), Map}
+ end;
+ false ->
+ case vmap_lookup(Map, Opnd) of
+ {value, NewTemp} ->
+ {NewTemp, Map};
+ _ ->
+ NewTemp = hipe_x86:mk_new_temp(Type),
+ {NewTemp, vmap_bind(Map, Opnd, NewTemp)}
+ end
+ end.
+
+conv_dst_list([O|Os], Map) ->
+ {Dst, Map1} = conv_dst(O, Map),
+ {Dsts, Map2} = conv_dst_list(Os, Map1),
+ {[Dst|Dsts], Map2};
+conv_dst_list([], Map) ->
+ {[], Map}.
+
+conv_formals(Os, Map) ->
+ conv_formals(?HIPE_X86_REGISTERS:nr_args(), Os, Map, []).
+
+conv_formals(N, [O|Os], Map, Res) ->
+ Type =
+ case hipe_rtl:is_var(O) of
+ true -> 'tagged';
+      false -> 'untagged'
+ end,
+ Dst =
+ if N > 0 -> hipe_x86:mk_new_temp(Type); % allocatable
+ true -> hipe_x86:mk_new_nonallocatable_temp(Type)
+ end,
+ Map1 = vmap_bind(Map, O, Dst),
+ conv_formals(N-1, Os, Map1, [Dst|Res]);
+conv_formals(_, [], Map, Res) ->
+ {lists:reverse(Res), Map}.
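+
+%% Only the first nr_args() formals (those arriving in registers) get
+%% allocatable temps; the rest are made non-allocatable, presumably so
+%% that they stay in their fixed incoming stack slots rather than being
+%% assigned registers.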
+
+%%% typeof_src -- what's src's type?
+
+typeof_src(Src) ->
+ case hipe_x86:is_imm(Src) of
+ true ->
+ 'untagged';
+ _ ->
+ typeof_dst(Src)
+ end.
+
+%%% typeof_dst -- what's dst's type?
+
+typeof_dst(Dst) ->
+ case hipe_x86:is_temp(Dst) of
+ true ->
+ hipe_x86:temp_type(Dst);
+ _ ->
+ hipe_x86:mem_type(Dst)
+ end.
+
+%%% clone_dst -- conjure up a scratch reg with same type as dst
+
+clone_dst(Dst) ->
+ hipe_x86:mk_new_temp(typeof_dst(Dst)).
+
+%%% new_untagged_temp -- conjure up an untagged scratch reg
+
+new_untagged_temp() ->
+ hipe_x86:mk_new_temp('untagged').
+
+%%% Map from RTL var/reg operands to x86 temps.
+
+vmap_empty() ->
+ gb_trees:empty().
+
+vmap_lookup(Map, Key) ->
+ gb_trees:lookup(Key, Map).
+
+vmap_bind(Map, Key, Val) ->
+ gb_trees:insert(Key, Val, Map).
+
+%%% Finalise the conversion of a 2-address FP operation.
+
+conv_fp_unary(Dst, Src, FpUnOp) ->
+ case same_opnd(Dst, Src) of
+ true ->
+ [hipe_x86:mk_fp_unop(FpUnOp, Dst)];
+ _ ->
+ [hipe_x86:mk_fmove(Src, Dst),
+ hipe_x86:mk_fp_unop(FpUnOp, Dst)]
+ end.
+
+conv_fp_unop(RtlFpUnOp) ->
+ case RtlFpUnOp of
+ 'fchs' -> 'fchs'
+ end.
+
+%%% Finalise the conversion of a 3-address FP operation.
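+%%% Unlike the integer case, no scratch temp is needed when Dst =:=
+%%% Src2 and the operator does not commute: x87 has reversed forms, so
+%%% e.g. "y := x - y" maps to a single 'fsubr' (Dst := Src - Dst).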
+
+conv_fp_binary(Dst, Src1, FpBinOp, Src2) ->
+ case same_opnd(Dst, Src1) of
+ true -> % x = x op y
+ [hipe_x86:mk_fp_binop(FpBinOp, Src2, Dst)]; % x op= y
+ false -> % z = x op y, where z != x
+ case same_opnd(Dst, Src2) of
+ false -> % z = x op y, where z != x && z != y
+ [hipe_x86:mk_fmove(Src1, Dst), % z = x
+ hipe_x86:mk_fp_binop(FpBinOp, Src2, Dst)]; % z op= y
+ true -> % y = x op y, where y != x
+ case fp_binop_commutes(FpBinOp) of
+ true -> % y = y op x
+ [hipe_x86:mk_fp_binop(FpBinOp, Src1, Dst)]; % y op= x
+ false -> % y = x op y, where op doesn't commute
+ RevFpBinOp = reverse_fp_binop(FpBinOp),
+ [hipe_x86:mk_fp_binop(RevFpBinOp, Src1, Dst)]
+ end
+ end
+ end.
+
+%%% Convert an RTL FP binary operator.
+
+conv_fp_binop(RtlFpBinOp) ->
+ case RtlFpBinOp of
+ 'fadd' -> 'fadd';
+ 'fdiv' -> 'fdiv';
+ 'fmul' -> 'fmul';
+ 'fsub' -> 'fsub'
+ end.
+
+fp_binop_commutes(FpBinOp) ->
+ case FpBinOp of
+ 'fadd' -> true;
+ 'fmul' -> true;
+ _ -> false
+ end.
+
+reverse_fp_binop(FpBinOp) ->
+ case FpBinOp of
+ 'fsub' -> 'fsubr';
+ 'fdiv' -> 'fdivr'
+ end.
+
+%%% Create a jmp_switch instruction.
+
+-ifdef(HIPE_AMD64).
+mk_jmp_switch(Index, JTabLab, Labels) ->
+ JTabReg = hipe_x86:mk_new_temp('untagged'),
+ JTabImm = hipe_x86:mk_imm_from_addr(JTabLab, constant),
+ [hipe_x86:mk_move64(JTabImm, JTabReg),
+ hipe_x86:mk_jmp_switch(Index, JTabReg, Labels)].
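+%% The move64 is needed because a 64-bit absolute table address cannot
+%% be encoded as a memory-operand displacement on amd64; the 32-bit
+%% case below can jump through the table directly.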
+-else.
+mk_jmp_switch(Index, JTabLab, Labels) ->
+ %% this is equivalent to "jmp *JTabLab(,Index,4)"
+ %% ("r = Index; r *= 4; r += &JTab; jmp *r" isn't as nice)
+ [hipe_x86:mk_jmp_switch(Index, JTabLab, Labels)].
+-endif.
+
+%%% Finalise the translation of a load_address instruction.
+
+-ifdef(HIPE_AMD64).
+mk_load_address(Type, Src, Dst) ->
+ case Type of
+ c_const -> % 32 bits
+ [hipe_x86:mk_move(Src, Dst)];
+ _ ->
+ [hipe_x86:mk_move64(Src, Dst)]
+ end.
+-else.
+mk_load_address(_Type, Src, Dst) ->
+ [hipe_x86:mk_move(Src, Dst)].
+-endif.
+
+%%% Translate 32-bit and larger loads.
+
+-ifdef(HIPE_AMD64).
+mk_load(LoadSize, LoadSign, Src, Off, Dst) ->
+ case {LoadSize, LoadSign} of
+ {int32, signed} ->
+ [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'int32'), Dst)];
+ {int32, unsigned} ->
+ %% The processor zero-extends for us. No need for 'movzx'.
+ [hipe_x86:mk_move(hipe_x86:mk_mem(Src, Off, 'int32'), Dst)];
+ {_, _} ->
+ mk_load_word(Src, Off, Dst)
+ end.
+-else.
+mk_load(_LoadSize, _LoadSign, Src, Off, Dst) ->
+ mk_load_word(Src, Off, Dst).
+-endif.
+
+mk_load_word(Src, Off, Dst) ->
+ Type = typeof_dst(Dst),
+ [hipe_x86:mk_move(hipe_x86:mk_mem(Src, Off, Type), Dst)].
+
+%%% Finalise the translation of a store instruction.
+
+-ifdef(HIPE_AMD64).
+mk_store(RtlStoreSize, Src, Ptr, Off) ->
+ Type = case RtlStoreSize of
+ word ->
+ typeof_src(Src);
+ OtherType ->
+ OtherType
+ end,
+ [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))].
+-else.
+mk_store(RtlStoreSize, Src, Ptr, Off) ->
+ case RtlStoreSize of
+ word ->
+ Type = typeof_src(Src),
+ [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))];
+ int32 ->
+ Type = typeof_src(Src),
+ [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))];
+ int16 ->
+ Type = 'int16',
+ [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))];
+ byte ->
+ Type = 'byte',
+ {NewSrc, I1} = conv_small_store(Src),
+ I1 ++ [hipe_x86:mk_move(NewSrc, hipe_x86:mk_mem(Ptr, Off, Type))]
+ end.
+
+conv_small_store(Src) ->
+ case hipe_x86:is_imm(Src) of
+ true ->
+ {Src, []};
+ false ->
+ NewSrc = hipe_x86:mk_temp(hipe_x86_registers:eax(), 'untagged'),
+ {NewSrc, [hipe_x86:mk_move(Src, NewSrc)]}
+ end.
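+
+%% The detour via eax is presumably because only eax/ebx/ecx/edx have
+%% 8-bit subregisters on x86-32, so funnelling the source through eax
+%% guarantees the byte move is encodable.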
+-endif.