diff options
Diffstat (limited to 'lib/hipe/x86/hipe_x86_assemble.erl')
-rw-r--r-- | lib/hipe/x86/hipe_x86_assemble.erl | 1014 |
1 files changed, 1014 insertions, 0 deletions
diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl new file mode 100644 index 0000000000..4e65736db3 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_assemble.erl @@ -0,0 +1,1014 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% HiPE/x86 assembler +%%% +%%% TODO: +%%% - Simplify combine_label_maps and mk_data_relocs. +%%% - Move find_const to hipe_pack_constants? + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_ASSEMBLE, hipe_amd64_assemble). +-define(HIPE_X86_ENCODE, hipe_amd64_encode). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_PP, hipe_amd64_pp). +-ifdef(AMD64_SIMULATE_NSP). +-define(X86_SIMULATE_NSP, ?AMD64_SIMULATE_NSP). +-endif. +-define(EAX, rax). +-define(REGArch, reg64). +-define(RMArch, rm64). +-define(EA_DISP32_ABSOLUTE, ea_disp32_sindex). +-else. +-define(HIPE_X86_ASSEMBLE, hipe_x86_assemble). +-define(HIPE_X86_ENCODE, hipe_x86_encode). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_PP, hipe_x86_pp). +-define(EAX, eax). +-define(REGArch, reg32). +-define(RMArch, rm32). +-define(EA_DISP32_ABSOLUTE, ea_disp32). +-endif. + +-module(?HIPE_X86_ASSEMBLE). +-export([assemble/4]). + +-define(DEBUG,true). + +-include("../main/hipe.hrl"). +-include("../x86/hipe_x86.hrl"). +-include("../../kernel/src/hipe_ext_format.hrl"). +-include("../rtl/hipe_literals.hrl"). +-include("../misc/hipe_sdi.hrl"). +-undef(ASSERT). +-define(ASSERT(G), if G -> [] ; true -> exit({assertion_failed,?MODULE,?LINE,??G}) end). + +assemble(CompiledCode, Closures, Exports, Options) -> + ?when_option(time, Options, ?start_timer("x86 assembler")), + print("****************** Assembling *******************\n", [], Options), + %% + Code = [{MFA, + hipe_x86:defun_code(Defun), + hipe_x86:defun_data(Defun)} + || {MFA, Defun} <- CompiledCode], + %% + {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = + hipe_pack_constants:pack_constants(Code, ?HIPE_X86_REGISTERS:alignment()), + %% + {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = + encode(translate(Code, ConstMap, Options), Options), + print("Total num bytes=~w\n", [CodeSize], Options), + %% put(code_size, CodeSize), + %% put(const_size, ConstSize), + %% ?when_option(verbose, Options, + %% ?debug_msg("Constants are ~w bytes\n",[ConstSize])), + %% + SC = hipe_pack_constants:slim_constmap(ConstMap), + DataRelocs = mk_data_relocs(RefsFromConsts, LabelMap), + SSE = slim_sorted_exportmap(ExportMap,Closures,Exports), + SlimRefs = hipe_pack_constants:slim_refs(AccRefs), + Bin = term_to_binary([{?VERSION_STRING(),?HIPE_SYSTEM_CRC}, + ConstAlign, ConstSize, + SC, + DataRelocs, % nee LM, LabelMap + SSE, + CodeSize,CodeBinary,SlimRefs, + 0,[] % ColdCodeSize, SlimColdRefs + ]), + %% + %% ?when_option(time, Options, ?stop_timer("x86 assembler")), + Bin. + +%%% +%%% Assembly Pass 1. +%%% Process initial {MFA,Code,Data} list. +%%% Translate each MFA's body, choosing operand & instruction kinds. +%%% +%%% Assembly Pass 2. +%%% Perform short/long form optimisation for jumps. +%%% Build LabelMap for each MFA. +%%% +%%% Result is {MFA,NewCode,CodeSize,LabelMap} list. +%%% + +translate(Code, ConstMap, Options) -> + translate_mfas(Code, ConstMap, [], Options). + +translate_mfas([{MFA,Insns,_Data}|Code], ConstMap, NewCode, Options) -> + {NewInsns,CodeSize,LabelMap} = + translate_insns(Insns, {MFA,ConstMap}, hipe_sdi:pass1_init(), 0, [], Options), + translate_mfas(Code, ConstMap, [{MFA,NewInsns,CodeSize,LabelMap}|NewCode], Options); +translate_mfas([], _ConstMap, NewCode, _Options) -> + lists:reverse(NewCode). + +translate_insns([I|Insns], Context, SdiPass1, Address, NewInsns, Options) -> + NewIs = translate_insn(I, Context, Options), + add_insns(NewIs, Insns, Context, SdiPass1, Address, NewInsns, Options); +translate_insns([], _Context, SdiPass1, Address, NewInsns, _Options) -> + {LabelMap,CodeSizeIncr} = hipe_sdi:pass2(SdiPass1), + {lists:reverse(NewInsns), Address+CodeSizeIncr, LabelMap}. + +add_insns([I|Is], Insns, Context, SdiPass1, Address, NewInsns, Options) -> + NewSdiPass1 = + case I of + {'.label',L,_} -> + hipe_sdi:pass1_add_label(SdiPass1, Address, L); + {jcc_sdi,{_,{label,L}},_} -> + SdiInfo = #sdi_info{incr=(6-2),lb=(-128)+2,ub=127+2}, + hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo); + {jmp_sdi,{{label,L}},_} -> + SdiInfo = #sdi_info{incr=(5-2),lb=(-128)+2,ub=127+2}, + hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo); + _ -> + SdiPass1 + end, + Address1 = Address + insn_size(I), + add_insns(Is, Insns, Context, NewSdiPass1, Address1, [I|NewInsns], Options); +add_insns([], Insns, Context, SdiPass1, Address, NewInsns, Options) -> + translate_insns(Insns, Context, SdiPass1, Address, NewInsns, Options). + +insn_size(I) -> + case I of + {'.label',_,_} -> 0; + {'.sdesc',_,_} -> 0; + {jcc_sdi,_,_} -> 2; + {jmp_sdi,_,_} -> 2; + {Op,Arg,_Orig} -> ?HIPE_X86_ENCODE:insn_sizeof(Op, Arg) + end. + +translate_insn(I, Context, Options) -> + case I of + #alu{} -> + Arg = resolve_alu_args(hipe_x86:alu_src(I), hipe_x86:alu_dst(I), Context), + [{hipe_x86:alu_op(I), Arg, I}]; + #call{} -> + translate_call(I); + #cmovcc{} -> + {Dst,Src} = resolve_move_args( + hipe_x86:cmovcc_src(I), hipe_x86:cmovcc_dst(I), + Context), + CC = {cc,?HIPE_X86_ENCODE:cc(hipe_x86:cmovcc_cc(I))}, + Arg = {CC,Dst,Src}, + [{cmovcc, Arg, I}]; + #cmp{} -> + Arg = resolve_alu_args(hipe_x86:cmp_src(I), hipe_x86:cmp_dst(I), Context), + [{cmp, Arg, I}]; + #comment{} -> + []; + #fmove{} -> + {Op,Arg} = resolve_sse2_fmove_args(hipe_x86:fmove_src(I), + hipe_x86:fmove_dst(I)), + [{Op, Arg, I}]; + #fp_binop{} -> + case proplists:get_bool(x87, Options) of + true -> % x87 + Arg = resolve_x87_binop_args(hipe_x86:fp_binop_src(I), + hipe_x86:fp_binop_dst(I)), + [{hipe_x86:fp_binop_op(I), Arg, I}]; + false -> % sse2 + Arg = resolve_sse2_binop_args(hipe_x86:fp_binop_src(I), + hipe_x86:fp_binop_dst(I)), + [{resolve_sse2_op(hipe_x86:fp_binop_op(I)), Arg, I}] + end; + #fp_unop{} -> + case proplists:get_bool(x87, Options) of + true -> % x87 + Arg = resolve_x87_unop_arg(hipe_x86:fp_unop_arg(I)), + [{hipe_x86:fp_unop_op(I), Arg, I}]; + false -> % sse2 + case hipe_x86:fp_unop_op(I) of + 'fchs' -> + Arg = resolve_sse2_fchs_arg(hipe_x86:fp_unop_arg(I)), + [{'xorpd', Arg, I}]; + 'fwait' -> % no op on sse2, magic on x87 + [] + end + end; + #imul{} -> + translate_imul(I, Context); + #jcc{} -> + Cc = {cc,?HIPE_X86_ENCODE:cc(hipe_x86:jcc_cc(I))}, + Label = translate_label(hipe_x86:jcc_label(I)), + [{jcc_sdi, {Cc,Label}, I}]; + #jmp_fun{} -> + %% call and jmp are patched the same, so no need to distinguish + %% call from tailcall + PatchTypeExt = + case hipe_x86:jmp_fun_linkage(I) of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + Arg = translate_fun(hipe_x86:jmp_fun_fun(I), PatchTypeExt), + [{jmp, {Arg}, I}]; + #jmp_label{} -> + Arg = translate_label(hipe_x86:jmp_label_label(I)), + [{jmp_sdi, {Arg}, I}]; + #jmp_switch{} -> + RM32 = resolve_jmp_switch_arg(I, Context), + [{jmp, {RM32}, I}]; + #label{} -> + [{'.label', hipe_x86:label_label(I), I}]; + #lea{} -> + Arg = resolve_lea_args(hipe_x86:lea_mem(I), hipe_x86:lea_temp(I)), + [{lea, Arg, I}]; + #move{} -> + Arg = resolve_move_args(hipe_x86:move_src(I), hipe_x86:move_dst(I), + Context), + [{mov, Arg, I}]; + #move64{} -> + translate_move64(I, Context); + #movsx{} -> + Arg = resolve_movx_args(hipe_x86:movsx_src(I), hipe_x86:movsx_dst(I)), + [{movsx, Arg, I}]; + #movzx{} -> + Arg = resolve_movx_args(hipe_x86:movzx_src(I), hipe_x86:movzx_dst(I)), + [{movzx, Arg, I}]; + %% pseudo_call: eliminated before assembly + %% pseudo_jcc: eliminated before assembly + %% pseudo_tailcall: eliminated before assembly + %% pseudo_tailcall_prepare: eliminated before assembly + #pop{} -> + Arg = translate_dst(hipe_x86:pop_dst(I)), + [{pop, {Arg}, I}]; + #push{} -> + Arg = translate_src(hipe_x86:push_src(I), Context), + [{push, {Arg}, I}]; + #ret{} -> + translate_ret(I); + #shift{} -> + Arg = resolve_shift_args(hipe_x86:shift_src(I), hipe_x86:shift_dst(I), Context), + [{hipe_x86:shift_op(I), Arg, I}]; + #test{} -> + Arg = resolve_test_args(hipe_x86:test_src(I), hipe_x86:test_dst(I), Context), + [{test, Arg, I}] + end. + +-ifdef(X86_SIMULATE_NSP). +-ifdef(HIPE_AMD64). +translate_call(I) -> + WordSize = hipe_amd64_registers:wordsize(), + RegSP = 2#100, % esp/rsp + TempSP = hipe_x86:mk_temp(RegSP, untagged), + FunOrig = hipe_x86:call_fun(I), + Fun = + case FunOrig of + #x86_mem{base=#x86_temp{reg=4}, off=#x86_imm{value=Off}} -> + FunOrig#x86_mem{off=#x86_imm{value=Off+WordSize}}; + _ -> FunOrig + end, + RegRA = + begin + RegTemp0 = hipe_amd64_registers:temp0(), + RegTemp1 = hipe_amd64_registers:temp1(), + case Fun of + #x86_temp{reg=RegTemp0} -> RegTemp1; + #x86_mem{base=#x86_temp{reg=RegTemp0}} -> RegTemp1; + _ -> RegTemp0 + end + end, + TempRA = hipe_x86:mk_temp(RegRA, untagged), + PatchTypeExt = + case hipe_x86:call_linkage(I) of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + JmpArg = translate_fun(Fun, PatchTypeExt), + I4 = {'.sdesc', hipe_x86:call_sdesc(I), #comment{term=sdesc}}, + I3 = {jmp, {JmpArg}, #comment{term=call}}, + Size3 = hipe_amd64_encode:insn_sizeof(jmp, {JmpArg}), + MovArgs = {mem_to_rmArch(hipe_x86:mk_mem(TempSP, + hipe_x86:mk_imm(0), + untagged)), + temp_to_regArch(TempRA)}, + I2 = {mov, MovArgs, #comment{term=call}}, + Size2 = hipe_amd64_encode:insn_sizeof(mov, MovArgs), + I1 = {lea, {temp_to_regArch(TempRA), + {ea, hipe_amd64_encode:ea_disp32_rip(Size2+Size3)}}, + #comment{term=call}}, + I0 = {sub, {temp_to_rmArch(TempSP), {imm8,WordSize}}, I}, + [I0,I1,I2,I3,I4]. +-else. +translate_call(I) -> + WordSize = ?HIPE_X86_REGISTERS:wordsize(), + RegSP = 2#100, % esp/rsp + TempSP = hipe_x86:mk_temp(RegSP, untagged), + FunOrig = hipe_x86:call_fun(I), + Fun = + case FunOrig of + #x86_mem{base=#x86_temp{reg=4}, off=#x86_imm{value=Off}} -> + FunOrig#x86_mem{off=#x86_imm{value=Off+WordSize}}; + _ -> FunOrig + end, + PatchTypeExt = + case hipe_x86:call_linkage(I) of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + JmpArg = translate_fun(Fun, PatchTypeExt), + I3 = {'.sdesc', hipe_x86:call_sdesc(I), #comment{term=sdesc}}, + I2 = {jmp, {JmpArg}, #comment{term=call}}, + Size2 = ?HIPE_X86_ENCODE:insn_sizeof(jmp, {JmpArg}), + I1 = {mov, {mem_to_rmArch(hipe_x86:mk_mem(TempSP, + hipe_x86:mk_imm(0), + untagged)), + {imm32,{?X86ABSPCREL,4+Size2}}}, + #comment{term=call}}, + I0 = {sub, {temp_to_rmArch(TempSP), {imm8,WordSize}}, I}, + [I0,I1,I2,I3]. +-endif. + +translate_ret(I) -> + NPOP = hipe_x86:ret_npop(I) + ?HIPE_X86_REGISTERS:wordsize(), + RegSP = 2#100, % esp/rsp + TempSP = hipe_x86:mk_temp(RegSP, untagged), + RegRA = 2#011, % ebx/rbx + TempRA = hipe_x86:mk_temp(RegRA, untagged), + [{mov, + {temp_to_regArch(TempRA), + mem_to_rmArch(hipe_x86:mk_mem(TempSP, + hipe_x86:mk_imm(0), + untagged))}, + I}, + {add, + {temp_to_rmArch(TempSP), + case NPOP < 128 of + true -> {imm8,NPOP}; + false -> {imm32,NPOP} + end}, + #comment{term=ret}}, + {jmp, + {temp_to_rmArch(TempRA)}, + #comment{term=ret}}]. + +-else. % not X86_SIMULATE_NSP + +translate_call(I) -> + %% call and jmp are patched the same, so no need to distinguish + %% call from tailcall + PatchTypeExt = + case hipe_x86:call_linkage(I) of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + Arg = translate_fun(hipe_x86:call_fun(I), PatchTypeExt), + SDesc = hipe_x86:call_sdesc(I), + [{call, {Arg}, I}, {'.sdesc', SDesc, #comment{term=sdesc}}]. + +translate_ret(I) -> + Arg = + case hipe_x86:ret_npop(I) of + 0 -> {}; + N -> {{imm16,N}} + end, + [{ret, Arg, I}]. + +-endif. % X86_SIMULATE_NSP + +translate_imul(I, Context) -> + Temp = temp_to_regArch(hipe_x86:imul_temp(I)), + Src = temp_or_mem_to_rmArch(hipe_x86:imul_src(I)), + Args = + case hipe_x86:imul_imm_opt(I) of + [] -> {Temp,Src}; + Imm -> {Temp,Src,translate_imm(Imm, Context, true)} + end, + [{'imul', Args, I}]. + +temp_or_mem_to_rmArch(Src) -> + case Src of + #x86_temp{} -> temp_to_rmArch(Src); + #x86_mem{} -> mem_to_rmArch(Src) + end. + +translate_label(Label) when is_integer(Label) -> + {label,Label}. % symbolic, since offset is not yet computable + +translate_fun(Arg, PatchTypeExt) -> + case Arg of + #x86_temp{} -> + temp_to_rmArch(Arg); + #x86_mem{} -> + mem_to_rmArch(Arg); + #x86_mfa{m=M,f=F,a=A} -> + {rel32,{PatchTypeExt,{M,F,A}}}; + #x86_prim{prim=Prim} -> + {rel32,{PatchTypeExt,Prim}} + end. + +translate_src(Src, Context) -> + case Src of + #x86_imm{} -> + translate_imm(Src, Context, true); + _ -> + translate_dst(Src) + end. + +%%% MayTrunc8 controls whether negative Imm8s should be truncated +%%% to 8 bits or not. Truncation should always be done, except when +%%% the caller will widen the Imm8 to an Imm32 or Imm64. +translate_imm(#x86_imm{value=Imm}, Context, MayTrunc8) -> + if is_atom(Imm) -> + {imm32,{?LOAD_ATOM,Imm}}; + is_integer(Imm) -> + case (Imm =< 127) and (Imm >= -128) of + true -> + Imm8 = + case MayTrunc8 of + true -> Imm band 16#FF; + false -> Imm + end, + {imm8,Imm8}; + false -> + {imm32,Imm} + end; + true -> + Val = + case Imm of + {Label,constant} -> + {MFA,ConstMap} = Context, + ConstNo = find_const({MFA,Label}, ConstMap), + {constant,ConstNo}; + {Label,closure} -> + {closure,Label}; + {Label,c_const} -> + {c_const,Label} + end, + {imm32,{?LOAD_ADDRESS,Val}} + end. + +translate_dst(Dst) -> + case Dst of + #x86_temp{} -> + temp_to_regArch(Dst); + #x86_mem{type='double'} -> + mem_to_rm64fp(Dst); + #x86_mem{} -> + mem_to_rmArch(Dst); + #x86_fpreg{} -> + fpreg_to_stack(Dst) + end. + +%%% +%%% Assembly Pass 3. +%%% Process final {MFA,Code,CodeSize,LabelMap} list from pass 2. +%%% Translate to a single binary code segment. +%%% Collect relocation patches. +%%% Build ExportMap (MFA-to-address mapping). +%%% Combine LabelMaps to a single one (for mk_data_relocs/2 compatibility). +%%% Return {CombinedCodeSize,BinaryCode,Relocs,CombinedLabelMap,ExportMap}. +%%% + +encode(Code, Options) -> + CodeSize = compute_code_size(Code, 0), + ExportMap = build_export_map(Code, 0, []), + {AccCode,Relocs} = encode_mfas(Code, 0, [], [], Options), + CodeBinary = list_to_binary(lists:reverse(AccCode)), + ?ASSERT(CodeSize =:= byte_size(CodeBinary)), + CombinedLabelMap = combine_label_maps(Code, 0, gb_trees:empty()), + {CodeSize,CodeBinary,Relocs,CombinedLabelMap,ExportMap}. + +nr_pad_bytes(Address) -> (4 - (Address rem 4)) rem 4. % XXX: 16 or 32 instead? + +align_entry(Address) -> Address + nr_pad_bytes(Address). + +compute_code_size([{_MFA,_Insns,CodeSize,_LabelMap}|Code], Size) -> + compute_code_size(Code, align_entry(Size+CodeSize)); +compute_code_size([], Size) -> Size. + +build_export_map([{{M,F,A},_Insns,CodeSize,_LabelMap}|Code], Address, ExportMap) -> + build_export_map(Code, align_entry(Address+CodeSize), [{Address,M,F,A}|ExportMap]); +build_export_map([], _Address, ExportMap) -> ExportMap. + +combine_label_maps([{MFA,_Insns,CodeSize,LabelMap}|Code], Address, CLM) -> + NewCLM = merge_label_map(gb_trees:to_list(LabelMap), MFA, Address, CLM), + combine_label_maps(Code, align_entry(Address+CodeSize), NewCLM); +combine_label_maps([], _Address, CLM) -> CLM. + +merge_label_map([{Label,Offset}|Rest], MFA, Address, CLM) -> + NewCLM = gb_trees:insert({MFA,Label}, Address+Offset, CLM), + merge_label_map(Rest, MFA, Address, NewCLM); +merge_label_map([], _MFA, _Address, CLM) -> CLM. + +encode_mfas([{MFA,Insns,CodeSize,LabelMap}|Code], Address, AccCode, Relocs, Options) -> + print("Generating code for:~w\n", [MFA], Options), + print("Offset | Opcode | Instruction\n", [], Options), + {Address1,Relocs1,AccCode1} = + encode_insns(Insns, Address, Address, LabelMap, Relocs, AccCode, Options), + ExpectedAddress = align_entry(Address + CodeSize), + ?ASSERT(Address1 =:= ExpectedAddress), + print("Finished.\n\n", [], Options), + encode_mfas(Code, Address1, AccCode1, Relocs1, Options); +encode_mfas([], _Address, AccCode, Relocs, _Options) -> + {AccCode, Relocs}. + +encode_insns([I|Insns], Address, FunAddress, LabelMap, Relocs, AccCode, Options) -> + case I of + {'.label',L,_} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + ?ASSERT(Address =:= LabelAddress), % sanity check + print_insn(Address, [], I, Options), + encode_insns(Insns, Address, FunAddress, LabelMap, Relocs, AccCode, Options); + {'.sdesc',SDesc,_} -> + #x86_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live} = SDesc, + ExnRA = + case ExnLab of + [] -> []; % don't cons up a new one + ExnLab -> gb_trees:get(ExnLab, LabelMap) + FunAddress + end, + Reloc = {?SDESC, Address, + ?STACK_DESC(ExnRA, FSize, Arity, Live)}, + encode_insns(Insns, Address, FunAddress, LabelMap, [Reloc|Relocs], AccCode, Options); + _ -> + {Op,Arg,_} = fix_jumps(I, Address, FunAddress, LabelMap), + {Bytes, NewRelocs} = ?HIPE_X86_ENCODE:insn_encode(Op, Arg, Address), + print_insn(Address, Bytes, I, Options), + Segment = list_to_binary(Bytes), + Size = byte_size(Segment), + NewAccCode = [Segment|AccCode], + encode_insns(Insns, Address+Size, FunAddress, LabelMap, NewRelocs++Relocs, NewAccCode, Options) + end; +encode_insns([], Address, FunAddress, LabelMap, Relocs, AccCode, Options) -> + case nr_pad_bytes(Address) of + 0 -> + {Address,Relocs,AccCode}; + NrPadBytes -> % triggers at most once per function body + Padding = lists:duplicate(NrPadBytes, {nop,{},#comment{term=padding}}), + encode_insns(Padding, Address, FunAddress, LabelMap, Relocs, AccCode, Options) + end. + +fix_jumps(I, InsnAddress, FunAddress, LabelMap) -> + case I of + {jcc_sdi,{CC,{label,L}},OrigI} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + ShortOffset = LabelAddress - (InsnAddress + 2), + if is_integer(ShortOffset), ShortOffset >= -128, ShortOffset =< 127 -> + {jcc,{CC,{rel8,ShortOffset band 16#FF}},OrigI}; + true -> + LongOffset = LabelAddress - (InsnAddress + 6), + {jcc,{CC,{rel32,LongOffset}},OrigI} + end; + {jmp_sdi,{{label,L}},OrigI} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + ShortOffset = LabelAddress - (InsnAddress + 2), + if is_integer(ShortOffset), ShortOffset >= -128, ShortOffset =< 127 -> + {jmp,{{rel8,ShortOffset band 16#FF}},OrigI}; + true -> + LongOffset = LabelAddress - (InsnAddress + 5), + {jmp,{{rel32,LongOffset}},OrigI} + end; + _ -> I + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +fpreg_to_stack(#x86_fpreg{reg=Reg}) -> + {fpst, Reg}. + +temp_to_regArch(#x86_temp{reg=Reg}) -> + {?REGArch, Reg}. + +-ifdef(HIPE_AMD64). +temp_to_reg64(#x86_temp{reg=Reg}) -> + {reg64, Reg}. +-endif. + +temp_to_reg32(#x86_temp{reg=Reg}) -> + {reg32, Reg}. +temp_to_reg16(#x86_temp{reg=Reg}) -> + {reg16, Reg}. +temp_to_reg8(#x86_temp{reg=Reg}) -> + {reg8, Reg}. + +temp_to_xmm(#x86_temp{reg=Reg}) -> + {xmm, Reg}. + +-ifdef(HIPE_AMD64). +temp_to_rm64(#x86_temp{reg=Reg}) -> + {rm64, hipe_amd64_encode:rm_reg(Reg)}. +-endif. + +temp_to_rmArch(#x86_temp{reg=Reg}) -> + {?RMArch, ?HIPE_X86_ENCODE:rm_reg(Reg)}. +temp_to_rm64fp(#x86_temp{reg=Reg}) -> + {rm64fp, ?HIPE_X86_ENCODE:rm_reg(Reg)}. + +mem_to_ea(Mem) -> + EA = mem_to_ea_common(Mem), + {ea, EA}. + +mem_to_rm32(Mem) -> + EA = mem_to_ea_common(Mem), + {rm32, ?HIPE_X86_ENCODE:rm_mem(EA)}. + +mem_to_rmArch(Mem) -> + EA = mem_to_ea_common(Mem), + {?RMArch, ?HIPE_X86_ENCODE:rm_mem(EA)}. + +mem_to_rm64fp(Mem) -> + EA = mem_to_ea_common(Mem), + {rm64fp, ?HIPE_X86_ENCODE:rm_mem(EA)}. + +%%%%%%%%%%%%%%%%% +mem_to_rm8(Mem) -> + EA = mem_to_ea_common(Mem), + {rm8, ?HIPE_X86_ENCODE:rm_mem(EA)}. + +mem_to_rm16(Mem) -> + EA = mem_to_ea_common(Mem), + {rm16, ?HIPE_X86_ENCODE:rm_mem(EA)}. +%%%%%%%%%%%%%%%%% + +mem_to_ea_common(#x86_mem{base=[], off=#x86_imm{value=Off}}) -> + ?HIPE_X86_ENCODE:?EA_DISP32_ABSOLUTE(Off); +mem_to_ea_common(#x86_mem{base=#x86_temp{reg=Base}, off=#x86_temp{reg=Index}}) -> + case Base band 2#111 of + 5 -> % ebp/rbp or r13 + case Index band 2#111 of + 5 -> % ebp/rbp or r13 + SINDEX = ?HIPE_X86_ENCODE:sindex(0, Index), + SIB = ?HIPE_X86_ENCODE:sib(Base, SINDEX), + ?HIPE_X86_ENCODE:ea_disp8_sib(0, SIB); + _ -> + SINDEX = ?HIPE_X86_ENCODE:sindex(0, Base), + SIB = ?HIPE_X86_ENCODE:sib(Index, SINDEX), + ?HIPE_X86_ENCODE:ea_sib(SIB) + end; + _ -> + SINDEX = ?HIPE_X86_ENCODE:sindex(0, Index), + SIB = ?HIPE_X86_ENCODE:sib(Base, SINDEX), + ?HIPE_X86_ENCODE:ea_sib(SIB) + end; +mem_to_ea_common(#x86_mem{base=#x86_temp{reg=Base}, off=#x86_imm{value=Off}}) -> + if + Off =:= 0 -> + case Base of + 4 -> %esp, use SIB w/o disp8 + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_sib(SIB); + 5 -> %ebp, use disp8 w/o SIB + ?HIPE_X86_ENCODE:ea_disp8_base(Off, Base); + 12 -> %r12, use SIB w/o disp8 + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_sib(SIB); + 13 -> %r13, use disp8 w/o SIB + ?HIPE_X86_ENCODE:ea_disp8_base(Off, Base); + _ -> %neither SIB nor disp8 needed + ?HIPE_X86_ENCODE:ea_base(Base) + end; + Off >= -128, Off =< 127 -> + Disp8 = Off band 16#FF, + case Base of + 4 -> %esp, must use SIB + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_disp8_sib(Disp8, SIB); + 12 -> %r12, must use SIB + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_disp8_sib(Disp8, SIB); + _ -> %use disp8 w/o SIB + ?HIPE_X86_ENCODE:ea_disp8_base(Disp8, Base) + end; + true -> + case Base of + 4 -> %esp, must use SIB + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_disp32_sib(Off, SIB); + 12 -> %r12, must use SIB + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_disp32_sib(Off, SIB); + _ -> + ?HIPE_X86_ENCODE:ea_disp32_base(Off, Base) + end + end. + +%% jmp_switch +-ifdef(HIPE_AMD64). +resolve_jmp_switch_arg(I,�_Context) -> + Base = hipe_x86:temp_reg(hipe_x86:jmp_switch_jtab(I)), + Index = hipe_x86:temp_reg(hipe_x86:jmp_switch_temp(I)), + SINDEX = hipe_amd64_encode:sindex(3, Index), + SIB = hipe_amd64_encode:sib(Base, SINDEX), + EA = + if (Base =:= 5) or (Base =:= 13) -> + hipe_amd64_encode:ea_disp8_sib(0, SIB); + true -> + hipe_amd64_encode:ea_sib(SIB) + end, + {rm64,hipe_amd64_encode:rm_mem(EA)}. +-else. +resolve_jmp_switch_arg(I, {MFA,ConstMap}) -> + ConstNo = find_const({MFA,hipe_x86:jmp_switch_jtab(I)}, ConstMap), + Disp32 = {?LOAD_ADDRESS,{constant,ConstNo}}, + SINDEX = ?HIPE_X86_ENCODE:sindex(2, hipe_x86:temp_reg(hipe_x86:jmp_switch_temp(I))), + EA = ?HIPE_X86_ENCODE:ea_disp32_sindex(Disp32, SINDEX), % this creates a SIB implicitly + {rm32,?HIPE_X86_ENCODE:rm_mem(EA)}. +-endif. + +%% lea reg, mem +resolve_lea_args(Src=#x86_mem{}, Dst=#x86_temp{}) -> + {temp_to_regArch(Dst),mem_to_ea(Src)}. + +resolve_sse2_op(Op) -> + case Op of + fadd -> addsd; + fdiv -> divsd; + fmul -> mulsd; + fsub -> subsd; + _ -> exit({?MODULE, unknown_sse2_operator, Op}) + end. + +%% OP xmm, mem +resolve_sse2_binop_args(Src=#x86_mem{type=double}, + Dst=#x86_temp{type=double}) -> + {temp_to_xmm(Dst),mem_to_rm64fp(Src)}; +%% movsd mem, xmm +resolve_sse2_binop_args(Src=#x86_temp{type=double}, + Dst=#x86_mem{type=double}) -> + {mem_to_rm64fp(Dst),temp_to_xmm(Src)}; +%% OP xmm, xmm +resolve_sse2_binop_args(Src=#x86_temp{type=double}, + Dst=#x86_temp{type=double}) -> + {temp_to_xmm(Dst),temp_to_rm64fp(Src)}. + +%%% fmove -> cvtsi2sd or movsd +resolve_sse2_fmove_args(Src, Dst) -> + case {Src,Dst} of + {#x86_temp{type=untagged}, #x86_temp{type=double}} -> % cvtsi2sd xmm, reg + {cvtsi2sd, {temp_to_xmm(Dst),temp_to_rmArch(Src)}}; + {#x86_mem{type=untagged}, #x86_temp{type=double}} -> % cvtsi2sd xmm, mem + {cvtsi2sd, {temp_to_xmm(Dst),mem_to_rmArch(Src)}}; + _ -> % movsd + {movsd, resolve_sse2_binop_args(Src, Dst)} + end. + +%%% xorpd xmm, mem +resolve_sse2_fchs_arg(Dst=#x86_temp{type=double}) -> + {temp_to_xmm(Dst), + {rm64fp, {rm_mem, ?HIPE_X86_ENCODE:?EA_DISP32_ABSOLUTE( + {?LOAD_ADDRESS, + {c_const, sse2_fnegate_mask}})}}}. + +%% mov mem, imm +resolve_move_args(#x86_imm{value=ImmSrc}, Dst=#x86_mem{type=Type}, Context) -> + case Type of % to support byte, int16 and int32 stores + byte -> + ByteImm = ImmSrc band 255, %to ensure that it is a bytesized imm + {mem_to_rm8(Dst),{imm8,ByteImm}}; + int16 -> + {mem_to_rm16(Dst),{imm16,ImmSrc band 16#FFFF}}; + int32 -> + {_,Imm} = translate_imm(#x86_imm{value=ImmSrc}, Context, false), + {mem_to_rm32(Dst),{imm32,Imm}}; + _ -> + RMArch = mem_to_rmArch(Dst), + {_,Imm} = translate_imm(#x86_imm{value=ImmSrc}, Context, false), + {RMArch,{imm32,Imm}} + end; + +%% mov reg,mem +resolve_move_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}, _Context) -> + case Type of + int32 -> % must be unsigned + {temp_to_reg32(Dst),mem_to_rm32(Src)}; + _ -> + {temp_to_regArch(Dst),mem_to_rmArch(Src)} + end; + +%% mov mem,reg +resolve_move_args(Src=#x86_temp{}, Dst=#x86_mem{type=Type}, _Context) -> + case Type of % to support byte, int16 and int32 stores + byte -> + {mem_to_rm8(Dst),temp_to_reg8(Src)}; + int16 -> + {mem_to_rm16(Dst),temp_to_reg16(Src)}; + int32 -> + {mem_to_rm32(Dst),temp_to_reg32(Src)}; + tagged -> % tagged, untagged + {mem_to_rmArch(Dst),temp_to_regArch(Src)}; + untagged -> % tagged, untagged + {mem_to_rmArch(Dst),temp_to_regArch(Src)} + end; + +%% mov reg,reg +resolve_move_args(Src=#x86_temp{}, Dst=#x86_temp{}, _Context) -> + {temp_to_regArch(Dst),temp_to_rmArch(Src)}; + +%% mov reg,imm +resolve_move_args(Src=#x86_imm{value=_ImmSrc}, Dst=#x86_temp{}, Context) -> + {_,Imm} = translate_imm(Src, Context, false), + imm_move_args(Dst, Imm). + +-ifdef(HIPE_AMD64). +imm_move_args(Dst, Imm) -> + if is_number(Imm), Imm >= 0 -> + {temp_to_reg32(Dst),{imm32,Imm}}; + true -> + {temp_to_rm64(Dst),{imm32,Imm}} + end. +-else. +imm_move_args(Dst, Imm) -> + {temp_to_reg32(Dst),{imm32,Imm}}. +-endif. + +-ifdef(HIPE_AMD64). +translate_move64(I, Context) -> + Arg = resolve_move64_args(hipe_x86:move64_src(I), + hipe_x86:move64_dst(I), + Context), + [{mov, Arg, I}]. + +%% mov reg,imm64 +resolve_move64_args(Src=#x86_imm{}, Dst=#x86_temp{}, Context) -> + {_,Imm} = translate_imm(Src, Context, false), + {temp_to_reg64(Dst),{imm64,Imm}}. +-else. +translate_move64(I, _Context) -> exit({?MODULE, I}). +-endif. + +%%% mov{s,z}x +resolve_movx_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}) -> + {temp_to_regArch(Dst), + case Type of + byte -> + mem_to_rm8(Src); + int16 -> + mem_to_rm16(Src); + int32 -> + mem_to_rm32(Src) + end}. + +%%% alu/cmp (_not_ test) +resolve_alu_args(Src, Dst, Context) -> + case {Src,Dst} of + {#x86_imm{}, #x86_mem{}} -> + {mem_to_rmArch(Dst), translate_imm(Src, Context, true)}; + {#x86_mem{}, #x86_temp{}} -> + {temp_to_regArch(Dst), mem_to_rmArch(Src)}; + {#x86_temp{}, #x86_mem{}} -> + {mem_to_rmArch(Dst), temp_to_regArch(Src)}; + {#x86_temp{}, #x86_temp{}} -> + {temp_to_regArch(Dst), temp_to_rmArch(Src)}; + {#x86_imm{}, #x86_temp{reg=0}} -> % eax,imm + NewSrc = translate_imm(Src, Context, true), + NewDst = + case NewSrc of + {imm8,_} -> temp_to_rmArch(Dst); + {imm32,_} -> ?EAX + end, + {NewDst, NewSrc}; + {#x86_imm{}, #x86_temp{}} -> + {temp_to_rmArch(Dst), translate_imm(Src, Context, true)} + end. + +%%% test +resolve_test_args(Src, Dst, Context) -> + case Src of + #x86_imm{} -> % imm8 not allowed + {_ImmSize,ImmValue} = translate_imm(Src, Context, false), + NewDst = + case Dst of + #x86_temp{reg=0} -> ?EAX; + #x86_temp{} -> temp_to_rmArch(Dst); + #x86_mem{} -> mem_to_rmArch(Dst) + end, + {NewDst, {imm32,ImmValue}}; + #x86_temp{} -> + NewDst = + case Dst of + #x86_temp{} -> temp_to_rmArch(Dst); + #x86_mem{} -> mem_to_rmArch(Dst) + end, + {NewDst, temp_to_regArch(Src)} + end. + +%%% shifts +resolve_shift_args(Src, Dst, Context) -> + RM32 = + case Dst of + #x86_temp{} -> temp_to_rmArch(Dst); + #x86_mem{} -> mem_to_rmArch(Dst) + end, + Count = + case Src of + #x86_imm{value=1} -> 1; + #x86_imm{} -> translate_imm(Src, Context, true); % must be imm8 + #x86_temp{reg=1} -> cl % temp must be ecx + end, + {RM32, Count}. + +%% x87_binop mem +resolve_x87_unop_arg(Arg=#x86_mem{type=Type})-> + case Type of + 'double' -> {mem_to_rm64fp(Arg)}; + 'untagged' -> {mem_to_rmArch(Arg)}; + _ -> ?EXIT({fmovArgNotSupported,{Arg}}) + end; +resolve_x87_unop_arg(Arg=#x86_fpreg{}) -> + {fpreg_to_stack(Arg)}; +resolve_x87_unop_arg([]) -> + []. + +%% x87_binop mem, st(i) +resolve_x87_binop_args(Src=#x86_fpreg{}, Dst=#x86_mem{})-> + {mem_to_rm64fp(Dst),fpreg_to_stack(Src)}; +%% x87_binop st(0), st(i) +resolve_x87_binop_args(Src=#x86_fpreg{}, Dst=#x86_fpreg{})-> + {fpreg_to_stack(Dst),fpreg_to_stack(Src)}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +mk_data_relocs(RefsFromConsts, LabelMap) -> + lists:flatten(mk_data_relocs(RefsFromConsts, LabelMap, [])). + +mk_data_relocs([{MFA,Labels} | Rest], LabelMap, Acc) -> + Map = [case Label of + {L,Pos} -> + Offset = find({MFA,L}, LabelMap), + {Pos,Offset}; + {sorted,Base,OrderedLabels} -> + {sorted, Base, [begin + Offset = find({MFA,L}, LabelMap), + {Order, Offset} + end + || {L,Order} <- OrderedLabels]} + end + || Label <- Labels], + %% msg("Map: ~w Map\n",[Map]), + mk_data_relocs(Rest, LabelMap, [Map,Acc]); +mk_data_relocs([],_,Acc) -> Acc. + +find({MFA,L},LabelMap) -> + gb_trees:get({MFA,L}, LabelMap). + +slim_sorted_exportmap([{Addr,M,F,A}|Rest], Closures, Exports) -> + IsClosure = lists:member({M,F,A}, Closures), + IsExported = is_exported(F, A, Exports), + [Addr,M,F,A,IsClosure,IsExported | slim_sorted_exportmap(Rest, Closures, Exports)]; +slim_sorted_exportmap([],_,_) -> []. + +is_exported(F, A, Exports) -> lists:member({F,A}, Exports). + +%%% +%%% Assembly listing support (pp_asm option). +%%% + +print(String, Arglist, Options) -> + ?when_option(pp_asm, Options, io:format(String, Arglist)). + +print_insn(Address, Bytes, I, Options) -> + ?when_option(pp_asm, Options, print_insn_2(Address, Bytes, I)), + ?when_option(pp_cxmon, Options, print_code_list_2(Bytes)). + +print_code_list_2([H | Tail]) -> + print_byte(H), + io:format(","), + print_code_list_2(Tail); +print_code_list_2([]) -> + io:format(""). + +print_insn_2(Address, Bytes, {_,_,OrigI}) -> + io:format("~8.16b | ", [Address]), + print_code_list(Bytes, 0), + ?HIPE_X86_PP:pp_insn(OrigI). + +print_code_list([Byte|Rest], Len) -> + print_byte(Byte), + print_code_list(Rest, Len+1); +print_code_list([], Len) -> + fill_spaces(24-(Len*2)), + io:format(" | "). + +print_byte(Byte) -> + io:format("~2.16.0b", [Byte band 16#FF]). + +fill_spaces(N) when N > 0 -> + io:format(" "), + fill_spaces(N-1); +fill_spaces(0) -> + []. + +%%% +%%% Lookup a constant in a ConstMap. +%%% + +find_const({MFA,Label},[{pcm_entry,MFA,Label,ConstNo,_,_,_}|_]) -> + ConstNo; +find_const(N,[_|R]) -> + find_const(N,R); +find_const(C,[]) -> + ?EXIT({constant_not_found,C}). |