From 84adefa331c4159d432d22840663c38f155cd4c1 Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Fri, 20 Nov 2009 14:54:40 +0000 Subject: The R13B03 release. --- lib/hipe/x86/Makefile | 134 +++ lib/hipe/x86/NOTES.OPTIM | 200 ++++ lib/hipe/x86/NOTES.RA | 32 + lib/hipe/x86/TODO | 31 + lib/hipe/x86/hipe_rtl_to_x86.erl | 865 ++++++++++++++++++ lib/hipe/x86/hipe_x86.erl | 496 ++++++++++ lib/hipe/x86/hipe_x86.hrl | 116 +++ lib/hipe/x86/hipe_x86_assemble.erl | 1014 +++++++++++++++++++++ lib/hipe/x86/hipe_x86_cfg.erl | 147 +++ lib/hipe/x86/hipe_x86_defuse.erl | 160 ++++ lib/hipe/x86/hipe_x86_encode.erl | 1302 +++++++++++++++++++++++++++ lib/hipe/x86/hipe_x86_encode.txt | 213 +++++ lib/hipe/x86/hipe_x86_frame.erl | 687 ++++++++++++++ lib/hipe/x86/hipe_x86_liveness.erl | 57 ++ lib/hipe/x86/hipe_x86_main.erl | 70 ++ lib/hipe/x86/hipe_x86_postpass.erl | 276 ++++++ lib/hipe/x86/hipe_x86_pp.erl | 350 +++++++ lib/hipe/x86/hipe_x86_ra.erl | 99 ++ lib/hipe/x86/hipe_x86_ra_finalise.erl | 335 +++++++ lib/hipe/x86/hipe_x86_ra_ls.erl | 85 ++ lib/hipe/x86/hipe_x86_ra_naive.erl | 409 +++++++++ lib/hipe/x86/hipe_x86_ra_postconditions.erl | 452 ++++++++++ lib/hipe/x86/hipe_x86_ra_x87_ls.erl | 63 ++ lib/hipe/x86/hipe_x86_registers.erl | 254 ++++++ lib/hipe/x86/hipe_x86_spill_restore.erl | 345 +++++++ lib/hipe/x86/hipe_x86_x87.erl | 635 +++++++++++++ 26 files changed, 8827 insertions(+) create mode 100644 lib/hipe/x86/Makefile create mode 100644 lib/hipe/x86/NOTES.OPTIM create mode 100644 lib/hipe/x86/NOTES.RA create mode 100644 lib/hipe/x86/TODO create mode 100644 lib/hipe/x86/hipe_rtl_to_x86.erl create mode 100644 lib/hipe/x86/hipe_x86.erl create mode 100644 lib/hipe/x86/hipe_x86.hrl create mode 100644 lib/hipe/x86/hipe_x86_assemble.erl create mode 100644 lib/hipe/x86/hipe_x86_cfg.erl create mode 100644 lib/hipe/x86/hipe_x86_defuse.erl create mode 100644 lib/hipe/x86/hipe_x86_encode.erl create mode 100644 lib/hipe/x86/hipe_x86_encode.txt create mode 100644 lib/hipe/x86/hipe_x86_frame.erl create mode 100644 lib/hipe/x86/hipe_x86_liveness.erl create mode 100644 lib/hipe/x86/hipe_x86_main.erl create mode 100644 lib/hipe/x86/hipe_x86_postpass.erl create mode 100644 lib/hipe/x86/hipe_x86_pp.erl create mode 100644 lib/hipe/x86/hipe_x86_ra.erl create mode 100644 lib/hipe/x86/hipe_x86_ra_finalise.erl create mode 100644 lib/hipe/x86/hipe_x86_ra_ls.erl create mode 100644 lib/hipe/x86/hipe_x86_ra_naive.erl create mode 100644 lib/hipe/x86/hipe_x86_ra_postconditions.erl create mode 100644 lib/hipe/x86/hipe_x86_ra_x87_ls.erl create mode 100644 lib/hipe/x86/hipe_x86_registers.erl create mode 100644 lib/hipe/x86/hipe_x86_spill_restore.erl create mode 100644 lib/hipe/x86/hipe_x86_x87.erl (limited to 'lib/hipe/x86') diff --git a/lib/hipe/x86/Makefile b/lib/hipe/x86/Makefile new file mode 100644 index 0000000000..065b56fce3 --- /dev/null +++ b/lib/hipe/x86/Makefile @@ -0,0 +1,134 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. 
+# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +# Please keep this list sorted. +MODULES=hipe_rtl_to_x86 \ + hipe_x86 \ + hipe_x86_assemble \ + hipe_x86_cfg \ + hipe_x86_defuse \ + hipe_x86_encode \ + hipe_x86_frame \ + hipe_x86_liveness \ + hipe_x86_main \ + hipe_x86_postpass \ + hipe_x86_pp \ + hipe_x86_ra \ + hipe_x86_ra_finalise \ + hipe_x86_ra_ls \ + hipe_x86_ra_naive \ + hipe_x86_ra_postconditions \ + hipe_x86_ra_x87_ls \ + hipe_x86_registers \ + hipe_x86_spill_restore \ + hipe_x86_x87 + +HRL_FILES=hipe_x86.hrl +ERL_FILES=$(MODULES:%=%.erl) +TARGET_FILES=$(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# APP_FILE= +# APP_SRC=$(APP_FILE).src +# APP_TARGET=$(EBIN)/$(APP_FILE) +# +# APPUP_FILE= +# APPUP_SRC=$(APPUP_FILE).src +# APPUP_TARGET=$(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_exported_vars + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +# Please keep this list sorted. 
+$(EBIN)/hipe_rtl_to_x86.beam: ../rtl/hipe_rtl.hrl +$(EBIN)/hipe_x86_assemble.beam: ../main/hipe.hrl ../rtl/hipe_literals.hrl ../misc/hipe_sdi.hrl +$(EBIN)/hipe_x86_cfg.beam: ../flow/cfg.hrl ../flow/cfg.inc +$(EBIN)/hipe_x86_frame.beam: ../rtl/hipe_literals.hrl +$(EBIN)/hipe_x86_liveness.beam: ../flow/liveness.inc +$(EBIN)/hipe_x86_main.beam: ../main/hipe.hrl +$(EBIN)/hipe_x86_ra: ../main/hipe.hrl +$(EBIN)/hipe_x86_ra_dummy.beam: ../main/hipe.hrl +$(EBIN)/hipe_x86_ra_ls.beam: ../main/hipe.hrl +$(EBIN)/hipe_x86_ra_postconditions.beam: ../main/hipe.hrl +$(EBIN)/hipe_x86_ra_x87_ls.beam: ../main/hipe.hrl +$(EBIN)/hipe_x86_registers.beam: ../rtl/hipe_literals.hrl +$(EBIN)/hipe_x86_spill_restore.beam: ../main/hipe.hrl ../flow/cfg.hrl +$(EBIN)/hipe_x86_x87.beam: ../main/hipe.hrl + +$(TARGET_FILES): hipe_x86.hrl ../misc/hipe_consttab.hrl diff --git a/lib/hipe/x86/NOTES.OPTIM b/lib/hipe/x86/NOTES.OPTIM new file mode 100644 index 0000000000..4c241cacb4 --- /dev/null +++ b/lib/hipe/x86/NOTES.OPTIM @@ -0,0 +1,200 @@ +$Id$ + +Partial x86 code optimisation guide +=================================== +Priority should be given to P6 and P4, then K7, +then P5, and last to K6. + +Rules that are blatantly obvious or irrelevant for HiPE are +generally not listed. These includes things like alignment +of basic data types, store-forwarding rules when alignment +or sizes don't match, and partial register stalls. + +Intel P4 +-------- +The P6 4-1-1 insn decode template no longer applies. + +Simple insns (add/sub/cmp/test/and/or/xor/neg/not/mov/sahf) +are twice as fast as in P6. + +Shifts are "movsx" (sign-extend) are slower than in P6. + +Always avoid "inc" and "dec", use "add" and "sub" instead, +due to condition codes dependencies overhead. + +"fxch" is slightly more expensive than in P6, where it was free. + +Use "setcc" or "cmov" to eliminate unpredictable branches. + +For hot code executing out of the trace cache, alignment of +branch targets is less of an issue compared to P6. + +Do use "fxch" to simulate a flat FP register file, but only +for that purpose, not for manual scheduling for parallelism. + +Using "lea" is highly recommended. + +Eliminate redundant loads. Use regs as much as possible. + +Left shifts up to 3 have longer latencies than the equivalent +sequence of adds. + +Do utilise the addressing modes, to save registers and trace +cache bandwidth. + +"xor reg,reg" or "sub reg,reg" preferred over moving zero to reg. + +"test reg,reg" preferred over "cmp" with zero or "and". + +Avoid explicit cmp/test;jcc if the preceeding insn (alu, but not +mov or lea) set the condition codes. + +Load-execute alu insns (mem src) are Ok. + +Add-reg-to-mem slightly better than add-mem-to-reg. + +Add-reg-to-mem is better than load;add;store. + +Intel P6 +-------- +4-1-1 instruction decoding template: can decode one semi-complex +(max 4 uops) and two simple (1 uop) insns per clock; follow a +complex insn by two simple ones, otherwise the decoders will stall. + +Load-execute (mem src) alu insns are 2 uops. +Read-modify-write (mem dst) alu insns are 4 uops. + +Insns longer than 7 bytes block parallel decoding. +Avoid insns longer than 7 bytes. + +Lea is useful. + +"movzx" is preferred for zero-extension; the xor;mov alternative +causes a partial register stall. + +Use "test" instead of "cmp" with zero. + +Pull address calculations into load and store insn addressing modes. + +Clear a reg with "xor", not by moving zero to it. + +Many alu insns set the condition codes. Replace "alu;cmp;jcc" +with "alu;jcc". 
This is not applicable for "mov" or "lea". + +For FP code, simulate a flat register file on the x87 stack by +using fxch to reorder it. + +AMD K7 +------ +Select DirectPath insns. Avoid VectorPath insns due to slower decode. + +Alu insns with mem src are very efficient. +Alu insns with mem dst are very efficient. + +Fetches from I-cache are 16-byte aligned. Align functions and frequently +used labels at or near the start of 16-byte aligned blocks. + +"movzx" preferred over "xor;mov" for zero-extension. + +"push mem" preferred over "load;push reg". + +"xor reg,reg" preferred over moving zero to the reg. + +"test" preferred over "cmp". + +"pop" insns are VectorPath. "pop mem" has latency 3, "pop reg" has +latency 4. + +"push reg" and "push imm" are DirectPath, "push mem" is VectorPath. +The latency is 3 clocks. + +Intel P5 +-------- +If a loop header is less than 8 bytes away from a 16-byte +boundary, align it to the 16-byte boundary. + +If a return address is less than 8 bytes away from a 16-byte +boundary, align it to the 16-byte boundary. + +Align function entry points to 16-byte boundaries. + +Ensure that doubles are 64-bit aligned. + +Data cache line size is 32 bytes. The whole line is brought +in on a read miss. + +"push mem" is not pairable; loading a temp reg and pushing +the reg pairs better -- this is also faster on the 486. + +No conditional move instruction. + +Insns longer than 7 bytes can't go down the V-pipe or share +the insn FIFO with other insns. +Avoid insns longer than 7 bytes. + +Lea is useful when it replaces several other add/shift insns. +Lea is not a good replacement for a single shl since a scaled +index requires a disp32 (or base), making the insn longer. + +"movzx" is worse than the xor;mov alternative -- the opcode +prefix causes a slowdown and it is not pariable. + +Use "test" instead of "cmp" with zero. + +"test eax,imm" and "test reg,reg" are pairable, other forms are not. + +Pull address calculations into load and store insn addressing modes. + +Clear a reg with "xor", not by moving zero to it. + +Many alu insns set the condition codes. Replace "alu;cmp;jcc" +with "alu;jcc". This is not applicable for "mov" or "lea". + +For FP code, simulate a flat register file on the x87 stack by +using fxch to reorder it. + +"neg" and "not" are not pairable. "test imm,reg" and "test imm,mem" +are not pairable. Shifts by "cl" are not pairable. Shifts by "1" or +"imm" are pairable but only execute in the U-pipe. + +AMD K6 +------ +The insn size predecoder has a 3-byte window. Insns with both prefix +and SIB bytes cannot be short-decoded. + +Use short and simple insns, including mem src alu insns. + +Avoid insns longer than 7 bytes. They cannot be short-decoded. +Short-decode: max 7 bytes, max 2 uops. +Long-decode: max 11 bytes, max 4 uops. +Vector-decode: longer than 11 bytes or more than 4 uops. + +Prefer read-modify-write alu insns (mem dst) over "load;op;store" +sequences, for code density and register pressure reasons. + +Avoid the "(esi)" addressing mode: it forces the insn to be vector-decoded. +Use a different reg or add an explicit zero displacement. + +"add reg,reg" preferred over a shl by 1, it parallelises better. + +"movzx" preferred over "xor;mov" for zero-extension. + +Moving zero to a reg preferred over "xor reg,reg" due to dependencies +and condition codes overhead. + +"push mem" preferred over "load;push reg" due to code density and +register pressure. (Page 64.) 
+Explicit moves preferred when pushing args for fn calls, due to +%esp dependencies and random access possibility. (Page 58.) +[hmm, these two are in conflict] + +There is no penalty for seg reg prefix unless there are multiple prefixes. + +Align function entries and frequent branch targets to 16-byte boundaries. + +Shifts by imm only go down one of the pipes. + +"test reg,reg" preferred over "cmp" with zero. +"test reg,imm" is a long-decode insn. + +No conditional move insn. diff --git a/lib/hipe/x86/NOTES.RA b/lib/hipe/x86/NOTES.RA new file mode 100644 index 0000000000..ce80411642 --- /dev/null +++ b/lib/hipe/x86/NOTES.RA @@ -0,0 +1,32 @@ +$Id$ + +Register Allocation +=================== + +These are the rules that HiPE x86 register allocators must abide by. + +- Before RA, every Temp (precoloured or pseudo) is semantically + equivalent to Reg. Any operand may be Temp. + +- Before RA, only FIXED registers may occur in precoloured Temps. + Exception 1 is move: src or dst may be an argument register. + Exception 2 is call: the dst (if any) must be %eax. + +- After RA, an operand (src or dst) may refer to at most one memory cell. + Therefore, a pseudo-Temp MAY NOT occur as base or offset in an + explicit memory operand after RA. + +- After RA, a binary operation (alu, cmp, move) may refer to at most + one memory cell. Therefore, AT MOST ONE of src and dst may be a + pseudo-Temp after RA. If one of the operands (src or dst) is an + explicit memory operand, then the other operand MUST NOT be a + pseudo-Temp after RA. + +- After RA, the index in a jmp_switch must be a register. + +- After RA, the temp in a lea must be a register. + +- After RA, the temp in an imul must be a register. + +- After RA, a function's formal parameters must reside on the stack. + Therefore, the RA MUST NOT map the formals to actual registers. diff --git a/lib/hipe/x86/TODO b/lib/hipe/x86/TODO new file mode 100644 index 0000000000..7c93f7daf3 --- /dev/null +++ b/lib/hipe/x86/TODO @@ -0,0 +1,31 @@ +rtl_to_x86: +* recognise alub(X,X,sub,1,lt,L1,L2,P) and turn it into 'dec', + this might improve the reduction test code slightly (X is + the pseudo for FCALLS) +* recognise alu(Z,X,add,Y) and turn it into 'lea'. +* rewrite tailcalls as parallel assignments before regalloc + +x86: +* Use separate constructors for real regs (x86_reg) and pseudos (x86_temp). + +Frame: +* drop tailcall rewrite + +Registers: +* make the 2 regs now reserved for frame's tailcall rewrite available for arg passing + +Optimizations: +* replace jcc cc,L1; jmp L0; L1: with jcc L0; L1: (length:len/2) +* Kill move X,X insns, either in frame or finalise +* Instruction scheduling module +* We can now choose to not have HP in %esi. However, this currently loses + performance due to (a) repeated moves to/from P_HP(P), and (b) spills of + the temp that contains a copy of P_HP(P). Both of these problems should be + fixed, and then, if we don't have any noticeable performance degradation, we + should permanently change to a non-reserved HP strategy. + +Loader: + +Assembler: + +Encode: diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl new file mode 100644 index 0000000000..d77e4fed3b --- /dev/null +++ b/lib/hipe/x86/hipe_rtl_to_x86.erl @@ -0,0 +1,865 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. 
+%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% +%%% Translate 3-address RTL code to 2-address pseudo-x86 code. + +-ifdef(HIPE_AMD64). +-define(HIPE_RTL_TO_X86, hipe_rtl_to_amd64). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(ECX, rcx). +-define(EAX, rax). +-else. +-define(HIPE_RTL_TO_X86, hipe_rtl_to_x86). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(ECX, ecx). +-define(EAX, eax). +-endif. + +-module(?HIPE_RTL_TO_X86). +-export([translate/1]). + +-include("../rtl/hipe_rtl.hrl"). + +translate(RTL) -> % RTL function -> x86 defun + hipe_gensym:init(x86), + hipe_gensym:set_var(x86, ?HIPE_X86_REGISTERS:first_virtual()), + hipe_gensym:set_label(x86, hipe_gensym:get_label(rtl)), + Map0 = vmap_empty(), + {Formals, Map1} = conv_formals(hipe_rtl:rtl_params(RTL), Map0), + OldData = hipe_rtl:rtl_data(RTL), + {Code0, NewData} = conv_insn_list(hipe_rtl:rtl_code(RTL), Map1, OldData), + {RegFormals,_} = split_args(Formals), + Code = + case RegFormals of + [] -> Code0; + _ -> [hipe_x86:mk_label(hipe_gensym:get_next_label(x86)) | + move_formals(RegFormals, Code0)] + end, + IsClosure = hipe_rtl:rtl_is_closure(RTL), + IsLeaf = hipe_rtl:rtl_is_leaf(RTL), + hipe_x86:mk_defun(hipe_rtl:rtl_fun(RTL), + Formals, + IsClosure, + IsLeaf, + Code, + NewData, + [], + []). + +conv_insn_list([H|T], Map, Data) -> + {NewH, NewMap, NewData1} = conv_insn(H, Map, Data), + %% io:format("~w \n ==>\n ~w\n- - - - - - - - -\n",[H,NewH]), + {NewT, NewData2} = conv_insn_list(T, NewMap, NewData1), + {NewH ++ NewT, NewData2}; +conv_insn_list([], _, Data) -> + {[], Data}. 
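%% A minimal driver sketch (illustrative only, not used by the compiler):
%% translate/1 above turns a whole RTL defun into a pseudo-x86 defun, and
%% the result can be inspected with a pretty-printer.  The hipe_x86_pp:pp/1
%% call is an assumption about that module's interface (an amd64 build
%% would use the corresponding hipe_amd64_pp module instead).

debug_translate(RtlDefun) ->
  X86Defun = translate(RtlDefun),
  hipe_x86_pp:pp(X86Defun),   % assumed to print the #defun{} to stdout
  X86Defun.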
+ +conv_insn(I, Map, Data) -> + case I of + #alu{} -> + %% dst = src1 binop src2 + BinOp = conv_binop(hipe_rtl:alu_op(I)), + {Dst, Map0} = conv_dst(hipe_rtl:alu_dst(I), Map), + {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alu_src1(I), Map0), + {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alu_src2(I), Map1), + I2 = + case hipe_rtl:is_shift_op(hipe_rtl:alu_op(I)) of + true -> + conv_shift(Dst, Src1, BinOp, Src2); + false -> + conv_alu(Dst, Src1, BinOp, Src2, []) + end, + {FixSrc1++FixSrc2++I2, Map2, Data}; + #alub{} -> + %% dst = src1 op src2; if COND goto label + BinOp = conv_binop(hipe_rtl:alub_op(I)), + {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map), + {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0), + {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1), + Cc = conv_cond(hipe_rtl:alub_cond(I)), + I1 = [hipe_x86:mk_pseudo_jcc(Cc, + hipe_rtl:alub_true_label(I), + hipe_rtl:alub_false_label(I), + hipe_rtl:alub_pred(I))], + I2 = conv_alu(Dst, Src1, BinOp, Src2, I1), + {FixSrc1++FixSrc2++I2, Map2, Data}; + #branch{} -> + %% = src1 - src2; if COND goto label + {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map), + {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0), + Cc = conv_cond(hipe_rtl:branch_cond(I)), + I2 = conv_branch(Src1, Cc, Src2, + hipe_rtl:branch_true_label(I), + hipe_rtl:branch_false_label(I), + hipe_rtl:branch_pred(I)), + {FixSrc1++FixSrc2++I2, Map1, Data}; + #call{} -> + %% push + %% ... + %% push + %% eax := call ; if exn goto else goto Next + %% Next: + %% := eax + %% goto + {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map), + {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0), + {Fun, Map2} = conv_fun(hipe_rtl:call_fun(I), Map1), + I2 = conv_call(Dsts, Fun, Args, + hipe_rtl:call_continuation(I), + hipe_rtl:call_fail(I), + hipe_rtl:call_type(I)), + %% XXX Fixme: this ++ is probably inefficient. 
+ {FixArgs++I2, Map2, Data}; + #comment{} -> + I2 = [hipe_x86:mk_comment(hipe_rtl:comment_text(I))], + {I2, Map, Data}; + #enter{} -> + {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:enter_arglist(I), Map), + {Fun, Map1} = conv_fun(hipe_rtl:enter_fun(I), Map0), + I2 = conv_tailcall(Fun, Args, hipe_rtl:enter_type(I)), + {FixArgs++I2, Map1, Data}; + #goto{} -> + I2 = [hipe_x86:mk_jmp_label(hipe_rtl:goto_label(I))], + {I2, Map, Data}; + #label{} -> + I2 = [hipe_x86:mk_label(hipe_rtl:label_name(I))], + {I2, Map, Data}; + #load{} -> + {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map), + {FixSrc, Src, Map1} = conv_src(hipe_rtl:load_src(I), Map0), + {FixOff, Off, Map2} = conv_src(hipe_rtl:load_offset(I), Map1), + I2 = case {hipe_rtl:load_size(I), hipe_rtl:load_sign(I)} of + {byte, signed} -> + [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'byte'), Dst)]; + {byte, unsigned} -> + [hipe_x86:mk_movzx(hipe_x86:mk_mem(Src, Off, 'byte'), Dst)]; + {int16, signed} -> + [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'int16'), Dst)]; + {int16, unsigned} -> + [hipe_x86:mk_movzx(hipe_x86:mk_mem(Src, Off, 'int16'), Dst)]; + {LoadSize, LoadSign} -> + mk_load(LoadSize, LoadSign, Src, Off, Dst) + end, + {FixSrc++FixOff++I2, Map2, Data}; + #load_address{} -> + {Dst, Map0} = conv_dst(hipe_rtl:load_address_dst(I), Map), + Addr = hipe_rtl:load_address_addr(I), + Type = hipe_rtl:load_address_type(I), + Src = hipe_x86:mk_imm_from_addr(Addr, Type), + I2 = mk_load_address(Type, Src, Dst), + {I2, Map0, Data}; + #load_atom{} -> + {Dst, Map0} = conv_dst(hipe_rtl:load_atom_dst(I), Map), + Src = hipe_x86:mk_imm_from_atom(hipe_rtl:load_atom_atom(I)), + I2 = [hipe_x86:mk_move(Src, Dst)], + {I2, Map0, Data}; + #move{} -> + {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map), + {FixSrc, Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0), + I2 = [hipe_x86:mk_move(Src, Dst)], + {FixSrc++I2, Map1, Data}; + #return{} -> + {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:return_varlist(I), Map), + %% frame will fill in npop later, hence the "mk_ret(-1)" + I2 = move_retvals(Args, [hipe_x86:mk_ret(-1)]), + {FixArgs++I2, Map0, Data}; + #store{} -> + {Ptr, Map0} = conv_dst(hipe_rtl:store_base(I), Map), + {FixSrc, Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0), + {FixOff, Off, Map2} = conv_src(hipe_rtl:store_offset(I), Map1), + I2 = mk_store(hipe_rtl:store_size(I), Src, Ptr, Off), + {FixSrc++FixOff++I2, Map2, Data}; + #switch{} -> % this one also updates Data :-( + %% from hipe_rtl2sparc, but we use a hairy addressing mode + %% instead of doing the arithmetic manually + Labels = hipe_rtl:switch_labels(I), + LMap = [{label,L} || L <- Labels], + {NewData, JTabLab} = + case hipe_rtl:switch_sort_order(I) of + [] -> + hipe_consttab:insert_block(Data, word, LMap); + SortOrder -> + hipe_consttab:insert_sorted_block( + Data, word, LMap, SortOrder) + end, + %% no immediates allowed here + {Index, Map1} = conv_dst(hipe_rtl:switch_src(I), Map), + I2 = mk_jmp_switch(Index, JTabLab, Labels), + {I2, Map1, NewData}; + #fload{} -> + {Dst, Map0} = conv_dst(hipe_rtl:fload_dst(I), Map), + {[], Src, Map1} = conv_src(hipe_rtl:fload_src(I), Map0), + {[], Off, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1), + I2 = [hipe_x86:mk_fmove(hipe_x86:mk_mem(Src, Off, 'double'),Dst)], + {I2, Map2, Data}; + #fstore{} -> + {Dst, Map0} = conv_dst(hipe_rtl:fstore_base(I), Map), + {[], Src, Map1} = conv_src(hipe_rtl:fstore_src(I), Map0), + {[], Off, Map2} = conv_src(hipe_rtl:fstore_offset(I), Map1), + I2 = [hipe_x86:mk_fmove(Src, hipe_x86:mk_mem(Dst, Off, 'double'))], + {I2, 
Map2, Data}; + #fp{} -> + {Dst, Map0} = conv_dst(hipe_rtl:fp_dst(I), Map), + {[], Src1, Map1} = conv_src(hipe_rtl:fp_src1(I), Map0), + {[], Src2, Map2} = conv_src(hipe_rtl:fp_src2(I), Map1), + FpBinOp = conv_fp_binop(hipe_rtl:fp_op(I)), + I2 = conv_fp_binary(Dst, Src1, FpBinOp, Src2), + {I2, Map2, Data}; + #fp_unop{} -> + {Dst, Map0} = conv_dst(hipe_rtl:fp_unop_dst(I), Map), + {[], Src, Map1} = conv_src(hipe_rtl:fp_unop_src(I), Map0), + FpUnOp = conv_fp_unop(hipe_rtl:fp_unop_op(I)), + I2 = conv_fp_unary(Dst, Src, FpUnOp), + {I2, Map1, Data}; + #fmove{} -> + {Dst, Map0} = conv_dst(hipe_rtl:fmove_dst(I), Map), + {[], Src, Map1} = conv_src(hipe_rtl:fmove_src(I), Map0), + I2 = [hipe_x86:mk_fmove(Src, Dst)], + {I2, Map1, Data}; + #fconv{} -> + {Dst, Map0} = conv_dst(hipe_rtl:fconv_dst(I), Map), + {[], Src, Map1} = conv_src(hipe_rtl:fconv_src(I), Map0), + I2 = [hipe_x86:mk_fmove(Src, Dst)], + {I2, Map1, Data}; + X -> + %% gctest?? + %% jmp, jmp_link, jsr, esr, multimove, + %% stackneed, pop_frame, restore_frame, save_frame + throw({?MODULE, {"unknown RTL instruction", X}}) + end. + +%%% Finalise the conversion of a 3-address ALU operation, taking +%%% care to not introduce more temps and moves than necessary. + +conv_alu(Dst, Src1, 'imul', Src2, Tail) -> + mk_imul(Src1, Src2, Dst, Tail); +conv_alu(Dst, Src1, BinOp, Src2, Tail) -> + case same_opnd(Dst, Src1) of + true -> % x = x op y + [hipe_x86:mk_alu(BinOp, Src2, Dst) | Tail]; % x op= y + false -> % z = x op y, where z != x + case same_opnd(Dst, Src2) of + false -> % z = x op y, where z != x && z != y + [hipe_x86:mk_move(Src1, Dst), % z = x + hipe_x86:mk_alu(BinOp, Src2, Dst) | Tail]; % z op= y + true -> % y = x op y, where y != x + case binop_commutes(BinOp) of + true -> % y = y op x + [hipe_x86:mk_alu(BinOp, Src1, Dst) | Tail]; % y op= x + false -> % y = x op y, where op doesn't commute + Tmp = clone_dst(Dst), + [hipe_x86:mk_move(Src1, Tmp), % t = x + hipe_x86:mk_alu(BinOp, Src2, Tmp), % t op= y + hipe_x86:mk_move(Tmp, Dst) | Tail] % y = t + end + end + end. + +mk_imul(Src1, Src2, Dst, Tail) -> + case hipe_x86:is_imm(Src1) of + true -> + case hipe_x86:is_imm(Src2) of + true -> + mk_imul_iit(Src1, Src2, Dst, Tail); + _ -> + mk_imul_itt(Src1, Src2, Dst, Tail) + end; + _ -> + case hipe_x86:is_imm(Src2) of + true -> + mk_imul_itt(Src2, Src1, Dst, Tail); + _ -> + mk_imul_ttt(Src1, Src2, Dst, Tail) + end + end. + +mk_imul_iit(Src1, Src2, Dst, Tail) -> + io:format("~w: RTL mul with two immediates\n", [?MODULE]), + Tmp2 = new_untagged_temp(), + [hipe_x86:mk_move(Src2, Tmp2) | + mk_imul_itt(Src1, Tmp2, Dst, Tail)]. + +mk_imul_itt(Src1, Src2, Dst, Tail) -> + [hipe_x86:mk_imul(Src1, Src2, Dst) | Tail]. + +mk_imul_ttt(Src1, Src2, Dst, Tail) -> + case same_opnd(Dst, Src1) of + true -> + [hipe_x86:mk_imul([], Src2, Dst) | Tail]; + false -> + case same_opnd(Dst, Src2) of + true -> + [hipe_x86:mk_imul([], Src1, Dst) | Tail]; + false -> + [hipe_x86:mk_move(Src1, Dst), + hipe_x86:mk_imul([], Src2, Dst) | Tail] + end + end. 
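%% Worked example (a sketch, assuming hipe_gensym has been initialised for
%% x86 as translate/1 does): lowering the 3-address operation "T3 := T1 + T2"
%% with three distinct temps is handled by the "z = x op y, where z != x &&
%% z != y" case of conv_alu above and yields a copy followed by a 2-address
%% add, i.e. "move T1,T3; add T2,T3".

conv_alu_example() ->
  T1 = hipe_x86:mk_new_temp('tagged'),
  T2 = hipe_x86:mk_new_temp('tagged'),
  T3 = hipe_x86:mk_new_temp('tagged'),
  conv_alu(T3, T1, 'add', T2, []).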
+ +conv_shift(Dst, Src1, BinOp, Src2) -> + {NewSrc2,I1} = + case hipe_x86:is_imm(Src2) of + true -> + {Src2, []}; + false -> + NewSrc = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?ECX(), 'untagged'), + {NewSrc, [hipe_x86:mk_move(Src2, NewSrc)]} + end, + I2 = case same_opnd(Dst, Src1) of + true -> % x = x op y + [hipe_x86:mk_shift(BinOp, NewSrc2, Dst)]; % x op= y + false -> % z = x op y, where z != x + case same_opnd(Dst, Src2) of + false -> % z = x op y, where z != x && z != y + [hipe_x86:mk_move(Src1, Dst), % z = x + hipe_x86:mk_shift(BinOp, NewSrc2, Dst)];% z op= y + true -> % y = x op y, no shift op commutes + Tmp = clone_dst(Dst), + [hipe_x86:mk_move(Src1, Tmp), % t = x + hipe_x86:mk_shift(BinOp, NewSrc2, Tmp), % t op= y + hipe_x86:mk_move(Tmp, Dst)] % y = t + end + end, + I1 ++ I2. + +%%% Finalise the conversion of a conditional branch operation, taking +%%% care to not introduce more temps and moves than necessary. + +conv_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) -> + case hipe_x86:is_imm(Src1) of + false -> + mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred); + true -> + case hipe_x86:is_imm(Src2) of + false -> + NewCc = commute_cc(Cc), + mk_branch(Src2, NewCc, Src1, TrueLab, FalseLab, Pred); + true -> + %% two immediates, let the optimiser clean it up + Tmp = new_untagged_temp(), + [hipe_x86:mk_move(Src1, Tmp) | + mk_branch(Tmp, Cc, Src2, TrueLab, FalseLab, Pred)] + end + end. + +mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) -> + %% PRE: not(is_imm(Src1)) + [hipe_x86:mk_cmp(Src2, Src1), + hipe_x86:mk_pseudo_jcc(Cc, TrueLab, FalseLab, Pred)]. + +%%% Convert an RTL ALU or ALUB binary operator. + +conv_binop(BinOp) -> + case BinOp of + 'add' -> 'add'; + 'sub' -> 'sub'; + 'or' -> 'or'; + 'and' -> 'and'; + 'xor' -> 'xor'; + 'sll' -> 'shl'; + 'srl' -> 'shr'; + 'sra' -> 'sar'; + 'mul' -> 'imul'; + %% andnot ??? + _ -> exit({?MODULE, {"unknown binop", BinOp}}) + end. + +binop_commutes(BinOp) -> + case BinOp of + 'add' -> true; + 'or' -> true; + 'and' -> true; + 'xor' -> true; + _ -> false + end. + +%%% Convert an RTL conditional operator. + +conv_cond(Cond) -> + case Cond of + eq -> 'e'; + ne -> 'ne'; + gt -> 'g'; + gtu -> 'a'; + ge -> 'ge'; + geu -> 'ae'; + lt -> 'l'; + ltu -> 'b'; + le -> 'le'; + leu -> 'be'; + overflow -> 'o'; + not_overflow -> 'no'; + _ -> exit({?MODULE, {"unknown rtl cond", Cond}}) + end. + +commute_cc(Cc) -> % if x Cc y, then y commute_cc(Cc) x + case Cc of + 'e' -> 'e'; % ==, == + 'ne' -> 'ne'; % !=, != + 'g' -> 'l'; % >, < + 'a' -> 'b'; % >u, 'le'; % >=, <= + 'ae' -> 'be'; % >=u, <=u + 'l' -> 'g'; % <, > + 'b' -> 'a'; % u + 'le' -> 'ge'; % <=, >= + 'be' -> 'ae'; % <=u, >=u + %% overflow/not_overflow: n/a + _ -> exit({?MODULE, {"unknown cc", Cc}}) + end. + +%%% Test if Dst and Src are the same operand. + +same_opnd(Dst, Src) -> Dst =:= Src. + +%%% Finalise the conversion of a tailcall instruction. + +conv_tailcall(Fun, Args, Linkage) -> + Arity = length(Args), + {RegArgs,StkArgs} = split_args(Args), + move_actuals(RegArgs, + [hipe_x86:mk_pseudo_tailcall_prepare(), + hipe_x86:mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage)]). + +split_args(Args) -> + split_args(0, ?HIPE_X86_REGISTERS:nr_args(), Args, []). +split_args(I, N, [Arg|Args], RegArgs) when I < N -> + Reg = ?HIPE_X86_REGISTERS:arg(I), + Temp = hipe_x86:mk_temp(Reg, 'tagged'), + split_args(I+1, N, Args, [{Arg,Temp}|RegArgs]); +split_args(_, _, StkArgs, RegArgs) -> + {RegArgs, StkArgs}. 
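%% Usage sketch (illustrative; foo:bar/2 is just an example MFA): a tailcall
%% is converted into moves of the register-passed actuals (how many depends
%% on ?HIPE_X86_REGISTERS:nr_args()), a pseudo_tailcall_prepare, and a
%% pseudo_tailcall that carries the remaining arguments as stack arguments.

tailcall_example(Arg1, Arg2) ->
  conv_tailcall(hipe_x86:mk_mfa(foo, bar, 2), [Arg1, Arg2], remote).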
+ +move_actuals([], Rest) -> Rest; +move_actuals([{Src,Dst}|Actuals], Rest) -> + move_actuals(Actuals, [hipe_x86:mk_move(Src, Dst) | Rest]). + +move_formals([], Rest) -> Rest; +move_formals([{Dst,Src}|Formals], Rest) -> + move_formals(Formals, [hipe_x86:mk_move(Src, Dst) | Rest]). + +%%% Finalise the conversion of a call instruction. + +conv_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) -> + case hipe_x86:is_prim(Fun) of + true -> + conv_primop_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage); + false -> + conv_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) + end. + +conv_primop_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) -> + case hipe_x86:prim_prim(Prim) of + 'fwait' -> + conv_fwait_call(Dsts, Args, ContLab, ExnLab, Linkage); + _ -> + conv_general_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) + end. + +conv_fwait_call([], [], [], [], not_remote) -> + [hipe_x86:mk_fp_unop('fwait', [])]. + +conv_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) -> + %% The backend does not support pseudo_calls without a + %% continuation label, so we make sure each call has one. + {RealContLab, Tail} = + case do_call_results(Dsts) of + [] -> + %% Avoid consing up a dummy basic block if the moves list + %% is empty, as is typical for calls to suspend/0. + %% This should be subsumed by a general "optimise the CFG" + %% module, and could probably be removed. + case ContLab of + [] -> + NewContLab = hipe_gensym:get_next_label(x86), + {NewContLab, [hipe_x86:mk_label(NewContLab)]}; + _ -> + {ContLab, []} + end; + Moves -> + %% Change the call to continue at a new basic block. + %% In this block move the result registers to the Dsts, + %% then continue at the call's original continuation. + %% + %% This should be fixed to propagate "fallthrough calls" + %% When the rest of the backend supports them. + NewContLab = hipe_gensym:get_next_label(x86), + case ContLab of + [] -> + %% This is just a fallthrough + %% No jump back after the moves. + {NewContLab, + [hipe_x86:mk_label(NewContLab) | + Moves]}; + _ -> + %% The call has a continuation + %% jump to it. + {NewContLab, + [hipe_x86:mk_label(NewContLab) | + Moves ++ + [hipe_x86:mk_jmp_label(ContLab)]]} + end + end, + SDesc = hipe_x86:mk_sdesc(ExnLab, 0, length(Args), {}), + CallInsn = hipe_x86:mk_pseudo_call(Fun, SDesc, RealContLab, Linkage), + {RegArgs,StkArgs} = split_args(Args), + do_push_args(StkArgs, move_actuals(RegArgs, [CallInsn | Tail])). + +do_push_args([Arg|Args], Tail) -> + [hipe_x86:mk_push(Arg) | do_push_args(Args, Tail)]; +do_push_args([], Tail) -> + Tail. + +%%% Move return values from the return value registers. + +do_call_results(DstList) -> + do_call_results(DstList, 0, []). + +do_call_results([Dst|DstList], I, Rest) -> + Src = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:ret(I), 'tagged'), + Move = hipe_x86:mk_move(Src, Dst), + do_call_results(DstList, I+1, [Move|Rest]); +do_call_results([], _, Insns) -> Insns. + +%%% Move return values to the return value registers. + +move_retvals(SrcLst, Rest) -> + move_retvals(SrcLst, 0, Rest). + +move_retvals([Src|SrcLst], I, Rest) -> + Dst = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:ret(I), 'tagged'), + Move = hipe_x86:mk_move(Src, Dst), + move_retvals(SrcLst, I+1, [Move|Rest]); +move_retvals([], _, Insns) -> Insns. 
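%% Sketch: returning a single value V produces one move into the first
%% return register (?HIPE_X86_REGISTERS:ret(0)) followed by the ret
%% pseudo-instruction whose pop count is the -1 placeholder that the frame
%% pass fills in later, as noted in the #return{} case above.

return_example(V) ->
  move_retvals([V], [hipe_x86:mk_ret(-1)]).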
+ +%%% Convert a 'fun' operand (MFA, prim, or temp) + +conv_fun(Fun, Map) -> + case hipe_rtl:is_var(Fun) of + true -> + conv_dst(Fun, Map); + false -> + case hipe_rtl:is_reg(Fun) of + true -> + conv_dst(Fun, Map); + false -> + case Fun of + Prim when is_atom(Prim) -> + {hipe_x86:mk_prim(Prim), Map}; + {M,F,A} when is_atom(M), is_atom(F), is_integer(A) -> + {hipe_x86:mk_mfa(M,F,A), Map}; + _ -> + exit({?MODULE,conv_fun,Fun}) + end + end + end. + +%%% Convert an RTL source operand (imm/var/reg). + +conv_src(Opnd, Map) -> + case hipe_rtl:is_imm(Opnd) of + true -> + conv_imm(Opnd, Map); + false -> + {NewOpnd,NewMap} = conv_dst(Opnd, Map), + {[], NewOpnd, NewMap} + end. + +-ifdef(HIPE_AMD64). +conv_imm(Opnd, Map) -> + ImmVal = hipe_rtl:imm_value(Opnd), + case is_imm64(ImmVal) of + true -> + Temp = hipe_x86:mk_new_temp('untagged'), + {[hipe_x86:mk_move64(hipe_x86:mk_imm(ImmVal), Temp)], Temp, Map}; + false -> + {[], hipe_x86:mk_imm(ImmVal), Map} + end. + +is_imm64(Value) when is_integer(Value) -> + (Value < -(1 bsl (32 - 1))) or (Value > (1 bsl (32 - 1)) - 1); +is_imm64({_,atom}) -> false; % Atoms are 32 bits. +is_imm64({_,c_const}) -> false; % c_consts are 32 bits. +is_imm64({_,_}) -> true . % Other relocs are 64 bits. +-else. +conv_imm(Opnd, Map) -> + {[], hipe_x86:mk_imm(hipe_rtl:imm_value(Opnd)), Map}. +-endif. + +conv_src_list([O|Os], Map) -> + {NewInstr, V, Map1} = conv_src(O, Map), + {Instrs, Vs, Map2} = conv_src_list(Os, Map1), + {Instrs++NewInstr, [V|Vs], Map2}; +conv_src_list([], Map) -> + {[], [], Map}. + +%%% Convert an RTL destination operand (var/reg). + +conv_dst(Opnd, Map) -> + {Name, Type} = + case hipe_rtl:is_var(Opnd) of + true -> + {hipe_rtl:var_index(Opnd), 'tagged'}; + false -> + case hipe_rtl:is_fpreg(Opnd) of + true -> + {hipe_rtl:fpreg_index(Opnd), 'double'}; + false -> + {hipe_rtl:reg_index(Opnd), 'untagged'} + end + end, + case ?HIPE_X86_REGISTERS:is_precoloured(Name) of + true -> + case ?HIPE_X86_REGISTERS:proc_offset(Name) of + false -> + {hipe_x86:mk_temp(Name, Type), Map}; + Offset -> + Preg = ?HIPE_X86_REGISTERS:proc_pointer(), + Pbase = hipe_x86:mk_temp(Preg, 'untagged'), + Poff = hipe_x86:mk_imm(Offset), + {hipe_x86:mk_mem(Pbase, Poff, Type), Map} + end; + false -> + case vmap_lookup(Map, Opnd) of + {value, NewTemp} -> + {NewTemp, Map}; + _ -> + NewTemp = hipe_x86:mk_new_temp(Type), + {NewTemp, vmap_bind(Map, Opnd, NewTemp)} + end + end. + +conv_dst_list([O|Os], Map) -> + {Dst, Map1} = conv_dst(O, Map), + {Dsts, Map2} = conv_dst_list(Os, Map1), + {[Dst|Dsts], Map2}; +conv_dst_list([], Map) -> + {[], Map}. + +conv_formals(Os, Map) -> + conv_formals(?HIPE_X86_REGISTERS:nr_args(), Os, Map, []). + +conv_formals(N, [O|Os], Map, Res) -> + Type = + case hipe_rtl:is_var(O) of + true -> 'tagged'; + false ->'untagged' + end, + Dst = + if N > 0 -> hipe_x86:mk_new_temp(Type); % allocatable + true -> hipe_x86:mk_new_nonallocatable_temp(Type) + end, + Map1 = vmap_bind(Map, O, Dst), + conv_formals(N-1, Os, Map1, [Dst|Res]); +conv_formals(_, [], Map, Res) -> + {lists:reverse(Res), Map}. + +%%% typeof_src -- what's src's type? + +typeof_src(Src) -> + case hipe_x86:is_imm(Src) of + true -> + 'untagged'; + _ -> + typeof_dst(Src) + end. + +%%% typeof_dst -- what's dst's type? + +typeof_dst(Dst) -> + case hipe_x86:is_temp(Dst) of + true -> + hipe_x86:temp_type(Dst); + _ -> + hipe_x86:mem_type(Dst) + end. + +%%% clone_dst -- conjure up a scratch reg with same type as dst + +clone_dst(Dst) -> + hipe_x86:mk_new_temp(typeof_dst(Dst)). 
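%% Sketch of the operand-conversion contract above (illustrative, assuming
%% hipe_gensym has been initialised): a non-precoloured RTL var becomes a
%% fresh 'tagged' pseudo-temp, and the vmap remembers the binding so that
%% every later occurrence of the same RTL operand maps to the same temp.

conv_dst_example(RtlVar) ->
  {Temp1, Map1} = conv_dst(RtlVar, vmap_empty()),
  {Temp2, _Map2} = conv_dst(RtlVar, Map1),
  true = same_opnd(Temp1, Temp2),
  Temp1.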
+ +%%% new_untagged_temp -- conjure up an untagged scratch reg + +new_untagged_temp() -> + hipe_x86:mk_new_temp('untagged'). + +%%% Map from RTL var/reg operands to x86 temps. + +vmap_empty() -> + gb_trees:empty(). + +vmap_lookup(Map, Key) -> + gb_trees:lookup(Key, Map). + +vmap_bind(Map, Key, Val) -> + gb_trees:insert(Key, Val, Map). + +%%% Finalise the conversion of a 2-address FP operation. + +conv_fp_unary(Dst, Src, FpUnOp) -> + case same_opnd(Dst, Src) of + true -> + [hipe_x86:mk_fp_unop(FpUnOp, Dst)]; + _ -> + [hipe_x86:mk_fmove(Src, Dst), + hipe_x86:mk_fp_unop(FpUnOp, Dst)] + end. + +conv_fp_unop(RtlFpUnOp) -> + case RtlFpUnOp of + 'fchs' -> 'fchs' + end. + +%%% Finalise the conversion of a 3-address FP operation. + +conv_fp_binary(Dst, Src1, FpBinOp, Src2) -> + case same_opnd(Dst, Src1) of + true -> % x = x op y + [hipe_x86:mk_fp_binop(FpBinOp, Src2, Dst)]; % x op= y + false -> % z = x op y, where z != x + case same_opnd(Dst, Src2) of + false -> % z = x op y, where z != x && z != y + [hipe_x86:mk_fmove(Src1, Dst), % z = x + hipe_x86:mk_fp_binop(FpBinOp, Src2, Dst)]; % z op= y + true -> % y = x op y, where y != x + case fp_binop_commutes(FpBinOp) of + true -> % y = y op x + [hipe_x86:mk_fp_binop(FpBinOp, Src1, Dst)]; % y op= x + false -> % y = x op y, where op doesn't commute + RevFpBinOp = reverse_fp_binop(FpBinOp), + [hipe_x86:mk_fp_binop(RevFpBinOp, Src1, Dst)] + end + end + end. + +%%% Convert an RTL FP binary operator. + +conv_fp_binop(RtlFpBinOp) -> + case RtlFpBinOp of + 'fadd' -> 'fadd'; + 'fdiv' -> 'fdiv'; + 'fmul' -> 'fmul'; + 'fsub' -> 'fsub' + end. + +fp_binop_commutes(FpBinOp) -> + case FpBinOp of + 'fadd' -> true; + 'fmul' -> true; + _ -> false + end. + +reverse_fp_binop(FpBinOp) -> + case FpBinOp of + 'fsub' -> 'fsubr'; + 'fdiv' -> 'fdivr' + end. + +%%% Create a jmp_switch instruction. + +-ifdef(HIPE_AMD64). +mk_jmp_switch(Index, JTabLab, Labels) -> + JTabReg = hipe_x86:mk_new_temp('untagged'), + JTabImm = hipe_x86:mk_imm_from_addr(JTabLab, constant), + [hipe_x86:mk_move64(JTabImm, JTabReg), + hipe_x86:mk_jmp_switch(Index, JTabReg, Labels)]. +-else. +mk_jmp_switch(Index, JTabLab, Labels) -> + %% this is equivalent to "jmp *JTabLab(,Index,4)" + %% ("r = Index; r *= 4; r += &JTab; jmp *r" isn't as nice) + [hipe_x86:mk_jmp_switch(Index, JTabLab, Labels)]. +-endif. + +%%% Finalise the translation of a load_address instruction. + +-ifdef(HIPE_AMD64). +mk_load_address(Type, Src, Dst) -> + case Type of + c_const -> % 32 bits + [hipe_x86:mk_move(Src, Dst)]; + _ -> + [hipe_x86:mk_move64(Src, Dst)] + end. +-else. +mk_load_address(_Type, Src, Dst) -> + [hipe_x86:mk_move(Src, Dst)]. +-endif. + +%%% Translate 32-bit and larger loads. + +-ifdef(HIPE_AMD64). +mk_load(LoadSize, LoadSign, Src, Off, Dst) -> + case {LoadSize, LoadSign} of + {int32, signed} -> + [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'int32'), Dst)]; + {int32, unsigned} -> + %% The processor zero-extends for us. No need for 'movzx'. + [hipe_x86:mk_move(hipe_x86:mk_mem(Src, Off, 'int32'), Dst)]; + {_, _} -> + mk_load_word(Src, Off, Dst) + end. +-else. +mk_load(_LoadSize, _LoadSign, Src, Off, Dst) -> + mk_load_word(Src, Off, Dst). +-endif. + +mk_load_word(Src, Off, Dst) -> + Type = typeof_dst(Dst), + [hipe_x86:mk_move(hipe_x86:mk_mem(Src, Off, Type), Dst)]. + +%%% Finalise the translation of a store instruction. + +-ifdef(HIPE_AMD64). 
+mk_store(RtlStoreSize, Src, Ptr, Off) -> + Type = case RtlStoreSize of + word -> + typeof_src(Src); + OtherType -> + OtherType + end, + [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))]. +-else. +mk_store(RtlStoreSize, Src, Ptr, Off) -> + case RtlStoreSize of + word -> + Type = typeof_src(Src), + [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))]; + int32 -> + Type = typeof_src(Src), + [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))]; + int16 -> + Type = 'int16', + [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))]; + byte -> + Type = 'byte', + {NewSrc, I1} = conv_small_store(Src), + I1 ++ [hipe_x86:mk_move(NewSrc, hipe_x86:mk_mem(Ptr, Off, Type))] + end. + +conv_small_store(Src) -> + case hipe_x86:is_imm(Src) of + true -> + {Src, []}; + false -> + NewSrc = hipe_x86:mk_temp(hipe_x86_registers:eax(), 'untagged'), + {NewSrc, [hipe_x86:mk_move(Src, NewSrc)]} + end. +-endif. diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl new file mode 100644 index 0000000000..3298151366 --- /dev/null +++ b/lib/hipe/x86/hipe_x86.erl @@ -0,0 +1,496 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% representation of 2-address pseudo-amd64 code + +-module(hipe_x86). + +-include("hipe_x86.hrl"). + +%% Commented out are interface functions which are currently not used. 
+-export([mk_temp/2, + %% mk_nonallocatable_temp/2, + mk_new_temp/1, + mk_new_nonallocatable_temp/1, + is_temp/1, + temp_reg/1, + temp_type/1, + temp_is_allocatable/1, + + mk_imm/1, + mk_imm_from_addr/2, + mk_imm_from_atom/1, + is_imm/1, + %% imm_value/1, + + mk_mem/3, + %% is_mem/1, + %% mem_base/1, + %% mem_off/1, + mem_type/1, + + mk_fpreg/1, + mk_fpreg/2, + %% is_fpreg/1, + %% fpreg_is_pseudo/1, + %% fpreg_reg/1, + + mk_mfa/3, + %% is_mfa/1, + + mk_prim/1, + is_prim/1, + prim_prim/1, + + mk_sdesc/4, + + %% insn_type/1, + + mk_alu/3, + %% is_alu/1, + alu_op/1, + alu_src/1, + alu_dst/1, + + mk_call/3, + %% is_call/1, + call_fun/1, + call_sdesc/1, + call_linkage/1, + + %% mk_cmovcc/3, + %% is_cmovcc/1, + cmovcc_cc/1, + cmovcc_src/1, + cmovcc_dst/1, + + mk_cmp/2, + %% is_cmp/1, + cmp_src/1, + cmp_dst/1, + + mk_comment/1, + %% is_comment/1, + %% comment_term/1, + + mk_fmove/2, + is_fmove/1, + fmove_src/1, + fmove_dst/1, + + mk_fp_unop/2, + %% is_fp_unop/1, + fp_unop_arg/1, + fp_unop_op/1, + + mk_fp_binop/3, + %% is_fp_binop/1, + fp_binop_src/1, + fp_binop_dst/1, + fp_binop_op/1, + + mk_imul/3, + imul_imm_opt/1, + imul_src/1, + imul_temp/1, + + mk_jcc/2, + %% is_jcc/1, + jcc_cc/1, + jcc_label/1, + + mk_jmp_fun/2, + %% is_jmp_fun/1, + jmp_fun_fun/1, + jmp_fun_linkage/1, + + mk_jmp_label/1, + %% is_jmp_label/1, + jmp_label_label/1, + + mk_jmp_switch/3, + %% is_jmp_switch/1, + jmp_switch_temp/1, + jmp_switch_jtab/1, + %% jmp_switch_labels/1, + + mk_label/1, + is_label/1, + label_label/1, + + mk_lea/2, + %% is_lea/1, + lea_mem/1, + lea_temp/1, + + mk_move/2, + is_move/1, + move_src/1, + move_dst/1, + mk_move64/2, + %% is_move64/1, + move64_src/1, + move64_dst/1, + + mk_movsx/2, + %% is_movsx/1, + movsx_src/1, + movsx_dst/1, + + mk_movzx/2, + %% is_movzx/1, + movzx_src/1, + movzx_dst/1, + + mk_pseudo_call/4, + %% is_pseudo_call/1, + pseudo_call_fun/1, + pseudo_call_sdesc/1, + pseudo_call_contlab/1, + pseudo_call_linkage/1, + + mk_pseudo_jcc/4, + %% is_pseudo_jcc/1, + %% pseudo_jcc_cc/1, + %% pseudo_jcc_true_label/1, + %% pseudo_jcc_false_label/1, + %% pseudo_jcc_pred/1, + + mk_pseudo_spill/1, + + mk_pseudo_tailcall/4, + %% is_pseudo_tailcall/1, + pseudo_tailcall_fun/1, + %% pseudo_tailcall_arity/1, + pseudo_tailcall_stkargs/1, + pseudo_tailcall_linkage/1, + + mk_pseudo_tailcall_prepare/0, + %% is_pseudo_tailcall_prepare/1, + + mk_push/1, + %% is_push/1, + push_src/1, + + %% mk_pop/1, + pop_dst/1, + + mk_ret/1, + %% is_ret/1, + ret_npop/1, + + mk_shift/3, + %% is_shift/1, + shift_op/1, + shift_src/1, + shift_dst/1, + + %% mk_test/2, + test_src/1, + test_dst/1, + + mk_defun/8, + defun_mfa/1, + defun_formals/1, + defun_is_closure/1, + defun_is_leaf/1, + defun_code/1, + defun_data/1, + defun_var_range/1 + %% defun_label_range/1, + + %% highest_temp/1 + ]). + +%%% +%%% Low-level accessors. +%%% + +mk_temp(Reg, Type) when is_integer(Reg) -> + #x86_temp{reg=Reg, type=Type, allocatable=true}. +mk_nonallocatable_temp(Reg, Type) when is_integer(Reg) -> + #x86_temp{reg=Reg, type=Type, allocatable=false}. +mk_new_temp(Type) -> + mk_temp(hipe_gensym:get_next_var(x86), Type). +mk_new_nonallocatable_temp(Type) -> + mk_nonallocatable_temp(hipe_gensym:get_next_var(x86), Type). +is_temp(X) -> case X of #x86_temp{} -> true; _ -> false end. +temp_reg(#x86_temp{reg=Reg}) when is_integer(Reg) -> Reg. +temp_type(#x86_temp{type=Type}) -> Type. +temp_is_allocatable(#x86_temp{allocatable=A}) -> A. + +mk_imm(Value) -> #x86_imm{value=Value}. +mk_imm_from_addr(Addr, Type) -> + mk_imm({Addr, Type}). 
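%% Construction sketch (illustrative; the register number 0 is arbitrary):
%% building the source-operand kinds used throughout this representation
%% (src ::= temp | mem | imm, see hipe_x86.hrl), here a memory operand for
%% the word at Base+8.

operand_example() ->
  Base = mk_temp(0, 'untagged'),
  Off  = mk_imm(8),
  mk_mem(Base, Off, 'tagged').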
+mk_imm_from_atom(Atom) -> + mk_imm(Atom). +is_imm(X) -> case X of #x86_imm{} -> true; _ -> false end. +%% imm_value(#x86_imm{value=Value}) -> Value. + +mk_mem(Base, Off, Type) -> #x86_mem{base=Base, off=Off, type=Type}. +%% is_mem(X) -> case X of #x86_mem{} -> true; _ -> false end. +%% mem_base(#x86_mem{base=Base}) -> Base. +%% mem_off(#x86_mem{off=Off}) -> Off. +mem_type(#x86_mem{type=Type}) -> Type. + +mk_fpreg(Reg) -> #x86_fpreg{reg=Reg, pseudo=true}. +mk_fpreg(Reg, Pseudo) -> #x86_fpreg{reg=Reg, pseudo=Pseudo}. +%% is_fpreg(F) -> case F of #x86_fpreg{} -> true;_ -> false end. +%% fpreg_is_pseudo(#x86_fpreg{pseudo=Pseudo}) -> Pseudo. +%% fpreg_reg(#x86_fpreg{reg=Reg}) -> Reg. + +mk_mfa(M, F, A) -> #x86_mfa{m=M, f=F, a=A}. +%% is_mfa(X) -> case X of #x86_mfa{} -> true; _ -> false end. + +mk_prim(Prim) -> #x86_prim{prim=Prim}. +is_prim(X) -> case X of #x86_prim{} -> true; _ -> false end. +prim_prim(#x86_prim{prim=Prim}) -> Prim. + +mk_sdesc(ExnLab, FSize, Arity, Live) -> + #x86_sdesc{exnlab=ExnLab, fsize=FSize, arity=Arity, live=Live}. + +insn_type(Insn) -> + element(1, Insn). + +is_insn_type(Insn, Type) -> + case insn_type(Insn) of + Type -> true; + _ -> false + end. + +mk_alu(Op, Src, Dst) -> #alu{aluop=Op, src=Src, dst=Dst}. +%% is_alu(Insn) -> is_insn_type(Insn, alu). +alu_op(#alu{aluop=Op}) -> Op. +alu_src(#alu{src=Src}) -> Src. +alu_dst(#alu{dst=Dst}) -> Dst. + +mk_call(Fun, SDesc, Linkage) -> + check_linkage(Linkage), + #call{'fun'=Fun, sdesc=SDesc, linkage=Linkage}. +%% is_call(Insn) -> is_insn_type(Insn, call). +call_fun(#call{'fun'=Fun}) -> Fun. +call_sdesc(#call{sdesc=SDesc}) -> SDesc. +call_linkage(#call{linkage=Linkage}) -> Linkage. + +check_linkage(Linkage) -> + case Linkage of + remote -> []; + not_remote -> [] + end. + +%% mk_cmovcc(Cc, Src, Dst) -> #cmovcc{cc=Cc, src=Src, dst=Dst}. +%% is_cmovcc(Insn) -> is_insn_type(Insn, cmovcc). +cmovcc_cc(#cmovcc{cc=Cc}) -> Cc. +cmovcc_src(#cmovcc{src=Src}) -> Src. +cmovcc_dst(#cmovcc{dst=Dst}) -> Dst. + +mk_cmp(Src, Dst) -> #cmp{src=Src, dst=Dst}. +%% is_cmp(Insn) -> is_insn_type(Insn, cmp). +cmp_src(#cmp{src=Src}) -> Src. +cmp_dst(#cmp{dst=Dst}) -> Dst. + +%% mk_test(Src, Dst) -> #test{src=Src, dst=Dst}. +test_src(#test{src=Src}) -> Src. +test_dst(#test{dst=Dst}) -> Dst. + +mk_comment(Term) -> #comment{term=Term}. +%% is_comment(Insn) -> is_insn_type(Insn, comment). +%% comment_term(#comment{term=Term}) -> Term. + +mk_fmove(Src, Dst) -> #fmove{src=Src, dst=Dst}. +is_fmove(F) -> is_insn_type(F, fmove). +fmove_src(#fmove{src=Src}) -> Src. +fmove_dst(#fmove{dst=Dst}) -> Dst. + +mk_fp_unop(Op, Arg) -> #fp_unop{op=Op, arg=Arg}. +%% is_fp_unop(F) -> is_insn_type(F, fp_unop). +fp_unop_arg(#fp_unop{arg=Arg}) -> Arg. +fp_unop_op(#fp_unop{op=Op}) -> Op. + +mk_fp_binop(Op, Src, Dst) -> #fp_binop{op=Op, src=Src, dst=Dst}. +%% is_fp_binop(F) -> is_insn_type(F, fp_binop). +fp_binop_src(#fp_binop{src=Src}) -> Src. +fp_binop_dst(#fp_binop{dst=Dst}) -> Dst. +fp_binop_op(#fp_binop{op=Op}) -> Op. + +mk_imul(ImmOpt, Src, Temp) -> #imul{imm_opt=ImmOpt, src=Src, temp=Temp}. +imul_imm_opt(#imul{imm_opt=ImmOpt}) -> ImmOpt. +imul_src(#imul{src=Src}) -> Src. +imul_temp(#imul{temp=Temp}) -> Temp. + +mk_jcc(Cc, Label) -> #jcc{cc=Cc, label=Label}. +%% is_jcc(Insn) -> is_insn_type(Insn, jcc). +jcc_cc(#jcc{cc=Cc}) -> Cc. +jcc_label(#jcc{label=Label}) -> Label. + +mk_jmp_fun(Fun, Linkage) -> + check_linkage(Linkage), + #jmp_fun{'fun'=Fun, linkage=Linkage}. +%% is_jmp_fun(Insn) -> is_insn_type(Insn, jmp_fun). +jmp_fun_fun(#jmp_fun{'fun'=Fun}) -> Fun. 
+jmp_fun_linkage(#jmp_fun{linkage=Linkage}) -> Linkage. + +mk_jmp_label(Label) -> #jmp_label{label=Label}. +%% is_jmp_label(Insn) -> is_insn_type(Insn, jmp_label). +jmp_label_label(#jmp_label{label=Label}) -> Label. + +mk_jmp_switch(Temp, JTab, Labels) -> + #jmp_switch{temp=Temp, jtab=JTab, labels=Labels}. +%% is_jmp_switch(Insn) -> is_insn_type(Insn, jmp_switch). +jmp_switch_temp(#jmp_switch{temp=Temp}) -> Temp. +jmp_switch_jtab(#jmp_switch{jtab=JTab}) -> JTab. +%% jmp_switch_labels(#jmp_switch{labels=Labels}) -> Labels. + +mk_label(Label) -> #label{label=Label}. +is_label(Insn) -> is_insn_type(Insn, label). +label_label(#label{label=Label}) -> Label. + +mk_lea(Mem, Temp) -> #lea{mem=Mem, temp=Temp}. +%% is_lea(Insn) -> is_insn_type(Insn, lea). +lea_mem(#lea{mem=Mem}) -> Mem. +lea_temp(#lea{temp=Temp}) -> Temp. + +mk_move(Src, Dst) -> #move{src=Src, dst=Dst}. +is_move(Insn) -> is_insn_type(Insn, move). +move_src(#move{src=Src}) -> Src. +move_dst(#move{dst=Dst}) -> Dst. + +mk_move64(Imm, Dst) -> #move64{imm=Imm, dst=Dst}. +%% is_move64(Insn) -> is_insn_type(Insn, move64). +move64_src(#move64{imm=Imm}) -> Imm. +move64_dst(#move64{dst=Dst}) -> Dst. + +mk_movsx(Src, Dst) -> #movsx{src=Src, dst=Dst}. +%% is_movsx(Insn) -> is_insn_type(Insn, movsx). +movsx_src(#movsx{src=Src}) -> Src. +movsx_dst(#movsx{dst=Dst}) -> Dst. + +mk_movzx(Src, Dst) -> #movzx{src=Src, dst=Dst}. +%% is_movzx(Insn) -> is_insn_type(Insn, movzx). +movzx_src(#movzx{src=Src}) -> Src. +movzx_dst(#movzx{dst=Dst}) -> Dst. + +mk_pseudo_call(Fun, SDesc, ContLab, Linkage) -> + check_linkage(Linkage), + #pseudo_call{'fun'=Fun, sdesc=SDesc, contlab=ContLab, linkage=Linkage}. +%% is_pseudo_call(Insn) -> is_insn_type(Insn, pseudo_call). +pseudo_call_fun(#pseudo_call{'fun'=Fun}) -> Fun. +pseudo_call_sdesc(#pseudo_call{sdesc=SDesc}) -> SDesc. +pseudo_call_contlab(#pseudo_call{contlab=ContLab}) -> ContLab. +pseudo_call_linkage(#pseudo_call{linkage=Linkage}) -> Linkage. + +mk_pseudo_jcc(Cc, TrueLabel, FalseLabel, Pred) -> % 'smart' constructor + if Pred >= 0.5 -> + mk_pseudo_jcc_simple(neg_cc(Cc), FalseLabel, TrueLabel, 1.0-Pred); + true -> + mk_pseudo_jcc_simple(Cc, TrueLabel, FalseLabel, Pred) + end. +neg_cc(Cc) -> + case Cc of + 'e' -> 'ne'; % ==, != + 'ne' -> 'e'; % !=, == + 'g' -> 'le'; % >, <= + 'a' -> 'be'; % >u, <=u + 'ge' -> 'l'; % >=, < + 'ae' -> 'b'; % >=u, 'ge'; % <, >= + 'b' -> 'ae'; % =u + 'le' -> 'g'; % <=, > + 'be' -> 'a'; % <=u, >u + 'o' -> 'no'; % overflow, not_overflow + 'no' -> 'o'; % not_overflow, overflow + _ -> exit({?MODULE, {"unknown cc", Cc}}) + end. +mk_pseudo_jcc_simple(Cc, TrueLabel, FalseLabel, Pred) -> + #pseudo_jcc{cc=Cc, true_label=TrueLabel, false_label=FalseLabel, pred=Pred}. +%% is_pseudo_jcc(Insn) -> is_insn_type(Insn, pseudo_jcc). +%% pseudo_jcc_cc(#pseudo_jcc{cc=Cc}) -> Cc. +%% pseudo_jcc_true_label(#pseudo_jcc{true_label=TrueLabel}) -> TrueLabel. +%% pseudo_jcc_false_label(#pseudo_jcc{false_label=FalseLabel}) -> FalseLabel. +%% pseudo_jcc_pred(#pseudo_jcc{pred=Pred}) -> Pred. + +mk_pseudo_spill(List) -> + #pseudo_spill{args=List}. + +mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage) -> + check_linkage(Linkage), + #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage}. +%% is_pseudo_tailcall(Insn) -> is_insn_type(Insn, pseudo_tailcall). +pseudo_tailcall_fun(#pseudo_tailcall{'fun'=Fun}) -> Fun. +%% pseudo_tailcall_arity(#pseudo_tailcall{arity=Arity}) -> Arity. +pseudo_tailcall_stkargs(#pseudo_tailcall{stkargs=StkArgs}) -> StkArgs. 
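%% Worked example (illustrative) for the mk_pseudo_jcc 'smart' constructor
%% defined above: a conditional branch predicted taken with probability 0.9
%% is stored with the negated condition code and the labels swapped, so the
%% explicit jump target is always the less likely successor; the result here
%% is #pseudo_jcc{cc='ne', true_label=FalseLab, false_label=TrueLab,
%% pred=1.0-0.9}.

pseudo_jcc_example(TrueLab, FalseLab) ->
  mk_pseudo_jcc('e', TrueLab, FalseLab, 0.9).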
+pseudo_tailcall_linkage(#pseudo_tailcall{linkage=Linkage}) -> Linkage. + +mk_pseudo_tailcall_prepare() -> #pseudo_tailcall_prepare{}. +%% is_pseudo_tailcall_prepare(Insn) -> is_insn_type(Insn, pseudo_tailcall_prepare). + +mk_push(Src) -> #push{src=Src}. +%% is_push(Insn) -> is_insn_type(Insn, push). +push_src(#push{src=Src}) -> Src. + +%% mk_pop(Dst) -> #pop{dst=Dst}. +%% is_push(Insn) -> is_insn_type(Insn, push). +pop_dst(#pop{dst=Dst}) -> Dst. + +mk_ret(NPop) -> #ret{npop=NPop}. +%% is_ret(Insn) -> is_insn_type(Insn, ret). +ret_npop(#ret{npop=NPop}) -> NPop. + +mk_shift(ShiftOp, Src, Dst) -> + #shift{shiftop=ShiftOp, src=Src, dst=Dst}. +%% is_shift(Insn) -> is_insn_type(Insn, shift). +shift_op(#shift{shiftop=ShiftOp}) -> ShiftOp. +shift_src(#shift{src=Src}) -> Src. +shift_dst(#shift{dst=Dst}) -> Dst. + +mk_defun(MFA, Formals, IsClosure, IsLeaf, Code, Data, VarRange, LabelRange) -> + #defun{mfa=MFA, formals=Formals, code=Code, data=Data, + isclosure=IsClosure, isleaf=IsLeaf, + var_range=VarRange, label_range=LabelRange}. +defun_mfa(#defun{mfa=MFA}) -> MFA. +defun_formals(#defun{formals=Formals}) -> Formals. +defun_is_closure(#defun{isclosure=IsClosure}) -> IsClosure. +defun_is_leaf(#defun{isleaf=IsLeaf}) -> IsLeaf. +defun_code(#defun{code=Code}) -> Code. +defun_data(#defun{data=Data}) -> Data. +defun_var_range(#defun{var_range=VarRange}) -> VarRange. +%% defun_label_range(#defun{label_range=LabelRange}) -> LabelRange. + +%% highest_temp(Code) -> +%% highest_temp(Code,0). +%% +%% highest_temp([I|Is],Max) -> +%% Defs = hipe_x86_defuse:insn_def(I), +%% Uses = hipe_x86_defuse:insn_use(I), +%% highest_temp(Is,new_max(Defs++Uses,Max)); +%% highest_temp([],Max) -> +%% Max. +%% +%% new_max([V|Vs],Max) -> +%% case is_temp(V) of +%% true -> +%% TReg = temp_reg(V), +%% if TReg > Max -> +%% new_max(Vs, TReg); +%% true -> +%% new_max(Vs, Max) +%% end; +%% false -> +%% new_max(Vs, Max) +%% end; +%% new_max([],Max) -> Max. diff --git a/lib/hipe/x86/hipe_x86.hrl b/lib/hipe/x86/hipe_x86.hrl new file mode 100644 index 0000000000..3d22fb381f --- /dev/null +++ b/lib/hipe/x86/hipe_x86.hrl @@ -0,0 +1,116 @@ +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. 
+%%% +%%% %CopyrightEnd% +%%% +%%% concrete representation of 2-address pseudo-x86 code + +%%%-------------------------------------------------------------------- +%%% x86 operands: +%%% +%%% int32 ::= +%%% reg ::= +%%% type ::= 'tagged' | 'untagged' +%%% label ::= +%%% label_type ::= 'label' | 'constant' +%%% aluop ::= +%%% term ::= +%%% cc ::= +%%% pred ::= +%%% npop ::= +%%% +%%% temp ::= {x86_temp, reg, type, allocatable} +%%% allocatable ::= 'true' | 'false' +%%% +%%% imm ::= {x86_imm, value} +%%% value ::= int32 | atom | {label, label_type} +%%% +%%% mem ::= {x86_mem, base, off, mem_type} +%%% base ::= temp | [] (XXX BUG: not quite true before RA) +%%% off ::= imm | temp +%%% mem_type ::= 'byte' | 'int16' (only valid with mov{s,z}x) +%%% | type +%%% +%%% src ::= temp | mem | imm +%%% dst ::= temp | mem +%%% arg ::= src +%%% args ::= +%%% +%%% mfa ::= {x86_mfa, atom, atom, byte} +%%% prim ::= {x86_prim, atom} +%%% fun ::= mfa | prim | temp | mem +%%% +%%% jtab ::= label (equiv. to {x86_imm,{label,'constant'}}) +%%% +%%% sdesc ::= {x86_sdesc, exnlab, fsize, arity, live} +%%% exnlab ::= [] | label +%%% fsize ::= (frame size in words) +%%% live ::= (word offsets) +%%% arity ::= int32 + +-record(x86_temp, {reg, type, allocatable}). +-record(x86_imm, {value}). +-record(x86_mem, {base, off, type}). +-record(x86_fpreg, {reg, pseudo}). +-record(x86_mfa, {m::atom(), f::atom(), a::arity()}). +-record(x86_prim, {prim}). +-record(x86_sdesc, {exnlab, fsize, arity::arity(), live::tuple()}). + +%%% Basic instructions. +%%% These follow the AT&T convention, i.e. op src,dst (dst := dst op src) +%%% After register allocation, at most one operand in a binary +%%% instruction (alu, cmp, move) may denote a memory cell. +%%% After frame allocation, every temp must denote a physical register. + +-record(alu, {aluop, src, dst}). +-record(call, {'fun', sdesc, linkage}). +-record(cmovcc, {cc, src, dst}). +-record(cmp, {src, dst}). % a 'sub' alu which doesn't update dst +-record(comment, {term}). +-record(fmove, {src, dst}). +-record(fp_binop, {op, src, dst}). +-record(fp_unop, {op, arg}). % arg may be [] :-( +-record(imul, {imm_opt, src, temp}). % imm_opt:[]|imm, src:temp|mem +-record(jcc, {cc, label}). +-record(jmp_fun, {'fun', linkage}). % tailcall, direct or indirect +-record(jmp_label, {label}). % local jmp, direct +-record(jmp_switch, {temp, jtab, labels}). % local jmp, indirect +-record(label, {label}). +-record(lea, {mem, temp}). +-record(move, {src, dst}). +-record(move64, {imm, dst}). +-record(movsx, {src, dst}). +-record(movzx, {src, dst}). +-record(pseudo_call, {'fun', sdesc, contlab, linkage}). +-record(pseudo_jcc, {cc, true_label, false_label, pred}). +-record(pseudo_spill, {args=[]}). +-record(pseudo_tailcall, {'fun', arity, stkargs, linkage}). +-record(pseudo_tailcall_prepare, {}). +-record(push, {src}). +-record(pop, {dst}). +-record(ret, {npop}). % EAX is live-in +-record(shift, {shiftop, src, dst}). +-record(test, {src, dst}). + +%%% Function definitions. + +-include("../misc/hipe_consttab.hrl"). + +-record(defun, {mfa :: mfa(), formals, code, + data :: hipe_consttab(), + isclosure :: boolean(), + isleaf :: boolean(), + var_range, label_range}). diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl new file mode 100644 index 0000000000..4e65736db3 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_assemble.erl @@ -0,0 +1,1014 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. 
+%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% HiPE/x86 assembler +%%% +%%% TODO: +%%% - Simplify combine_label_maps and mk_data_relocs. +%%% - Move find_const to hipe_pack_constants? + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_ASSEMBLE, hipe_amd64_assemble). +-define(HIPE_X86_ENCODE, hipe_amd64_encode). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_PP, hipe_amd64_pp). +-ifdef(AMD64_SIMULATE_NSP). +-define(X86_SIMULATE_NSP, ?AMD64_SIMULATE_NSP). +-endif. +-define(EAX, rax). +-define(REGArch, reg64). +-define(RMArch, rm64). +-define(EA_DISP32_ABSOLUTE, ea_disp32_sindex). +-else. +-define(HIPE_X86_ASSEMBLE, hipe_x86_assemble). +-define(HIPE_X86_ENCODE, hipe_x86_encode). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_PP, hipe_x86_pp). +-define(EAX, eax). +-define(REGArch, reg32). +-define(RMArch, rm32). +-define(EA_DISP32_ABSOLUTE, ea_disp32). +-endif. + +-module(?HIPE_X86_ASSEMBLE). +-export([assemble/4]). + +-define(DEBUG,true). + +-include("../main/hipe.hrl"). +-include("../x86/hipe_x86.hrl"). +-include("../../kernel/src/hipe_ext_format.hrl"). +-include("../rtl/hipe_literals.hrl"). +-include("../misc/hipe_sdi.hrl"). +-undef(ASSERT). +-define(ASSERT(G), if G -> [] ; true -> exit({assertion_failed,?MODULE,?LINE,??G}) end). + +assemble(CompiledCode, Closures, Exports, Options) -> + ?when_option(time, Options, ?start_timer("x86 assembler")), + print("****************** Assembling *******************\n", [], Options), + %% + Code = [{MFA, + hipe_x86:defun_code(Defun), + hipe_x86:defun_data(Defun)} + || {MFA, Defun} <- CompiledCode], + %% + {ConstAlign,ConstSize,ConstMap,RefsFromConsts} = + hipe_pack_constants:pack_constants(Code, ?HIPE_X86_REGISTERS:alignment()), + %% + {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} = + encode(translate(Code, ConstMap, Options), Options), + print("Total num bytes=~w\n", [CodeSize], Options), + %% put(code_size, CodeSize), + %% put(const_size, ConstSize), + %% ?when_option(verbose, Options, + %% ?debug_msg("Constants are ~w bytes\n",[ConstSize])), + %% + SC = hipe_pack_constants:slim_constmap(ConstMap), + DataRelocs = mk_data_relocs(RefsFromConsts, LabelMap), + SSE = slim_sorted_exportmap(ExportMap,Closures,Exports), + SlimRefs = hipe_pack_constants:slim_refs(AccRefs), + Bin = term_to_binary([{?VERSION_STRING(),?HIPE_SYSTEM_CRC}, + ConstAlign, ConstSize, + SC, + DataRelocs, % nee LM, LabelMap + SSE, + CodeSize,CodeBinary,SlimRefs, + 0,[] % ColdCodeSize, SlimColdRefs + ]), + %% + %% ?when_option(time, Options, ?stop_timer("x86 assembler")), + Bin. + +%%% +%%% Assembly Pass 1. +%%% Process initial {MFA,Code,Data} list. +%%% Translate each MFA's body, choosing operand & instruction kinds. +%%% +%%% Assembly Pass 2. +%%% Perform short/long form optimisation for jumps. +%%% Build LabelMap for each MFA. +%%% +%%% Result is {MFA,NewCode,CodeSize,LabelMap} list. +%%% + +translate(Code, ConstMap, Options) -> + translate_mfas(Code, ConstMap, [], Options). 
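As a minimal sketch of what the first two passes hand onwards (invented input, not from the original file): a body holding one label and one local jump is turned into the symbolic triples below, with jmp_sdi provisionally counted as 2 bytes by insn_size/1 until hipe_sdi:pass2/1 decides whether it must grow into the 5-byte long form.

   %% input code:  [#label{label=1}, #jmp_label{label=1}]
   %% after pass 1: [{'.label', 1, LabelInsn},
   %%                {jmp_sdi, {{label,1}}, JmpInsn}]
   %% pass 2 then yields a LabelMap plus the total size increase, giving
   %% the {MFA,NewCode,CodeSize,LabelMap} tuple consumed by pass 3.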
+ +translate_mfas([{MFA,Insns,_Data}|Code], ConstMap, NewCode, Options) -> + {NewInsns,CodeSize,LabelMap} = + translate_insns(Insns, {MFA,ConstMap}, hipe_sdi:pass1_init(), 0, [], Options), + translate_mfas(Code, ConstMap, [{MFA,NewInsns,CodeSize,LabelMap}|NewCode], Options); +translate_mfas([], _ConstMap, NewCode, _Options) -> + lists:reverse(NewCode). + +translate_insns([I|Insns], Context, SdiPass1, Address, NewInsns, Options) -> + NewIs = translate_insn(I, Context, Options), + add_insns(NewIs, Insns, Context, SdiPass1, Address, NewInsns, Options); +translate_insns([], _Context, SdiPass1, Address, NewInsns, _Options) -> + {LabelMap,CodeSizeIncr} = hipe_sdi:pass2(SdiPass1), + {lists:reverse(NewInsns), Address+CodeSizeIncr, LabelMap}. + +add_insns([I|Is], Insns, Context, SdiPass1, Address, NewInsns, Options) -> + NewSdiPass1 = + case I of + {'.label',L,_} -> + hipe_sdi:pass1_add_label(SdiPass1, Address, L); + {jcc_sdi,{_,{label,L}},_} -> + SdiInfo = #sdi_info{incr=(6-2),lb=(-128)+2,ub=127+2}, + hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo); + {jmp_sdi,{{label,L}},_} -> + SdiInfo = #sdi_info{incr=(5-2),lb=(-128)+2,ub=127+2}, + hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo); + _ -> + SdiPass1 + end, + Address1 = Address + insn_size(I), + add_insns(Is, Insns, Context, NewSdiPass1, Address1, [I|NewInsns], Options); +add_insns([], Insns, Context, SdiPass1, Address, NewInsns, Options) -> + translate_insns(Insns, Context, SdiPass1, Address, NewInsns, Options). + +insn_size(I) -> + case I of + {'.label',_,_} -> 0; + {'.sdesc',_,_} -> 0; + {jcc_sdi,_,_} -> 2; + {jmp_sdi,_,_} -> 2; + {Op,Arg,_Orig} -> ?HIPE_X86_ENCODE:insn_sizeof(Op, Arg) + end. + +translate_insn(I, Context, Options) -> + case I of + #alu{} -> + Arg = resolve_alu_args(hipe_x86:alu_src(I), hipe_x86:alu_dst(I), Context), + [{hipe_x86:alu_op(I), Arg, I}]; + #call{} -> + translate_call(I); + #cmovcc{} -> + {Dst,Src} = resolve_move_args( + hipe_x86:cmovcc_src(I), hipe_x86:cmovcc_dst(I), + Context), + CC = {cc,?HIPE_X86_ENCODE:cc(hipe_x86:cmovcc_cc(I))}, + Arg = {CC,Dst,Src}, + [{cmovcc, Arg, I}]; + #cmp{} -> + Arg = resolve_alu_args(hipe_x86:cmp_src(I), hipe_x86:cmp_dst(I), Context), + [{cmp, Arg, I}]; + #comment{} -> + []; + #fmove{} -> + {Op,Arg} = resolve_sse2_fmove_args(hipe_x86:fmove_src(I), + hipe_x86:fmove_dst(I)), + [{Op, Arg, I}]; + #fp_binop{} -> + case proplists:get_bool(x87, Options) of + true -> % x87 + Arg = resolve_x87_binop_args(hipe_x86:fp_binop_src(I), + hipe_x86:fp_binop_dst(I)), + [{hipe_x86:fp_binop_op(I), Arg, I}]; + false -> % sse2 + Arg = resolve_sse2_binop_args(hipe_x86:fp_binop_src(I), + hipe_x86:fp_binop_dst(I)), + [{resolve_sse2_op(hipe_x86:fp_binop_op(I)), Arg, I}] + end; + #fp_unop{} -> + case proplists:get_bool(x87, Options) of + true -> % x87 + Arg = resolve_x87_unop_arg(hipe_x86:fp_unop_arg(I)), + [{hipe_x86:fp_unop_op(I), Arg, I}]; + false -> % sse2 + case hipe_x86:fp_unop_op(I) of + 'fchs' -> + Arg = resolve_sse2_fchs_arg(hipe_x86:fp_unop_arg(I)), + [{'xorpd', Arg, I}]; + 'fwait' -> % no op on sse2, magic on x87 + [] + end + end; + #imul{} -> + translate_imul(I, Context); + #jcc{} -> + Cc = {cc,?HIPE_X86_ENCODE:cc(hipe_x86:jcc_cc(I))}, + Label = translate_label(hipe_x86:jcc_label(I)), + [{jcc_sdi, {Cc,Label}, I}]; + #jmp_fun{} -> + %% call and jmp are patched the same, so no need to distinguish + %% call from tailcall + PatchTypeExt = + case hipe_x86:jmp_fun_linkage(I) of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + Arg = translate_fun(hipe_x86:jmp_fun_fun(I), 
PatchTypeExt), + [{jmp, {Arg}, I}]; + #jmp_label{} -> + Arg = translate_label(hipe_x86:jmp_label_label(I)), + [{jmp_sdi, {Arg}, I}]; + #jmp_switch{} -> + RM32 = resolve_jmp_switch_arg(I, Context), + [{jmp, {RM32}, I}]; + #label{} -> + [{'.label', hipe_x86:label_label(I), I}]; + #lea{} -> + Arg = resolve_lea_args(hipe_x86:lea_mem(I), hipe_x86:lea_temp(I)), + [{lea, Arg, I}]; + #move{} -> + Arg = resolve_move_args(hipe_x86:move_src(I), hipe_x86:move_dst(I), + Context), + [{mov, Arg, I}]; + #move64{} -> + translate_move64(I, Context); + #movsx{} -> + Arg = resolve_movx_args(hipe_x86:movsx_src(I), hipe_x86:movsx_dst(I)), + [{movsx, Arg, I}]; + #movzx{} -> + Arg = resolve_movx_args(hipe_x86:movzx_src(I), hipe_x86:movzx_dst(I)), + [{movzx, Arg, I}]; + %% pseudo_call: eliminated before assembly + %% pseudo_jcc: eliminated before assembly + %% pseudo_tailcall: eliminated before assembly + %% pseudo_tailcall_prepare: eliminated before assembly + #pop{} -> + Arg = translate_dst(hipe_x86:pop_dst(I)), + [{pop, {Arg}, I}]; + #push{} -> + Arg = translate_src(hipe_x86:push_src(I), Context), + [{push, {Arg}, I}]; + #ret{} -> + translate_ret(I); + #shift{} -> + Arg = resolve_shift_args(hipe_x86:shift_src(I), hipe_x86:shift_dst(I), Context), + [{hipe_x86:shift_op(I), Arg, I}]; + #test{} -> + Arg = resolve_test_args(hipe_x86:test_src(I), hipe_x86:test_dst(I), Context), + [{test, Arg, I}] + end. + +-ifdef(X86_SIMULATE_NSP). +-ifdef(HIPE_AMD64). +translate_call(I) -> + WordSize = hipe_amd64_registers:wordsize(), + RegSP = 2#100, % esp/rsp + TempSP = hipe_x86:mk_temp(RegSP, untagged), + FunOrig = hipe_x86:call_fun(I), + Fun = + case FunOrig of + #x86_mem{base=#x86_temp{reg=4}, off=#x86_imm{value=Off}} -> + FunOrig#x86_mem{off=#x86_imm{value=Off+WordSize}}; + _ -> FunOrig + end, + RegRA = + begin + RegTemp0 = hipe_amd64_registers:temp0(), + RegTemp1 = hipe_amd64_registers:temp1(), + case Fun of + #x86_temp{reg=RegTemp0} -> RegTemp1; + #x86_mem{base=#x86_temp{reg=RegTemp0}} -> RegTemp1; + _ -> RegTemp0 + end + end, + TempRA = hipe_x86:mk_temp(RegRA, untagged), + PatchTypeExt = + case hipe_x86:call_linkage(I) of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + JmpArg = translate_fun(Fun, PatchTypeExt), + I4 = {'.sdesc', hipe_x86:call_sdesc(I), #comment{term=sdesc}}, + I3 = {jmp, {JmpArg}, #comment{term=call}}, + Size3 = hipe_amd64_encode:insn_sizeof(jmp, {JmpArg}), + MovArgs = {mem_to_rmArch(hipe_x86:mk_mem(TempSP, + hipe_x86:mk_imm(0), + untagged)), + temp_to_regArch(TempRA)}, + I2 = {mov, MovArgs, #comment{term=call}}, + Size2 = hipe_amd64_encode:insn_sizeof(mov, MovArgs), + I1 = {lea, {temp_to_regArch(TempRA), + {ea, hipe_amd64_encode:ea_disp32_rip(Size2+Size3)}}, + #comment{term=call}}, + I0 = {sub, {temp_to_rmArch(TempSP), {imm8,WordSize}}, I}, + [I0,I1,I2,I3,I4]. +-else. 
+translate_call(I) -> + WordSize = ?HIPE_X86_REGISTERS:wordsize(), + RegSP = 2#100, % esp/rsp + TempSP = hipe_x86:mk_temp(RegSP, untagged), + FunOrig = hipe_x86:call_fun(I), + Fun = + case FunOrig of + #x86_mem{base=#x86_temp{reg=4}, off=#x86_imm{value=Off}} -> + FunOrig#x86_mem{off=#x86_imm{value=Off+WordSize}}; + _ -> FunOrig + end, + PatchTypeExt = + case hipe_x86:call_linkage(I) of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + JmpArg = translate_fun(Fun, PatchTypeExt), + I3 = {'.sdesc', hipe_x86:call_sdesc(I), #comment{term=sdesc}}, + I2 = {jmp, {JmpArg}, #comment{term=call}}, + Size2 = ?HIPE_X86_ENCODE:insn_sizeof(jmp, {JmpArg}), + I1 = {mov, {mem_to_rmArch(hipe_x86:mk_mem(TempSP, + hipe_x86:mk_imm(0), + untagged)), + {imm32,{?X86ABSPCREL,4+Size2}}}, + #comment{term=call}}, + I0 = {sub, {temp_to_rmArch(TempSP), {imm8,WordSize}}, I}, + [I0,I1,I2,I3]. +-endif. + +translate_ret(I) -> + NPOP = hipe_x86:ret_npop(I) + ?HIPE_X86_REGISTERS:wordsize(), + RegSP = 2#100, % esp/rsp + TempSP = hipe_x86:mk_temp(RegSP, untagged), + RegRA = 2#011, % ebx/rbx + TempRA = hipe_x86:mk_temp(RegRA, untagged), + [{mov, + {temp_to_regArch(TempRA), + mem_to_rmArch(hipe_x86:mk_mem(TempSP, + hipe_x86:mk_imm(0), + untagged))}, + I}, + {add, + {temp_to_rmArch(TempSP), + case NPOP < 128 of + true -> {imm8,NPOP}; + false -> {imm32,NPOP} + end}, + #comment{term=ret}}, + {jmp, + {temp_to_rmArch(TempRA)}, + #comment{term=ret}}]. + +-else. % not X86_SIMULATE_NSP + +translate_call(I) -> + %% call and jmp are patched the same, so no need to distinguish + %% call from tailcall + PatchTypeExt = + case hipe_x86:call_linkage(I) of + remote -> ?CALL_REMOTE; + not_remote -> ?CALL_LOCAL + end, + Arg = translate_fun(hipe_x86:call_fun(I), PatchTypeExt), + SDesc = hipe_x86:call_sdesc(I), + [{call, {Arg}, I}, {'.sdesc', SDesc, #comment{term=sdesc}}]. + +translate_ret(I) -> + Arg = + case hipe_x86:ret_npop(I) of + 0 -> {}; + N -> {{imm16,N}} + end, + [{ret, Arg, I}]. + +-endif. % X86_SIMULATE_NSP + +translate_imul(I, Context) -> + Temp = temp_to_regArch(hipe_x86:imul_temp(I)), + Src = temp_or_mem_to_rmArch(hipe_x86:imul_src(I)), + Args = + case hipe_x86:imul_imm_opt(I) of + [] -> {Temp,Src}; + Imm -> {Temp,Src,translate_imm(Imm, Context, true)} + end, + [{'imul', Args, I}]. + +temp_or_mem_to_rmArch(Src) -> + case Src of + #x86_temp{} -> temp_to_rmArch(Src); + #x86_mem{} -> mem_to_rmArch(Src) + end. + +translate_label(Label) when is_integer(Label) -> + {label,Label}. % symbolic, since offset is not yet computable + +translate_fun(Arg, PatchTypeExt) -> + case Arg of + #x86_temp{} -> + temp_to_rmArch(Arg); + #x86_mem{} -> + mem_to_rmArch(Arg); + #x86_mfa{m=M,f=F,a=A} -> + {rel32,{PatchTypeExt,{M,F,A}}}; + #x86_prim{prim=Prim} -> + {rel32,{PatchTypeExt,Prim}} + end. + +translate_src(Src, Context) -> + case Src of + #x86_imm{} -> + translate_imm(Src, Context, true); + _ -> + translate_dst(Src) + end. + +%%% MayTrunc8 controls whether negative Imm8s should be truncated +%%% to 8 bits or not. Truncation should always be done, except when +%%% the caller will widen the Imm8 to an Imm32 or Imm64. 
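A small worked illustration of that truncation rule (dummy context, values invented):

   translate_imm(#x86_imm{value=-1}, Ctx, true)    %% -> {imm8,16#FF}
   translate_imm(#x86_imm{value=-1}, Ctx, false)   %% -> {imm8,-1}, widened later
   translate_imm(#x86_imm{value=1000}, Ctx, true)  %% -> {imm32,1000}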
+translate_imm(#x86_imm{value=Imm}, Context, MayTrunc8) -> + if is_atom(Imm) -> + {imm32,{?LOAD_ATOM,Imm}}; + is_integer(Imm) -> + case (Imm =< 127) and (Imm >= -128) of + true -> + Imm8 = + case MayTrunc8 of + true -> Imm band 16#FF; + false -> Imm + end, + {imm8,Imm8}; + false -> + {imm32,Imm} + end; + true -> + Val = + case Imm of + {Label,constant} -> + {MFA,ConstMap} = Context, + ConstNo = find_const({MFA,Label}, ConstMap), + {constant,ConstNo}; + {Label,closure} -> + {closure,Label}; + {Label,c_const} -> + {c_const,Label} + end, + {imm32,{?LOAD_ADDRESS,Val}} + end. + +translate_dst(Dst) -> + case Dst of + #x86_temp{} -> + temp_to_regArch(Dst); + #x86_mem{type='double'} -> + mem_to_rm64fp(Dst); + #x86_mem{} -> + mem_to_rmArch(Dst); + #x86_fpreg{} -> + fpreg_to_stack(Dst) + end. + +%%% +%%% Assembly Pass 3. +%%% Process final {MFA,Code,CodeSize,LabelMap} list from pass 2. +%%% Translate to a single binary code segment. +%%% Collect relocation patches. +%%% Build ExportMap (MFA-to-address mapping). +%%% Combine LabelMaps to a single one (for mk_data_relocs/2 compatibility). +%%% Return {CombinedCodeSize,BinaryCode,Relocs,CombinedLabelMap,ExportMap}. +%%% + +encode(Code, Options) -> + CodeSize = compute_code_size(Code, 0), + ExportMap = build_export_map(Code, 0, []), + {AccCode,Relocs} = encode_mfas(Code, 0, [], [], Options), + CodeBinary = list_to_binary(lists:reverse(AccCode)), + ?ASSERT(CodeSize =:= byte_size(CodeBinary)), + CombinedLabelMap = combine_label_maps(Code, 0, gb_trees:empty()), + {CodeSize,CodeBinary,Relocs,CombinedLabelMap,ExportMap}. + +nr_pad_bytes(Address) -> (4 - (Address rem 4)) rem 4. % XXX: 16 or 32 instead? + +align_entry(Address) -> Address + nr_pad_bytes(Address). + +compute_code_size([{_MFA,_Insns,CodeSize,_LabelMap}|Code], Size) -> + compute_code_size(Code, align_entry(Size+CodeSize)); +compute_code_size([], Size) -> Size. + +build_export_map([{{M,F,A},_Insns,CodeSize,_LabelMap}|Code], Address, ExportMap) -> + build_export_map(Code, align_entry(Address+CodeSize), [{Address,M,F,A}|ExportMap]); +build_export_map([], _Address, ExportMap) -> ExportMap. + +combine_label_maps([{MFA,_Insns,CodeSize,LabelMap}|Code], Address, CLM) -> + NewCLM = merge_label_map(gb_trees:to_list(LabelMap), MFA, Address, CLM), + combine_label_maps(Code, align_entry(Address+CodeSize), NewCLM); +combine_label_maps([], _Address, CLM) -> CLM. + +merge_label_map([{Label,Offset}|Rest], MFA, Address, CLM) -> + NewCLM = gb_trees:insert({MFA,Label}, Address+Offset, CLM), + merge_label_map(Rest, MFA, Address, NewCLM); +merge_label_map([], _MFA, _Address, CLM) -> CLM. + +encode_mfas([{MFA,Insns,CodeSize,LabelMap}|Code], Address, AccCode, Relocs, Options) -> + print("Generating code for:~w\n", [MFA], Options), + print("Offset | Opcode | Instruction\n", [], Options), + {Address1,Relocs1,AccCode1} = + encode_insns(Insns, Address, Address, LabelMap, Relocs, AccCode, Options), + ExpectedAddress = align_entry(Address + CodeSize), + ?ASSERT(Address1 =:= ExpectedAddress), + print("Finished.\n\n", [], Options), + encode_mfas(Code, Address1, AccCode1, Relocs1, Options); +encode_mfas([], _Address, AccCode, Relocs, _Options) -> + {AccCode, Relocs}. 
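The nr_pad_bytes/align_entry helpers above keep every function entry 4-byte aligned, and encode_insns below pads each body with nop bytes up to the next such boundary. A quick check of the arithmetic (plain integer math, nothing assumed beyond the definitions above):

   nr_pad_bytes(13)  %% -> (4 - (13 rem 4)) rem 4 = 3
   align_entry(13)   %% -> 16
   nr_pad_bytes(16)  %% -> 0, already aligned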
+ +encode_insns([I|Insns], Address, FunAddress, LabelMap, Relocs, AccCode, Options) -> + case I of + {'.label',L,_} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + ?ASSERT(Address =:= LabelAddress), % sanity check + print_insn(Address, [], I, Options), + encode_insns(Insns, Address, FunAddress, LabelMap, Relocs, AccCode, Options); + {'.sdesc',SDesc,_} -> + #x86_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live} = SDesc, + ExnRA = + case ExnLab of + [] -> []; % don't cons up a new one + ExnLab -> gb_trees:get(ExnLab, LabelMap) + FunAddress + end, + Reloc = {?SDESC, Address, + ?STACK_DESC(ExnRA, FSize, Arity, Live)}, + encode_insns(Insns, Address, FunAddress, LabelMap, [Reloc|Relocs], AccCode, Options); + _ -> + {Op,Arg,_} = fix_jumps(I, Address, FunAddress, LabelMap), + {Bytes, NewRelocs} = ?HIPE_X86_ENCODE:insn_encode(Op, Arg, Address), + print_insn(Address, Bytes, I, Options), + Segment = list_to_binary(Bytes), + Size = byte_size(Segment), + NewAccCode = [Segment|AccCode], + encode_insns(Insns, Address+Size, FunAddress, LabelMap, NewRelocs++Relocs, NewAccCode, Options) + end; +encode_insns([], Address, FunAddress, LabelMap, Relocs, AccCode, Options) -> + case nr_pad_bytes(Address) of + 0 -> + {Address,Relocs,AccCode}; + NrPadBytes -> % triggers at most once per function body + Padding = lists:duplicate(NrPadBytes, {nop,{},#comment{term=padding}}), + encode_insns(Padding, Address, FunAddress, LabelMap, Relocs, AccCode, Options) + end. + +fix_jumps(I, InsnAddress, FunAddress, LabelMap) -> + case I of + {jcc_sdi,{CC,{label,L}},OrigI} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + ShortOffset = LabelAddress - (InsnAddress + 2), + if is_integer(ShortOffset), ShortOffset >= -128, ShortOffset =< 127 -> + {jcc,{CC,{rel8,ShortOffset band 16#FF}},OrigI}; + true -> + LongOffset = LabelAddress - (InsnAddress + 6), + {jcc,{CC,{rel32,LongOffset}},OrigI} + end; + {jmp_sdi,{{label,L}},OrigI} -> + LabelAddress = gb_trees:get(L, LabelMap) + FunAddress, + ShortOffset = LabelAddress - (InsnAddress + 2), + if is_integer(ShortOffset), ShortOffset >= -128, ShortOffset =< 127 -> + {jmp,{{rel8,ShortOffset band 16#FF}},OrigI}; + true -> + LongOffset = LabelAddress - (InsnAddress + 5), + {jmp,{{rel32,LongOffset}},OrigI} + end; + _ -> I + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +fpreg_to_stack(#x86_fpreg{reg=Reg}) -> + {fpst, Reg}. + +temp_to_regArch(#x86_temp{reg=Reg}) -> + {?REGArch, Reg}. + +-ifdef(HIPE_AMD64). +temp_to_reg64(#x86_temp{reg=Reg}) -> + {reg64, Reg}. +-endif. + +temp_to_reg32(#x86_temp{reg=Reg}) -> + {reg32, Reg}. +temp_to_reg16(#x86_temp{reg=Reg}) -> + {reg16, Reg}. +temp_to_reg8(#x86_temp{reg=Reg}) -> + {reg8, Reg}. + +temp_to_xmm(#x86_temp{reg=Reg}) -> + {xmm, Reg}. + +-ifdef(HIPE_AMD64). +temp_to_rm64(#x86_temp{reg=Reg}) -> + {rm64, hipe_amd64_encode:rm_reg(Reg)}. +-endif. + +temp_to_rmArch(#x86_temp{reg=Reg}) -> + {?RMArch, ?HIPE_X86_ENCODE:rm_reg(Reg)}. +temp_to_rm64fp(#x86_temp{reg=Reg}) -> + {rm64fp, ?HIPE_X86_ENCODE:rm_reg(Reg)}. + +mem_to_ea(Mem) -> + EA = mem_to_ea_common(Mem), + {ea, EA}. + +mem_to_rm32(Mem) -> + EA = mem_to_ea_common(Mem), + {rm32, ?HIPE_X86_ENCODE:rm_mem(EA)}. + +mem_to_rmArch(Mem) -> + EA = mem_to_ea_common(Mem), + {?RMArch, ?HIPE_X86_ENCODE:rm_mem(EA)}. + +mem_to_rm64fp(Mem) -> + EA = mem_to_ea_common(Mem), + {rm64fp, ?HIPE_X86_ENCODE:rm_mem(EA)}. + +%%%%%%%%%%%%%%%%% +mem_to_rm8(Mem) -> + EA = mem_to_ea_common(Mem), + {rm8, ?HIPE_X86_ENCODE:rm_mem(EA)}. 
+ +mem_to_rm16(Mem) -> + EA = mem_to_ea_common(Mem), + {rm16, ?HIPE_X86_ENCODE:rm_mem(EA)}. +%%%%%%%%%%%%%%%%% + +mem_to_ea_common(#x86_mem{base=[], off=#x86_imm{value=Off}}) -> + ?HIPE_X86_ENCODE:?EA_DISP32_ABSOLUTE(Off); +mem_to_ea_common(#x86_mem{base=#x86_temp{reg=Base}, off=#x86_temp{reg=Index}}) -> + case Base band 2#111 of + 5 -> % ebp/rbp or r13 + case Index band 2#111 of + 5 -> % ebp/rbp or r13 + SINDEX = ?HIPE_X86_ENCODE:sindex(0, Index), + SIB = ?HIPE_X86_ENCODE:sib(Base, SINDEX), + ?HIPE_X86_ENCODE:ea_disp8_sib(0, SIB); + _ -> + SINDEX = ?HIPE_X86_ENCODE:sindex(0, Base), + SIB = ?HIPE_X86_ENCODE:sib(Index, SINDEX), + ?HIPE_X86_ENCODE:ea_sib(SIB) + end; + _ -> + SINDEX = ?HIPE_X86_ENCODE:sindex(0, Index), + SIB = ?HIPE_X86_ENCODE:sib(Base, SINDEX), + ?HIPE_X86_ENCODE:ea_sib(SIB) + end; +mem_to_ea_common(#x86_mem{base=#x86_temp{reg=Base}, off=#x86_imm{value=Off}}) -> + if + Off =:= 0 -> + case Base of + 4 -> %esp, use SIB w/o disp8 + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_sib(SIB); + 5 -> %ebp, use disp8 w/o SIB + ?HIPE_X86_ENCODE:ea_disp8_base(Off, Base); + 12 -> %r12, use SIB w/o disp8 + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_sib(SIB); + 13 -> %r13, use disp8 w/o SIB + ?HIPE_X86_ENCODE:ea_disp8_base(Off, Base); + _ -> %neither SIB nor disp8 needed + ?HIPE_X86_ENCODE:ea_base(Base) + end; + Off >= -128, Off =< 127 -> + Disp8 = Off band 16#FF, + case Base of + 4 -> %esp, must use SIB + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_disp8_sib(Disp8, SIB); + 12 -> %r12, must use SIB + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_disp8_sib(Disp8, SIB); + _ -> %use disp8 w/o SIB + ?HIPE_X86_ENCODE:ea_disp8_base(Disp8, Base) + end; + true -> + case Base of + 4 -> %esp, must use SIB + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_disp32_sib(Off, SIB); + 12 -> %r12, must use SIB + SIB = ?HIPE_X86_ENCODE:sib(Base), + ?HIPE_X86_ENCODE:ea_disp32_sib(Off, SIB); + _ -> + ?HIPE_X86_ENCODE:ea_disp32_base(Off, Base) + end + end. + +%% jmp_switch +-ifdef(HIPE_AMD64). +resolve_jmp_switch_arg(I, _Context) -> + Base = hipe_x86:temp_reg(hipe_x86:jmp_switch_jtab(I)), + Index = hipe_x86:temp_reg(hipe_x86:jmp_switch_temp(I)), + SINDEX = hipe_amd64_encode:sindex(3, Index), + SIB = hipe_amd64_encode:sib(Base, SINDEX), + EA = + if (Base =:= 5) or (Base =:= 13) -> + hipe_amd64_encode:ea_disp8_sib(0, SIB); + true -> + hipe_amd64_encode:ea_sib(SIB) + end, + {rm64,hipe_amd64_encode:rm_mem(EA)}. +-else. +resolve_jmp_switch_arg(I, {MFA,ConstMap}) -> + ConstNo = find_const({MFA,hipe_x86:jmp_switch_jtab(I)}, ConstMap), + Disp32 = {?LOAD_ADDRESS,{constant,ConstNo}}, + SINDEX = ?HIPE_X86_ENCODE:sindex(2, hipe_x86:temp_reg(hipe_x86:jmp_switch_temp(I))), + EA = ?HIPE_X86_ENCODE:ea_disp32_sindex(Disp32, SINDEX), % this creates a SIB implicitly + {rm32,?HIPE_X86_ENCODE:rm_mem(EA)}. +-endif. + +%% lea reg, mem +resolve_lea_args(Src=#x86_mem{}, Dst=#x86_temp{}) -> + {temp_to_regArch(Dst),mem_to_ea(Src)}. + +resolve_sse2_op(Op) -> + case Op of + fadd -> addsd; + fdiv -> divsd; + fmul -> mulsd; + fsub -> subsd; + _ -> exit({?MODULE, unknown_sse2_operator, Op}) + end. 
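To make the special cases in mem_to_ea_common/1 above concrete, here is roughly what it selects for a few x86-32 operands (esp = 4, ebp = 5 as usual; an illustrative sketch, not an exhaustive table):

   %% 0(%eax)    -> ea_base(0)              no SIB, no displacement
   %% 0(%esp)    -> ea_sib(sib(4))          esp as base always needs a SIB
   %% 0(%ebp)    -> ea_disp8_base(0, 5)     ebp as base needs an explicit disp8
   %% 8(%eax)    -> ea_disp8_base(8, 0)
   %% 1024(%eax) -> ea_disp32_base(1024, 0)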
+ +%% OP xmm, mem +resolve_sse2_binop_args(Src=#x86_mem{type=double}, + Dst=#x86_temp{type=double}) -> + {temp_to_xmm(Dst),mem_to_rm64fp(Src)}; +%% movsd mem, xmm +resolve_sse2_binop_args(Src=#x86_temp{type=double}, + Dst=#x86_mem{type=double}) -> + {mem_to_rm64fp(Dst),temp_to_xmm(Src)}; +%% OP xmm, xmm +resolve_sse2_binop_args(Src=#x86_temp{type=double}, + Dst=#x86_temp{type=double}) -> + {temp_to_xmm(Dst),temp_to_rm64fp(Src)}. + +%%% fmove -> cvtsi2sd or movsd +resolve_sse2_fmove_args(Src, Dst) -> + case {Src,Dst} of + {#x86_temp{type=untagged}, #x86_temp{type=double}} -> % cvtsi2sd xmm, reg + {cvtsi2sd, {temp_to_xmm(Dst),temp_to_rmArch(Src)}}; + {#x86_mem{type=untagged}, #x86_temp{type=double}} -> % cvtsi2sd xmm, mem + {cvtsi2sd, {temp_to_xmm(Dst),mem_to_rmArch(Src)}}; + _ -> % movsd + {movsd, resolve_sse2_binop_args(Src, Dst)} + end. + +%%% xorpd xmm, mem +resolve_sse2_fchs_arg(Dst=#x86_temp{type=double}) -> + {temp_to_xmm(Dst), + {rm64fp, {rm_mem, ?HIPE_X86_ENCODE:?EA_DISP32_ABSOLUTE( + {?LOAD_ADDRESS, + {c_const, sse2_fnegate_mask}})}}}. + +%% mov mem, imm +resolve_move_args(#x86_imm{value=ImmSrc}, Dst=#x86_mem{type=Type}, Context) -> + case Type of % to support byte, int16 and int32 stores + byte -> + ByteImm = ImmSrc band 255, %to ensure that it is a bytesized imm + {mem_to_rm8(Dst),{imm8,ByteImm}}; + int16 -> + {mem_to_rm16(Dst),{imm16,ImmSrc band 16#FFFF}}; + int32 -> + {_,Imm} = translate_imm(#x86_imm{value=ImmSrc}, Context, false), + {mem_to_rm32(Dst),{imm32,Imm}}; + _ -> + RMArch = mem_to_rmArch(Dst), + {_,Imm} = translate_imm(#x86_imm{value=ImmSrc}, Context, false), + {RMArch,{imm32,Imm}} + end; + +%% mov reg,mem +resolve_move_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}, _Context) -> + case Type of + int32 -> % must be unsigned + {temp_to_reg32(Dst),mem_to_rm32(Src)}; + _ -> + {temp_to_regArch(Dst),mem_to_rmArch(Src)} + end; + +%% mov mem,reg +resolve_move_args(Src=#x86_temp{}, Dst=#x86_mem{type=Type}, _Context) -> + case Type of % to support byte, int16 and int32 stores + byte -> + {mem_to_rm8(Dst),temp_to_reg8(Src)}; + int16 -> + {mem_to_rm16(Dst),temp_to_reg16(Src)}; + int32 -> + {mem_to_rm32(Dst),temp_to_reg32(Src)}; + tagged -> % tagged, untagged + {mem_to_rmArch(Dst),temp_to_regArch(Src)}; + untagged -> % tagged, untagged + {mem_to_rmArch(Dst),temp_to_regArch(Src)} + end; + +%% mov reg,reg +resolve_move_args(Src=#x86_temp{}, Dst=#x86_temp{}, _Context) -> + {temp_to_regArch(Dst),temp_to_rmArch(Src)}; + +%% mov reg,imm +resolve_move_args(Src=#x86_imm{value=_ImmSrc}, Dst=#x86_temp{}, Context) -> + {_,Imm} = translate_imm(Src, Context, false), + imm_move_args(Dst, Imm). + +-ifdef(HIPE_AMD64). +imm_move_args(Dst, Imm) -> + if is_number(Imm), Imm >= 0 -> + {temp_to_reg32(Dst),{imm32,Imm}}; + true -> + {temp_to_rm64(Dst),{imm32,Imm}} + end. +-else. +imm_move_args(Dst, Imm) -> + {temp_to_reg32(Dst),{imm32,Imm}}. +-endif. + +-ifdef(HIPE_AMD64). +translate_move64(I, Context) -> + Arg = resolve_move64_args(hipe_x86:move64_src(I), + hipe_x86:move64_dst(I), + Context), + [{mov, Arg, I}]. + +%% mov reg,imm64 +resolve_move64_args(Src=#x86_imm{}, Dst=#x86_temp{}, Context) -> + {_,Imm} = translate_imm(Src, Context, false), + {temp_to_reg64(Dst),{imm64,Imm}}. +-else. +translate_move64(I, _Context) -> exit({?MODULE, I}). +-endif. + +%%% mov{s,z}x +resolve_movx_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}) -> + {temp_to_regArch(Dst), + case Type of + byte -> + mem_to_rm8(Src); + int16 -> + mem_to_rm16(Src); + int32 -> + mem_to_rm32(Src) + end}. 
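The move cases above show how the destination's type field alone decides the store width; a sketch of the resulting operand pairs (hypothetical operands):

   %% store imm to a byte-typed mem    -> {mem_to_rm8(Dst), {imm8, Imm band 255}}
   %% store reg to an int16-typed mem  -> {mem_to_rm16(Dst), temp_to_reg16(Src)}
   %% store reg to tagged/untagged mem -> {mem_to_rmArch(Dst), temp_to_regArch(Src)}
   %% movzx/movsx widen in the other direction, picking rm8/rm16/rm32 from the
   %% source type and always writing a full-width register.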
+ +%%% alu/cmp (_not_ test) +resolve_alu_args(Src, Dst, Context) -> + case {Src,Dst} of + {#x86_imm{}, #x86_mem{}} -> + {mem_to_rmArch(Dst), translate_imm(Src, Context, true)}; + {#x86_mem{}, #x86_temp{}} -> + {temp_to_regArch(Dst), mem_to_rmArch(Src)}; + {#x86_temp{}, #x86_mem{}} -> + {mem_to_rmArch(Dst), temp_to_regArch(Src)}; + {#x86_temp{}, #x86_temp{}} -> + {temp_to_regArch(Dst), temp_to_rmArch(Src)}; + {#x86_imm{}, #x86_temp{reg=0}} -> % eax,imm + NewSrc = translate_imm(Src, Context, true), + NewDst = + case NewSrc of + {imm8,_} -> temp_to_rmArch(Dst); + {imm32,_} -> ?EAX + end, + {NewDst, NewSrc}; + {#x86_imm{}, #x86_temp{}} -> + {temp_to_rmArch(Dst), translate_imm(Src, Context, true)} + end. + +%%% test +resolve_test_args(Src, Dst, Context) -> + case Src of + #x86_imm{} -> % imm8 not allowed + {_ImmSize,ImmValue} = translate_imm(Src, Context, false), + NewDst = + case Dst of + #x86_temp{reg=0} -> ?EAX; + #x86_temp{} -> temp_to_rmArch(Dst); + #x86_mem{} -> mem_to_rmArch(Dst) + end, + {NewDst, {imm32,ImmValue}}; + #x86_temp{} -> + NewDst = + case Dst of + #x86_temp{} -> temp_to_rmArch(Dst); + #x86_mem{} -> mem_to_rmArch(Dst) + end, + {NewDst, temp_to_regArch(Src)} + end. + +%%% shifts +resolve_shift_args(Src, Dst, Context) -> + RM32 = + case Dst of + #x86_temp{} -> temp_to_rmArch(Dst); + #x86_mem{} -> mem_to_rmArch(Dst) + end, + Count = + case Src of + #x86_imm{value=1} -> 1; + #x86_imm{} -> translate_imm(Src, Context, true); % must be imm8 + #x86_temp{reg=1} -> cl % temp must be ecx + end, + {RM32, Count}. + +%% x87_binop mem +resolve_x87_unop_arg(Arg=#x86_mem{type=Type})-> + case Type of + 'double' -> {mem_to_rm64fp(Arg)}; + 'untagged' -> {mem_to_rmArch(Arg)}; + _ -> ?EXIT({fmovArgNotSupported,{Arg}}) + end; +resolve_x87_unop_arg(Arg=#x86_fpreg{}) -> + {fpreg_to_stack(Arg)}; +resolve_x87_unop_arg([]) -> + []. + +%% x87_binop mem, st(i) +resolve_x87_binop_args(Src=#x86_fpreg{}, Dst=#x86_mem{})-> + {mem_to_rm64fp(Dst),fpreg_to_stack(Src)}; +%% x87_binop st(0), st(i) +resolve_x87_binop_args(Src=#x86_fpreg{}, Dst=#x86_fpreg{})-> + {fpreg_to_stack(Dst),fpreg_to_stack(Src)}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +mk_data_relocs(RefsFromConsts, LabelMap) -> + lists:flatten(mk_data_relocs(RefsFromConsts, LabelMap, [])). + +mk_data_relocs([{MFA,Labels} | Rest], LabelMap, Acc) -> + Map = [case Label of + {L,Pos} -> + Offset = find({MFA,L}, LabelMap), + {Pos,Offset}; + {sorted,Base,OrderedLabels} -> + {sorted, Base, [begin + Offset = find({MFA,L}, LabelMap), + {Order, Offset} + end + || {L,Order} <- OrderedLabels]} + end + || Label <- Labels], + %% msg("Map: ~w Map\n",[Map]), + mk_data_relocs(Rest, LabelMap, [Map,Acc]); +mk_data_relocs([],_,Acc) -> Acc. + +find({MFA,L},LabelMap) -> + gb_trees:get({MFA,L}, LabelMap). + +slim_sorted_exportmap([{Addr,M,F,A}|Rest], Closures, Exports) -> + IsClosure = lists:member({M,F,A}, Closures), + IsExported = is_exported(F, A, Exports), + [Addr,M,F,A,IsClosure,IsExported | slim_sorted_exportmap(Rest, Closures, Exports)]; +slim_sorted_exportmap([],_,_) -> []. + +is_exported(F, A, Exports) -> lists:member({F,A}, Exports). + +%%% +%%% Assembly listing support (pp_asm option). +%%% + +print(String, Arglist, Options) -> + ?when_option(pp_asm, Options, io:format(String, Arglist)). + +print_insn(Address, Bytes, I, Options) -> + ?when_option(pp_asm, Options, print_insn_2(Address, Bytes, I)), + ?when_option(pp_cxmon, Options, print_code_list_2(Bytes)). 
+ +print_code_list_2([H | Tail]) -> + print_byte(H), + io:format(","), + print_code_list_2(Tail); +print_code_list_2([]) -> + io:format(""). + +print_insn_2(Address, Bytes, {_,_,OrigI}) -> + io:format("~8.16b | ", [Address]), + print_code_list(Bytes, 0), + ?HIPE_X86_PP:pp_insn(OrigI). + +print_code_list([Byte|Rest], Len) -> + print_byte(Byte), + print_code_list(Rest, Len+1); +print_code_list([], Len) -> + fill_spaces(24-(Len*2)), + io:format(" | "). + +print_byte(Byte) -> + io:format("~2.16.0b", [Byte band 16#FF]). + +fill_spaces(N) when N > 0 -> + io:format(" "), + fill_spaces(N-1); +fill_spaces(0) -> + []. + +%%% +%%% Lookup a constant in a ConstMap. +%%% + +find_const({MFA,Label},[{pcm_entry,MFA,Label,ConstNo,_,_,_}|_]) -> + ConstNo; +find_const(N,[_|R]) -> + find_const(N,R); +find_const(C,[]) -> + ?EXIT({constant_not_found,C}). diff --git a/lib/hipe/x86/hipe_x86_cfg.erl b/lib/hipe/x86/hipe_x86_cfg.erl new file mode 100644 index 0000000000..d15dcc061a --- /dev/null +++ b/lib/hipe/x86/hipe_x86_cfg.erl @@ -0,0 +1,147 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_x86_cfg). + +-export([init/1, + labels/1, start_label/1, + succ/2, pred/2, + bb/2, bb_add/3]). +-export([postorder/1, reverse_postorder/1]). +-export([linearise/1, params/1, arity/1, redirect_jmp/3]). + +%%% these tell cfg.inc what to define (ugly as hell) +-define(PRED_NEEDED,true). +-define(BREADTH_ORDER,true). +-define(PARAMS_NEEDED,true). +-define(START_LABEL_UPDATE_NEEDED,true). + +-include("hipe_x86.hrl"). +-include("../flow/cfg.hrl"). +-include("../flow/cfg.inc"). + +init(Defun) -> + %% XXX: this assumes that the code starts with a label insn. + %% Is that guaranteed? + Code = hipe_x86:defun_code(Defun), + StartLab = hipe_x86:label_label(hd(Code)), + Data = hipe_x86:defun_data(Defun), + IsClosure = hipe_x86:defun_is_closure(Defun), + MFA = hipe_x86:defun_mfa(Defun), + IsLeaf = hipe_x86:defun_is_leaf(Defun), + Formals = hipe_x86:defun_formals(Defun), + CFG0 = mk_empty_cfg(MFA, StartLab, Data, IsClosure, IsLeaf, Formals), + take_bbs(Code, CFG0). + +is_branch(I) -> + case I of + #jmp_fun{} -> true; + #jmp_label{} -> true; + #jmp_switch{} -> true; + #pseudo_call{} -> true; + #pseudo_jcc{} -> true; + #pseudo_tailcall{} -> true; + #ret{} -> true; + _ -> false + end. + +branch_successors(Branch) -> + case Branch of + #jmp_fun{} -> []; + #jmp_label{label=Label} -> [Label]; + #jmp_switch{labels=Labels} -> Labels; + #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=ExnLab}} -> + case ExnLab of + [] -> [ContLab]; + _ -> [ContLab,ExnLab] + end; + #pseudo_jcc{true_label=TrueLab,false_label=FalseLab} -> [FalseLab,TrueLab]; + #pseudo_tailcall{} -> []; + #ret{} -> [] + end. + +-ifdef(REMOVE_TRIVIAL_BBS_NEEDED). +fails_to(_Instr) -> []. +-endif. 
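For example (made-up label numbers), branch_successors/1 above gives a call one or two out-edges depending on whether it has an exception handler, and gives returns and tailcalls none:

   branch_successors(#pseudo_call{contlab=10, sdesc=#x86_sdesc{exnlab=[]}})  %% -> [10]
   branch_successors(#pseudo_call{contlab=10, sdesc=#x86_sdesc{exnlab=20}})  %% -> [10,20]
   branch_successors(#ret{npop=0})                                           %% -> []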
+ +redirect_jmp(I, Old, New) -> + case I of + #jmp_label{label=Label} -> + if Old =:= Label -> I#jmp_label{label=New}; + true -> I + end; + #pseudo_jcc{true_label=TrueLab, false_label=FalseLab} -> + J0 = if Old =:= TrueLab -> I#pseudo_jcc{true_label=New}; + true -> I + end, + if Old =:= FalseLab -> J0#pseudo_jcc{false_label=New}; + true -> J0 + end; + %% handle pseudo_call too? + _ -> I + end. + +%%% XXX: fix if labels can occur in operands +%% redirect_ops(_Labels, CFG, _Map) -> +%% CFG. + +mk_goto(Label) -> + hipe_x86:mk_jmp_label(Label). + +is_label(I) -> + hipe_x86:is_label(I). + +label_name(Label) -> + hipe_x86:label_label(Label). + +mk_label(Name) -> + hipe_x86:mk_label(Name). + +%% is_comment(I) -> +%% hipe_x86:is_comment(I). +%% +%% is_goto(I) -> +%% hipe_x86:is_jmp_label(I). + +linearise(CFG) -> % -> defun, not insn list + MFA = function(CFG), + Formals = params(CFG), + Code = linearize_cfg(CFG), + Data = data(CFG), + VarRange = hipe_gensym:var_range(x86), + LabelRange = hipe_gensym:label_range(x86), + IsClosure = is_closure(CFG), + IsLeaf = is_leaf(CFG), + hipe_x86:mk_defun(MFA, Formals, IsClosure, IsLeaf, + Code, Data, VarRange, LabelRange). + +arity(CFG) -> + {_M,_F,A} = function(CFG), + A. + +%% init_gensym(CFG) -> +%% HighestVar = find_highest_var(CFG), +%% HighestLabel = find_highest_label(CFG), +%% hipe_gensym:init(), +%% hipe_gensym:set_var(x86, HighestVar), +%% hipe_gensym:set_label(x86, HighestLabel). +%% +%% highest_var(Code) -> +%% hipe_x86:highest_temp(Code). diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl new file mode 100644 index 0000000000..3387f77595 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_defuse.erl @@ -0,0 +1,160 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% compute def/use sets for x86 insns +%%% +%%% TODO: +%%% - represent EFLAGS (condition codes) use/def by a virtual reg? +%%% - should push use/def %esp? + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_DEFUSE, hipe_amd64_defuse). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(RV, rax). +-else. +-define(HIPE_X86_DEFUSE, hipe_x86_defuse). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(RV, eax). +-endif. + +-module(?HIPE_X86_DEFUSE). +-export([insn_def/1, insn_use/1]). %% src_use/1]). +-include("../x86/hipe_x86.hrl"). + +%%% +%%% insn_def(Insn) -- Return set of temps defined by an instruction. 
+%%% + +insn_def(I) -> + case I of + #alu{dst=Dst} -> dst_def(Dst); + #cmovcc{dst=Dst} -> dst_def(Dst); + #fmove{dst=Dst} -> dst_def(Dst); + #fp_binop{dst=Dst} -> dst_def(Dst); + #fp_unop{arg=Arg} -> dst_def(Arg); + #imul{temp=Temp} -> [Temp]; + #lea{temp=Temp} -> [Temp]; + #move{dst=Dst} -> dst_def(Dst); + #move64{dst=Dst} -> dst_def(Dst); + #movsx{dst=Dst} -> dst_def(Dst); + #movzx{dst=Dst} -> dst_def(Dst); + #pseudo_call{} -> call_clobbered(); + #pseudo_spill{} -> []; + #pseudo_tailcall_prepare{} -> tailcall_clobbered(); + #shift{dst=Dst} -> dst_def(Dst); + %% call, cmp, comment, jcc, jmp_fun, jmp_label, jmp_switch, label + %% pseudo_jcc, pseudo_tailcall, push, ret + _ -> [] + end. + +dst_def(Dst) -> + case Dst of + #x86_temp{} -> [Dst]; + #x86_fpreg{} -> [Dst]; + _ -> [] + end. + +call_clobbered() -> + [hipe_x86:mk_temp(R, T) + || {R,T} <- ?HIPE_X86_REGISTERS:call_clobbered()]. + +tailcall_clobbered() -> + [hipe_x86:mk_temp(R, T) + || {R,T} <- ?HIPE_X86_REGISTERS:tailcall_clobbered()]. + +%%% +%%% insn_use(Insn) -- Return set of temps used by an instruction. +%%% + +insn_use(I) -> + case I of + #alu{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, [])); + #call{'fun'=Fun} -> addtemp(Fun, []); + #cmovcc{src=Src, dst=Dst} -> addtemp(Src, dst_use(Dst)); + #cmp{src=Src, dst=Dst} -> addtemp(Src, addtemp(Dst, [])); + #fmove{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst)); + #fp_unop{arg=Arg} -> addtemp(Arg, []); + #fp_binop{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, [])); + #imul{imm_opt=ImmOpt,src=Src,temp=Temp} -> + addtemp(Src, case ImmOpt of [] -> addtemp(Temp, []); _ -> [] end); + #jmp_fun{'fun'=Fun} -> addtemp(Fun, []); + #jmp_switch{temp=Temp, jtab=JTab} -> addtemp(Temp, addtemp(JTab, [])); + #lea{mem=Mem} -> addtemp(Mem, []); + #move{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst)); + #move64{} -> []; + #movsx{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst)); + #movzx{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst)); + #pseudo_call{'fun'=Fun,sdesc=#x86_sdesc{arity=Arity}} -> + addtemp(Fun, arity_use(Arity)); + #pseudo_spill{args=Args} -> Args; + #pseudo_tailcall{'fun'=Fun,arity=Arity,stkargs=StkArgs} -> + addtemp(Fun, addtemps(StkArgs, addtemps(tailcall_clobbered(), + arity_use(Arity)))); + #push{src=Src} -> addtemp(Src, []); + #ret{} -> [hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?RV(), 'tagged')]; + #shift{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, [])); + %% comment, jcc, jmp_label, label, pseudo_jcc, pseudo_tailcall_prepare + _ -> [] + end. + +arity_use(Arity) -> + [hipe_x86:mk_temp(R, 'tagged') + || R <- ?HIPE_X86_REGISTERS:args(Arity)]. + +dst_use(Dst) -> + case Dst of + #x86_mem{base=Base,off=Off} -> addbase(Base, addtemp(Off, [])); + _ -> [] + end. + +%%% +%%% src_use(Src) -- Return set of temps used by a source operand. +%%% + +%% src_use(Src) -> +%% addtemp(Src, []). + +%%% +%%% Auxiliary operations on sets of temps +%%% + +addtemps([Arg|Args], Set) -> + addtemps(Args, addtemp(Arg, Set)); +addtemps([], Set) -> + Set. + +addtemp(Arg, Set) -> + case Arg of + #x86_temp{} -> add(Arg, Set); + #x86_mem{base=Base,off=Off} -> addtemp(Off, addbase(Base, Set)); + #x86_fpreg{} -> add(Arg, Set); + _ -> Set + end. + +addbase(Base, Set) -> + case Base of + [] -> Set; + _ -> addtemp(Base, Set) + end. + +add(Arg, Set) -> + case lists:member(Arg, Set) of + false -> [Arg|Set]; + _ -> Set + end. 
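A small sketch of how the def/use sets come out (temps invented, built with hipe_x86:mk_temp/2): for a two-address add whose destination is a register temp, the destination is both defined and used, while a memory destination defines nothing and only contributes its address temps to the use set.

   T1 = hipe_x86:mk_temp(1, tagged), T2 = hipe_x86:mk_temp(2, tagged),
   I = #alu{aluop='add', src=T1, dst=T2},
   insn_def(I),   %% -> [T2]
   insn_use(I),   %% -> [T1,T2]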
diff --git a/lib/hipe/x86/hipe_x86_encode.erl b/lib/hipe/x86/hipe_x86_encode.erl
new file mode 100644
index 0000000000..db7f53ad26
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_encode.erl
@@ -0,0 +1,1302 @@
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%% Copyright (C) 2000-2005 Mikael Pettersson
+%%%
+%%% This is the syntax of x86 r/m operands:
+%%%
+%%% opnd ::= reg mod == 11
+%%% | MEM[ea] mod != 11
+%%%
+%%% ea ::= disp32(reg) mod == 10, r/m != ESP
+%%% | disp32 sib12 mod == 10, r/m == 100
+%%% | disp8(reg) mod == 01, r/m != ESP
+%%% | disp8 sib12 mod == 01, r/m == 100
+%%% | (reg) mod == 00, r/m != ESP and EBP
+%%% | sib0 mod == 00, r/m == 100
+%%% | disp32 mod == 00, r/m == 101 [on x86-32]
+%%% | disp32(%rip) mod == 00, r/m == 101 [on x86-64]
+%%%
+%%% // sib0: mod == 00
+%%% sib0 ::= disp32(,index,scale) base == EBP, index != ESP
+%%% | disp32 base == EBP, index == 100
+%%% | (base,index,scale) base != EBP, index != ESP
+%%% | (base) base != EBP, index == 100
+%%%
+%%% // sib12: mod == 01 or 10
+%%% sib12 ::= (base,index,scale) index != ESP
+%%% | (base) index == 100
+%%%
+%%% scale ::= 00 | 01 | 10 | 11 index << scale
+%%%
+%%% Notes:
+%%%
+%%% 1. ESP cannot be used as index register.
+%%% 2. Use of ESP as base register requires a SIB byte.
+%%% 3. disp(reg), when reg != ESP, can be represented without
+%%% [r/m == reg] or with [r/m == 100, base == reg] a SIB byte.
+%%% 4. disp32 can be represented without [mod == 00, r/m == 101]
+%%% or with [mod == 00, r/m == 100, base == 101, index == 100]
+%%% a SIB byte.
+%%% 5. x86-32 and x86-64 interpret mod==00b r/m==101b EAs differently:
+%%% on x86-32 the disp32 is an absolute address, but on x86-64 the
+%%% disp32 is relative to the %rip of the next instruction.
+%%% Absolute disp32s need a SIB on x86-64.
+
+-module(hipe_x86_encode).
+
+-export([% condition codes
+ cc/1,
+ % 8-bit registers
+ %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0,
+ % 32-bit registers
+ %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0,
+ % operands
+ sindex/2, sib/1, sib/2,
+ ea_disp32_base/2, ea_disp32_sib/2,
+ ea_disp8_base/2, ea_disp8_sib/2,
+ ea_base/1,
+ %% ea_disp32_sindex/1, % XXX: do not use on x86-32, only on x86-64
+ ea_disp32_sindex/2,
+ ea_sib/1, ea_disp32/1,
+ rm_reg/1, rm_mem/1,
+ % instructions
+ insn_encode/3, insn_sizeof/2]).
+
+%%-define(DO_HIPE_X86_ENCODE_TEST,true).
+-ifdef(DO_HIPE_X86_ENCODE_TEST).
+-export([dotest/0, dotest/1]). % for testing, don't use
+-endif.
+
+-define(ASSERT(F,G), if G -> [] ; true -> exit({?MODULE,F}) end).
+%-define(ASSERT(F,G), []).
+
+%%% condition codes
+
+-define(CC_O, 2#0000). % overflow
+-define(CC_NO, 2#0001). % no overflow
+-define(CC_B, 2#0010). % below, <u
+-define(CC_AE, 2#0011). % above or equal, >=u
+-define(CC_E, 2#0100). % equal
+-define(CC_NE, 2#0101). % not equal
+-define(CC_BE, 2#0110). % below or equal, <=u
+-define(CC_A, 2#0111). % above, >u
+-define(CC_S, 2#1000). % sign, +
+-define(CC_NS, 2#1001). % not sign, -
+-define(CC_PE, 2#1010). % parity even
+-define(CC_PO, 2#1011). % parity odd
+-define(CC_L, 2#1100). % less than, <s
+-define(CC_GE, 2#1101). % greater or equal, >=s
+-define(CC_LE, 2#1110). % less or equal, <=s
+-define(CC_G, 2#1111). % greater than, >s
+
+cc(o) -> ?CC_O;
+cc(no) -> ?CC_NO;
+cc(b) -> ?CC_B;
+cc(ae) -> ?CC_AE;
+cc(e) -> ?CC_E;
+cc(ne) -> ?CC_NE;
+cc(be) -> ?CC_BE;
+cc(a) -> ?CC_A;
+cc(s) -> ?CC_S;
+cc(ns) -> ?CC_NS;
+cc(pe) -> ?CC_PE;
+cc(po) -> ?CC_PO;
+cc(l) -> ?CC_L;
+cc(ge) -> ?CC_GE;
+cc(le) -> ?CC_LE;
+cc(g) -> ?CC_G.
+
+%%% 8-bit registers
+
+-define(AL, 2#000).
+-define(CL, 2#001).
+-define(DL, 2#010).
+-define(BL, 2#011).
+-define(AH, 2#100).
+-define(CH, 2#101).
+-define(DH, 2#110).
+-define(BH, 2#111).
+
+%% al() -> ?AL.
+%% cl() -> ?CL.
+%% dl() -> ?DL.
+%% bl() -> ?BL.
+%% ah() -> ?AH.
+%% ch() -> ?CH.
+%% dh() -> ?DH.
+%% bh() -> ?BH.
+
+%%% 32-bit registers
+
+-define(EAX, 2#000).
+-define(ECX, 2#001).
+-define(EDX, 2#010).
+-define(EBX, 2#011).
+-define(ESP, 2#100).
+-define(EBP, 2#101).
+-define(ESI, 2#110).
+-define(EDI, 2#111).
+
+%% eax() -> ?EAX.
+%% ecx() -> ?ECX.
+%% edx() -> ?EDX.
+%% ebx() -> ?EBX.
+%% esp() -> ?ESP.
+%% ebp() -> ?EBP.
+%% esi() -> ?ESI.
+%% edi() -> ?EDI.
+
+%%% r/m operands
+
+sindex(Scale, Index) when is_integer(Scale), is_integer(Index) ->
+ ?ASSERT(sindex, Scale >= 0),
+ ?ASSERT(sindex, Scale =< 3),
+ ?ASSERT(sindex, Index =/= ?ESP),
+ {sindex, Scale, Index}.
+
+-record(sib, {sindex_opt, base :: integer()}).
+sib(Base) when is_integer(Base) -> #sib{sindex_opt=none, base=Base}.
+sib(Base, Sindex) when is_integer(Base) -> #sib{sindex_opt=Sindex, base=Base}.
+
+ea_disp32_base(Disp32, Base) when is_integer(Base) ->
+ ?ASSERT(ea_disp32_base, Base =/= ?ESP),
+ {ea_disp32_base, Disp32, Base}.
+ea_disp32_sib(Disp32, SIB) -> {ea_disp32_sib, Disp32, SIB}.
+ea_disp8_base(Disp8, Base) when is_integer(Base) ->
+ ?ASSERT(ea_disp8_base, Base =/= ?ESP),
+ {ea_disp8_base, Disp8, Base}.
+ea_disp8_sib(Disp8, SIB) -> {ea_disp8_sib, Disp8, SIB}.
+ea_base(Base) when is_integer(Base) ->
+ ?ASSERT(ea_base, Base =/= ?ESP),
+ ?ASSERT(ea_base, Base =/= ?EBP),
+ {ea_base, Base}.
+%% ea_disp32_sindex(Disp32) -> {ea_disp32_sindex, Disp32, none}.
+ea_disp32_sindex(Disp32, Sindex) -> {ea_disp32_sindex, Disp32, Sindex}.
+ea_sib(SIB) ->
+ ?ASSERT(ea_sib, SIB#sib.base =/= ?EBP),
+ {ea_sib, SIB}.
+ea_disp32(Disp32) -> {ea_disp32, Disp32}.
+
+rm_reg(Reg) -> {rm_reg, Reg}.
+rm_mem(EA) -> {rm_mem, EA}.
+
+mk_modrm(Mod, RO, RM) ->
+ (Mod bsl 6) bor (RO bsl 3) bor RM.
+
+mk_sib(Scale, Index, Base) ->
+ (Scale bsl 6) bor (Index bsl 3) bor Base.
+
+le16(Word, Tail) ->
+ [Word band 16#FF, (Word bsr 8) band 16#FF | Tail].
+
+le32(Word, Tail) when is_integer(Word) ->
+ [Word band 16#FF, (Word bsr 8) band 16#FF,
+ (Word bsr 16) band 16#FF, (Word bsr 24) band 16#FF | Tail];
+le32({Tag,Val}, Tail) -> % a relocatable datum
+ [{le32,Tag,Val} | Tail].
+
+enc_sindex_opt({sindex,Scale,Index}) -> {Scale, Index};
+enc_sindex_opt(none) -> {2#00, 2#100}.
+
+enc_sib(#sib{sindex_opt=SindexOpt, base=Base}) ->
+ {Scale, Index} = enc_sindex_opt(SindexOpt),
+ mk_sib(Scale, Index, Base).
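The two byte-packing helpers above can be checked by hand; for a register-direct ecx operand with reg/opcode field 0, and for a (%eax,%ebx,4) scaled-index byte (register numbers as defined earlier; illustrative only):

   mk_modrm(2#11, 2#000, ?ECX)   %% = 2#11000001 = 16#C1
   mk_sib(2#10, ?EBX, ?EAX)      %% = 2#10011000 = 16#98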
+ +enc_ea(EA, RO, Tail) -> + case EA of + {ea_disp32_base, Disp32, Base} -> + [mk_modrm(2#10, RO, Base) | le32(Disp32, Tail)]; + {ea_disp32_sib, Disp32, SIB} -> + [mk_modrm(2#10, RO, 2#100), enc_sib(SIB) | le32(Disp32, Tail)]; + {ea_disp8_base, Disp8, Base} -> + [mk_modrm(2#01, RO, Base), Disp8 | Tail]; + {ea_disp8_sib, Disp8, SIB} -> + [mk_modrm(2#01, RO, 2#100), enc_sib(SIB), Disp8 | Tail]; + {ea_base, Base} -> + [mk_modrm(2#00, RO, Base) | Tail]; + {ea_disp32_sindex, Disp32, SindexOpt} -> + {Scale, Index} = enc_sindex_opt(SindexOpt), + SIB = mk_sib(Scale, Index, 2#101), + MODRM = mk_modrm(2#00, RO, 2#100), + [MODRM, SIB | le32(Disp32, Tail)]; + {ea_sib, SIB} -> + [mk_modrm(2#00, RO, 2#100), enc_sib(SIB) | Tail]; + {ea_disp32, Disp32} -> + [mk_modrm(2#00, RO, 2#101) | le32(Disp32, Tail)] + end. + +encode_rm(RM, RO, Tail) -> + case RM of + {rm_reg, Reg} -> [mk_modrm(2#11, RO, Reg) | Tail]; + {rm_mem, EA} -> enc_ea(EA, RO, Tail) + end. + +sizeof_ea(EA) -> + case element(1, EA) of + ea_disp32_base -> 5; + ea_disp32_sib -> 6; + ea_disp8_base -> 2; + ea_disp8_sib -> 3; + ea_base -> 1; + ea_disp32_sindex -> 6; + ea_sib -> 2; + ea_disp32 -> 5 + end. + +sizeof_rm(RM) -> + case RM of + {rm_reg, _} -> 1; + {rm_mem, EA} -> sizeof_ea(EA) + end. + +%%% Floating point stack positions + +-define(ST0, 2#000). +-define(ST1, 2#001). +-define(ST2, 2#010). +-define(ST3, 2#011). +-define(ST4, 2#100). +-define(ST5, 2#101). +-define(ST6, 2#110). +-define(ST7, 2#111). + +st(0) -> ?ST0; +st(1) -> ?ST1; +st(2) -> ?ST2; +st(3) -> ?ST3; +st(4) -> ?ST4; +st(5) -> ?ST5; +st(6) -> ?ST6; +st(7) -> ?ST7. + + +%%% Instructions +%%% +%%% Insn ::= {Op,Opnds} +%%% Opnds ::= {Opnd1,...,Opndn} (n >= 0) +%%% Opnd ::= eax | ax | al | 1 | cl +%%% | {imm32,Imm32} | {imm16,Imm16} | {imm8,Imm8} +%%% | {rm32,RM32} | {rm16,RM16} | {rm8,RM8} +%%% | {rel32,Rel32} | {rel8,Rel8} +%%% | {moffs32,Moffs32} | {moffs16,Moffs16} | {moffs8,Moffs8} +%%% | {cc,CC} +%%% | {reg32,Reg32} | {reg16,Reg16} | {reg8,Reg8} +%%% | {ea,EA} + +-define(PFX_OPND, 16#66). + +arith_binop_encode(SubOpcode, Opnds) -> + %% add, or, adc, sbb, and, sub, xor, cmp + case Opnds of + {eax, {imm32,Imm32}} -> + [16#05 bor (SubOpcode bsl 3) | le32(Imm32, [])]; + {{rm32,RM32}, {imm32,Imm32}} -> + [16#81 | encode_rm(RM32, SubOpcode, le32(Imm32, []))]; + {{rm32,RM32}, {imm8,Imm8}} -> + [16#83 | encode_rm(RM32, SubOpcode, [Imm8])]; + {{rm32,RM32}, {reg32,Reg32}} -> + [16#01 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])]; + {{reg32,Reg32}, {rm32,RM32}} -> + [16#03 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])] + end. + +arith_binop_sizeof(Opnds) -> + %% add, or, adc, sbb, and, sub, xor, cmp + case Opnds of + {eax, {imm32,_}} -> + 1 + 4; + {{rm32,RM32}, {imm32,_}} -> + 1 + sizeof_rm(RM32) + 4; + {{rm32,RM32}, {imm8,_}} -> + 1 + sizeof_rm(RM32) + 1; + {{rm32,RM32}, {reg32,_}} -> + 1 + sizeof_rm(RM32); + {{reg32,_}, {rm32,RM32}} -> + 1 + sizeof_rm(RM32) + end. + +bs_op_encode(Opcode, {{reg32,Reg32}, {rm32,RM32}}) -> % bsf, bsr + [16#0F, Opcode | encode_rm(RM32, Reg32, [])]. + +bs_op_sizeof({{reg32,_}, {rm32,RM32}}) -> % bsf, bsr + 2 + sizeof_rm(RM32). + +bswap_encode({{reg32,Reg32}}) -> + [16#0F, 16#C8 bor Reg32]. + +bswap_sizeof({{reg32,_}}) -> + 2. + +bt_op_encode(SubOpcode, Opnds) -> % bt, btc, btr, bts + case Opnds of + {{rm32,RM32}, {reg32,Reg32}} -> + [16#0F, 16#A3 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])]; + {{rm32,RM32}, {imm8,Imm8}} -> + [16#0F, 16#BA | encode_rm(RM32, SubOpcode, [Imm8])] + end. 
+ +bt_op_sizeof(Opnds) -> % bt, btc, btr, bts + case Opnds of + {{rm32,RM32}, {reg32,_}} -> + 2 + sizeof_rm(RM32); + {{rm32,RM32}, {imm8,_}} -> + 2 + sizeof_rm(RM32) + 1 + end. + +call_encode(Opnds) -> + case Opnds of + {{rel32,Rel32}} -> + [16#E8 | le32(Rel32, [])]; + {{rm32,RM32}} -> + [16#FF | encode_rm(RM32, 2#010, [])] + end. + +call_sizeof(Opnds) -> + case Opnds of + {{rel32,_}} -> + 1 + 4; + {{rm32,RM32}} -> + 1 + sizeof_rm(RM32) + end. + +cbw_encode({}) -> + [?PFX_OPND, 16#98]. + +cbw_sizeof({}) -> + 2. + +nullary_op_encode(Opcode, {}) -> + %% cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std + [Opcode]. + +nullary_op_sizeof({}) -> + %% cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std + 1. + +cmovcc_encode({{cc,CC}, {reg32,Reg32}, {rm32,RM32}}) -> + [16#0F, 16#40 bor CC | encode_rm(RM32, Reg32, [])]. + +cmovcc_sizeof({{cc,_}, {reg32,_}, {rm32,RM32}}) -> + 2 + sizeof_rm(RM32). + +incdec_encode(SubOpcode, Opnds) -> % SubOpcode is either 0 or 1 + case Opnds of + {{rm32,RM32}} -> + [16#FF | encode_rm(RM32, SubOpcode, [])]; + {{reg32,Reg32}} -> + [16#40 bor (SubOpcode bsl 3) bor Reg32] + end. + +incdec_sizeof(Opnds) -> + case Opnds of + {{rm32,RM32}} -> + 1 + sizeof_rm(RM32); + {{reg32,_}} -> + 1 + end. + +arith_unop_encode(Opcode, {{rm32,RM32}}) -> % div, idiv, mul, neg, not + [16#F7 | encode_rm(RM32, Opcode, [])]. + +arith_unop_sizeof({{rm32,RM32}}) -> % div, idiv, mul, neg, not + 1 + sizeof_rm(RM32). + +enter_encode({{imm16,Imm16}, {imm8,Imm8}}) -> + [16#C8 | le16(Imm16, [Imm8])]. + +enter_sizeof({{imm16,_}, {imm8,_}}) -> + 1 + 2 + 1. + +imul_encode(Opnds) -> + case Opnds of + {{rm32,RM32}} -> % *= rm32 + [16#F7 | encode_rm(RM32, 2#101, [])]; + {{reg32,Reg32}, {rm32,RM32}} -> % reg *= rm32 + [16#0F, 16#AF | encode_rm(RM32, Reg32, [])]; + {{reg32,Reg32}, {rm32,RM32}, {imm8,Imm8}} -> % reg := rm32 * sext(imm8) + [16#6B | encode_rm(RM32, Reg32, [Imm8])]; + {{reg32,Reg32}, {rm32,RM32}, {imm32,Imm32}} -> % reg := rm32 * imm32 + [16#69 | encode_rm(RM32, Reg32, le32(Imm32, []))] + end. + +imul_sizeof(Opnds) -> + case Opnds of + {{rm32,RM32}} -> + 1 + sizeof_rm(RM32); + {{reg32,_}, {rm32,RM32}} -> + 2 + sizeof_rm(RM32); + {{reg32,_}, {rm32,RM32}, {imm8,_}} -> + 1 + sizeof_rm(RM32) + 1; + {{reg32,_}, {rm32,RM32}, {imm32,_}} -> + 1 + sizeof_rm(RM32) + 4 + end. + +jcc_encode(Opnds) -> + case Opnds of + {{cc,CC}, {rel8,Rel8}} -> + [16#70 bor CC, Rel8]; + {{cc,CC}, {rel32,Rel32}} -> + [16#0F, 16#80 bor CC | le32(Rel32, [])] + end. + +jcc_sizeof(Opnds) -> + case Opnds of + {{cc,_}, {rel8,_}} -> + 2; + {{cc,_}, {rel32,_}} -> + 2 + 4 + end. + +jmp8_op_encode(Opcode, {{rel8,Rel8}}) -> % jecxz, loop, loope, loopne + [Opcode, Rel8]. + +jmp8_op_sizeof({{rel8,_}}) -> % jecxz, loop, loope, loopne + 2. + +jmp_encode(Opnds) -> + case Opnds of + {{rel8,Rel8}} -> + [16#EB, Rel8]; + {{rel32,Rel32}} -> + [16#E9 | le32(Rel32, [])]; + {{rm32,RM32}} -> + [16#FF | encode_rm(RM32, 2#100, [])] + end. + +jmp_sizeof(Opnds) -> + case Opnds of + {{rel8,_}} -> + 2; + {{rel32,_}} -> + 1 + 4; + {{rm32,RM32}} -> + 1 + sizeof_rm(RM32) + end. + +lea_encode({{reg32,Reg32}, {ea,EA}}) -> + [16#8D | enc_ea(EA, Reg32, [])]. + +lea_sizeof({{reg32,_}, {ea,EA}}) -> + 1 + sizeof_ea(EA). 
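The 2-byte and 6-byte jcc forms above are exactly what the assembler's jcc_sdi bookkeeping (the 2-byte initial estimate and the 6-2 growth recorded for hipe_sdi) refers to. With the 'not equal' condition code 2#0101, for instance (a sketch):

   jcc_encode({{cc,2#0101}, {rel8,16#10}})     %% -> [16#75,16#10], 2 bytes
   jcc_encode({{cc,2#0101}, {rel32,16#1234}})  %% -> [16#0F,16#85 | le32(16#1234,[])], 6 bytes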
+ +mov_encode(Opnds) -> + case Opnds of + {{rm8,RM8}, {reg8,Reg8}} -> + [16#88 | encode_rm(RM8, Reg8, [])]; + {{rm16,RM16}, {reg16,Reg16}} -> + [?PFX_OPND, 16#89 | encode_rm(RM16, Reg16, [])]; + {{rm32,RM32}, {reg32,Reg32}} -> + [16#89 | encode_rm(RM32, Reg32, [])]; + {{reg8,Reg8}, {rm8,RM8}} -> + [16#8A | encode_rm(RM8, Reg8, [])]; + {{reg16,Reg16}, {rm16,RM16}} -> + [?PFX_OPND, 16#8B | encode_rm(RM16, Reg16, [])]; + {{reg32,Reg32}, {rm32,RM32}} -> + [16#8B | encode_rm(RM32, Reg32, [])]; + {al, {moffs8,Moffs8}} -> + [16#A0 | le32(Moffs8, [])]; + {ax, {moffs16,Moffs16}} -> + [?PFX_OPND, 16#A1 | le32(Moffs16, [])]; + {eax, {moffs32,Moffs32}} -> + [16#A1 | le32(Moffs32, [])]; + {{moffs8,Moffs8}, al} -> + [16#A2 | le32(Moffs8, [])]; + {{moffs16,Moffs16}, ax} -> + [?PFX_OPND, 16#A3 | le32(Moffs16, [])]; + {{moffs32,Moffs32}, eax} -> + [16#A3 | le32(Moffs32, [])]; + {{reg8,Reg8}, {imm8,Imm8}} -> + [16#B0 bor Reg8, Imm8]; + {{reg16,Reg16}, {imm16,Imm16}} -> + [?PFX_OPND, 16#B8 bor Reg16 | le16(Imm16, [])]; + {{reg32,Reg32}, {imm32,Imm32}} -> + [16#B8 bor Reg32 | le32(Imm32, [])]; + {{rm8,RM8}, {imm8,Imm8}} -> + [16#C6 | encode_rm(RM8, 2#000, [Imm8])]; + {{rm16,RM16}, {imm16,Imm16}} -> + [?PFX_OPND, 16#C7 | encode_rm(RM16, 2#000, le16(Imm16, []))]; + {{rm32,RM32}, {imm32,Imm32}} -> + [16#C7 | encode_rm(RM32, 2#000, le32(Imm32, []))] + end. + +mov_sizeof(Opnds) -> + case Opnds of + {{rm8,RM8}, {reg8,_}} -> + 1 + sizeof_rm(RM8); + {{rm16,RM16}, {reg16,_}} -> + 2 + sizeof_rm(RM16); + {{rm32,RM32}, {reg32,_}} -> + 1 + sizeof_rm(RM32); + {{reg8,_}, {rm8,RM8}} -> + 1 + sizeof_rm(RM8); + {{reg16,_}, {rm16,RM16}} -> + 2 + sizeof_rm(RM16); + {{reg32,_}, {rm32,RM32}} -> + 1 + sizeof_rm(RM32); + {al, {moffs8,_}} -> + 1 + 4; + {ax, {moffs16,_}} -> + 2 + 4; + {eax, {moffs32,_}} -> + 1 + 4; + {{moffs8,_}, al} -> + 1 + 4; + {{moffs16,_}, ax} -> + 2 + 4; + {{moffs32,_}, eax} -> + 1 + 4; + {{reg8,_}, {imm8,_}} -> + 2; + {{reg16,_}, {imm16,_}} -> + 2 + 2; + {{reg32,_}, {imm32,_}} -> + 1 + 4; + {{rm8,RM8}, {imm8,_}} -> + 1 + sizeof_rm(RM8) + 1; + {{rm16,RM16}, {imm16,_}} -> + 2 + sizeof_rm(RM16) + 2; + {{rm32,RM32}, {imm32,_}} -> + 1 + sizeof_rm(RM32) + 4 + end. + +movx_op_encode(Opcode, Opnds) -> % movsx, movzx + case Opnds of + {{reg16,Reg16}, {rm8,RM8}} -> + [?PFX_OPND, 16#0F, Opcode | encode_rm(RM8, Reg16, [])]; + {{reg32,Reg32}, {rm8,RM8}} -> + [16#0F, Opcode | encode_rm(RM8, Reg32, [])]; + {{reg32,Reg32}, {rm16,RM16}} -> + [16#0F, Opcode bor 1 | encode_rm(RM16, Reg32, [])] + end. + +movx_op_sizeof(Opnds) -> + case Opnds of + {{reg16,_}, {rm8,RM8}} -> + 3 + sizeof_rm(RM8); + {{reg32,_}, {rm8,RM8}} -> + 2 + sizeof_rm(RM8); + {{reg32,_}, {rm16,RM16}} -> + 2 + sizeof_rm(RM16) + end. + +pop_encode(Opnds) -> + case Opnds of + {{rm32,RM32}} -> + [16#8F | encode_rm(RM32, 2#000, [])]; + {{reg32,Reg32}} -> + [16#58 bor Reg32] + end. + +pop_sizeof(Opnds) -> + case Opnds of + {{rm32,RM32}} -> + 1 + sizeof_rm(RM32); + {{reg32,_}} -> + 1 + end. + +push_encode(Opnds) -> + case Opnds of + {{rm32,RM32}} -> + [16#FF | encode_rm(RM32, 2#110, [])]; + {{reg32,Reg32}} -> + [16#50 bor Reg32]; + {{imm8,Imm8}} -> % sign-extended + [16#6A, Imm8]; + {{imm32,Imm32}} -> + [16#68 | le32(Imm32, [])] + end. + +push_sizeof(Opnds) -> + case Opnds of + {{rm32,RM32}} -> + 1 + sizeof_rm(RM32); + {{reg32,_}} -> + 1; + {{imm8,_}} -> + 2; + {{imm32,_}} -> + 1 + 4 + end. 
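Two of the mov forms above, evaluated by hand (register numbers as defined earlier; a sketch, not an exhaustive check):

   mov_encode({{reg32,2#000}, {imm32,16#1234}})      %% mov eax,0x1234 -> [16#B8,16#34,16#12,0,0]
   mov_encode({{reg32,2#001}, {rm32,rm_reg(2#010)}}) %% mov ecx,edx    -> [16#8B,16#CA]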
+ +shift_op_encode(SubOpcode, Opnds) -> % rcl, rcr, rol, ror, sar, shl, shr + case Opnds of + {{rm32,RM32}, 1} -> + [16#D1 | encode_rm(RM32, SubOpcode, [])]; + {{rm32,RM32}, cl} -> + [16#D3 | encode_rm(RM32, SubOpcode, [])]; + {{rm32,RM32}, {imm8,Imm8}} -> + [16#C1 | encode_rm(RM32, SubOpcode, [Imm8])]; + {{rm16,RM16}, {imm8,Imm8}} -> + [?PFX_OPND, 16#C1 | encode_rm(RM16, SubOpcode, [Imm8])] + end. + +shift_op_sizeof(Opnds) -> % rcl, rcr, rol, ror, sar, shl, shr + case Opnds of + {{rm32,RM32}, 1} -> + 1 + sizeof_rm(RM32); + {{rm32,RM32}, cl} -> + 1 + sizeof_rm(RM32); + {{rm32,RM32}, {imm8,_Imm8}} -> + 1 + sizeof_rm(RM32) + 1; + {{rm16,RM16}, {imm8,_Imm8}} -> + 1 + 1 + sizeof_rm(RM16) + 1 + end. + +ret_encode(Opnds) -> + case Opnds of + {} -> + [16#C3]; + {{imm16,Imm16}} -> + [16#C2 | le16(Imm16, [])] + end. + +ret_sizeof(Opnds) -> + case Opnds of + {} -> + 1; + {{imm16,_}} -> + 1 + 2 + end. + +setcc_encode({{cc,CC}, {rm8,RM8}}) -> + [16#0F, 16#90 bor CC | encode_rm(RM8, 2#000, [])]. + +setcc_sizeof({{cc,_}, {rm8,RM8}}) -> + 2 + sizeof_rm(RM8). + +shd_op_encode(Opcode, Opnds) -> + case Opnds of + {{rm32,RM32}, {reg32,Reg32}, {imm8,Imm8}} -> + [16#0F, Opcode | encode_rm(RM32, Reg32, [Imm8])]; + {{rm32,RM32}, {reg32,Reg32}, cl} -> + [16#0F, Opcode bor 1 | encode_rm(RM32, Reg32, [])] + end. + +shd_op_sizeof(Opnds) -> + case Opnds of + {{rm32,RM32}, {reg32,_}, {imm8,_}} -> + 2 + sizeof_rm(RM32) + 1; + {{rm32,RM32}, {reg32,_}, cl} -> + 2 + sizeof_rm(RM32) + end. + +test_encode(Opnds) -> + case Opnds of + {eax, {imm32,Imm32}} -> + [16#A9 | le32(Imm32, [])]; + {{rm32,RM32}, {imm32,Imm32}} -> + [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))]; + {{rm32,RM32}, {reg32,Reg32}} -> + [16#85 | encode_rm(RM32, Reg32, [])] + end. + +test_sizeof(Opnds) -> + case Opnds of + {eax, {imm32,_}} -> + 1 + 4; + {{rm32,RM32}, {imm32,_}} -> + 1 + sizeof_rm(RM32) + 4; + {{rm32,RM32}, {reg32,_}} -> + 1 + sizeof_rm(RM32) + end. + +fild_encode(Opnds) -> + %% The operand cannot be a register! + {{rm32, RM32}} = Opnds, + [16#DB | encode_rm(RM32, 2#000, [])]. + +fild_sizeof(Opnds) -> + {{rm32, RM32}} = Opnds, + 1 + sizeof_rm(RM32). + +fld_encode(Opnds) -> + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DD | encode_rm(RM64fp, 2#000, [])]; + {{fpst, St}} -> + [16#D9, 16#C0 bor st(St)] + end. + +fld_sizeof(Opnds) -> + case Opnds of + {{rm64fp, RM64fp}} -> + 1 + sizeof_rm(RM64fp); + {{fpst, _}} -> + 2 + end. + +fp_comm_arith_encode(OpCode, Opnds) -> + %% fadd, fmul + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DC | encode_rm(RM64fp, OpCode, [])]; + {{fpst,0}, {fpst,St}} -> + [16#D8, (16#C0 bor (OpCode bsl 3)) bor st(St)]; + {{fpst,St}, {fpst,0}} -> + [16#DC, (16#C0 bor (OpCode bsl 3)) bor st(St)] + end. + +fp_comm_arith_pop_encode(OpCode, Opnds) -> + %% faddp, fmulp + case Opnds of + [] -> + [16#DE, 16#C0 bor (OpCode bsl 3) bor st(1)]; + {{fpst,St},{fpst,0}} -> + [16#DE, 16#C0 bor (OpCode bsl 3) bor st(St)] + end. + +fp_arith_encode(OpCode, Opnds) -> + %% fdiv, fsub + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DC | encode_rm(RM64fp, OpCode, [])]; + {{fpst,0}, {fpst,St}} -> + OpCode0 = OpCode band 2#110, + [16#D8, 16#C0 bor (OpCode0 bsl 3) bor st(St)]; + {{fpst,St}, {fpst,0}} -> + OpCode0 = OpCode bor 1, + [16#DC, 16#C0 bor (OpCode0 bsl 3) bor st(St)] + end. + +fp_arith_pop_encode(OpCode, Opnds) -> + %% fdivp, fsubp + OpCode0 = OpCode bor 1, + case Opnds of + [] -> + [16#DE, 16#C8 bor (OpCode0 bsl 3) bor st(1)]; + {{fpst,St}, {fpst,0}} -> + [16#DE, 16#C8 bor (OpCode0 bsl 3) bor st(St)] + end. 
+ +fp_arith_rev_encode(OpCode, Opnds) -> + %% fdivr, fsubr + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DC | encode_rm(RM64fp, OpCode, [])]; + {{fpst,0}, {fpst,St}} -> + OpCode0 = OpCode bor 1, + [16#D8, 16#C0 bor (OpCode0 bsl 3) bor st(St)]; + {{fpst,St}, {fpst,0}} -> + OpCode0 = OpCode band 2#110, + [16#DC, 16#C0 bor (OpCode0 bsl 3) bor st(St)] + end. + +fp_arith_rev_pop_encode(OpCode, Opnds) -> + %% fdivrp, fsubrp + OpCode0 = OpCode band 2#110, + case Opnds of + [] -> + [16#DE, 16#C0 bor (OpCode0 bsl 3) bor st(1)]; + {{fpst,St}, {fpst, 0}} -> + [16#DE, 16#C0 bor (OpCode0 bsl 3) bor st(St)] + end. + +fp_arith_sizeof(Opnds) -> + case Opnds of + {{rm64fp, RM64fp}} -> + 1 + sizeof_rm(RM64fp); + {{fpst,0}, {fpst,_}} -> + 2; + {{fpst,_}, {fpst,0}} -> + 2 + end. + +fst_encode(OpCode, Opnds) -> + case Opnds of + {{rm64fp, RM64fp}} -> + [16#DD | encode_rm(RM64fp, OpCode, [])]; + {{fpst, St}} -> + [16#DD, 16#C0 bor (OpCode bsl 3) bor st(St)] + end. + +fst_sizeof(Opnds) -> + case Opnds of + {{rm64fp, RM64fp}} -> + 1 + sizeof_rm(RM64fp); + {{fpst, _}} -> + 2 + end. + +fchs_encode() -> + [16#D9, 16#E0]. +fchs_sizeof() -> + 2. + +ffree_encode({{fpst, St}})-> + [16#DD, 16#C0 bor st(St)]. +ffree_sizeof() -> + 2. + +fwait_encode() -> + [16#9B]. +fwait_sizeof() -> + 1. + +fxch_encode(Opnds) -> + case Opnds of + [] -> + [16#D9, 16#C8 bor st(1)]; + {{fpst, St}} -> + [16#D9, 16#C8 bor st(St)] + end. +fxch_sizeof() -> + 2. + +insn_encode(Op, Opnds, Offset) -> + Bytes = insn_encode_internal(Op, Opnds), + case has_relocs(Bytes) of + false -> % the common case + {Bytes, []}; + _ -> + fix_relocs(Bytes, Offset, [], []) + end. + +has_relocs([{le32,_,_}|_]) -> true; +has_relocs([_|Bytes]) -> has_relocs(Bytes); +has_relocs([]) -> false. + +fix_relocs([{le32,Tag,Val}|Bytes], Offset, Code, Relocs) -> + fix_relocs(Bytes, Offset+4, + [16#00, 16#00, 16#00, 16#00 | Code], + [{Tag,Offset,Val}|Relocs]); +fix_relocs([Byte|Bytes], Offset, Code, Relocs) -> + fix_relocs(Bytes, Offset+1, [Byte|Code], Relocs); +fix_relocs([], _Offset, Code, Relocs) -> + {lists:reverse(Code), lists:reverse(Relocs)}. 
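+
+%% Illustrative note (a sketch with placeholder Tag/Val): if an encoder
+%% clause emits a relocatable 32-bit field as {le32,Tag,Val}, e.g.
+%%   [16#68, {le32,Tag,Val}]     %% "push" of a not-yet-known immediate
+%% then insn_encode/3 above, called with Offset = 0, rewrites it to
+%%   {[16#68,16#00,16#00,16#00,16#00], [{Tag,1,Val}]}
+%% i.e. four zero placeholder bytes plus a relocation recording the byte
+%% offset (1) at which the real value must later be patched in.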
+ +insn_encode_internal(Op, Opnds) -> + case Op of + 'adc' -> arith_binop_encode(2#010, Opnds); + 'add' -> arith_binop_encode(2#000, Opnds); + 'and' -> arith_binop_encode(2#100, Opnds); + 'bsf' -> bs_op_encode(16#BC, Opnds); + 'bsr' -> bs_op_encode(16#BD, Opnds); + 'bswap' -> bswap_encode(Opnds); + 'bt' -> bt_op_encode(2#100, Opnds); + 'btc' -> bt_op_encode(2#111, Opnds); + 'btr' -> bt_op_encode(2#110, Opnds); + 'bts' -> bt_op_encode(2#101, Opnds); + 'call' -> call_encode(Opnds); + 'cbw' -> cbw_encode(Opnds); + 'cdq' -> nullary_op_encode(16#99, Opnds); + 'clc' -> nullary_op_encode(16#F8, Opnds); + 'cld' -> nullary_op_encode(16#FC, Opnds); + 'cmc' -> nullary_op_encode(16#F5, Opnds); + 'cmovcc' -> cmovcc_encode(Opnds); + 'cmp' -> arith_binop_encode(2#111, Opnds); + 'cwde' -> nullary_op_encode(16#98, Opnds); + 'dec' -> incdec_encode(2#001, Opnds); + 'div' -> arith_unop_encode(2#110, Opnds); + 'enter' -> enter_encode(Opnds); + 'fadd' -> fp_comm_arith_encode(2#000, Opnds); + 'faddp' -> fp_comm_arith_pop_encode(2#000, Opnds); + 'fchs' -> fchs_encode(); + 'fdiv' -> fp_arith_encode(2#110, Opnds); + 'fdivp' -> fp_arith_pop_encode(2#110, Opnds); + 'fdivr' -> fp_arith_rev_encode(2#111, Opnds); + 'fdivrp' -> fp_arith_rev_pop_encode(2#111, Opnds); + 'ffree' -> ffree_encode(Opnds); + 'fild' -> fild_encode(Opnds); + 'fld' -> fld_encode(Opnds); + 'fmul' -> fp_comm_arith_encode(2#001, Opnds); + 'fmulp' -> fp_comm_arith_pop_encode(2#001, Opnds); + 'fst' -> fst_encode(2#010, Opnds); + 'fstp' -> fst_encode(2#011, Opnds); + 'fsub' -> fp_arith_encode(2#100, Opnds); + 'fsubp' -> fp_arith_pop_encode(2#100, Opnds); + 'fsubr' -> fp_arith_rev_encode(2#101, Opnds); + 'fsubrp' -> fp_arith_rev_pop_encode(2#101, Opnds); + 'fwait' -> fwait_encode(); + 'fxch' -> fxch_encode(Opnds); + 'idiv' -> arith_unop_encode(2#111, Opnds); + 'imul' -> imul_encode(Opnds); + 'inc' -> incdec_encode(2#000, Opnds); + 'into' -> nullary_op_encode(16#CE, Opnds); + 'jcc' -> jcc_encode(Opnds); + 'jecxz' -> jmp8_op_encode(16#E3, Opnds); + 'jmp' -> jmp_encode(Opnds); + 'lea' -> lea_encode(Opnds); + 'leave' -> nullary_op_encode(16#C9, Opnds); + 'loop' -> jmp8_op_encode(16#E2, Opnds); + 'loope' -> jmp8_op_encode(16#E1, Opnds); + 'loopne' -> jmp8_op_encode(16#E0, Opnds); + 'mov' -> mov_encode(Opnds); + 'movsx' -> movx_op_encode(16#BE, Opnds); + 'movzx' -> movx_op_encode(16#B6, Opnds); + 'mul' -> arith_unop_encode(2#100, Opnds); + 'neg' -> arith_unop_encode(2#011, Opnds); + 'nop' -> nullary_op_encode(16#90, Opnds); + 'not' -> arith_unop_encode(2#010, Opnds); + 'or' -> arith_binop_encode(2#001, Opnds); + 'pop' -> pop_encode(Opnds); + 'prefix_fs' -> nullary_op_encode(16#64, Opnds); + 'push' -> push_encode(Opnds); + 'rcl' -> shift_op_encode(2#010, Opnds); + 'rcr' -> shift_op_encode(2#011, Opnds); + 'ret' -> ret_encode(Opnds); + 'rol' -> shift_op_encode(2#000, Opnds); + 'ror' -> shift_op_encode(2#001, Opnds); + 'sar' -> shift_op_encode(2#111, Opnds); + 'sbb' -> arith_binop_encode(2#011, Opnds); + 'setcc' -> setcc_encode(Opnds); + 'shl' -> shift_op_encode(2#100, Opnds); + 'shld' -> shd_op_encode(16#A4, Opnds); + 'shr' -> shift_op_encode(2#101, Opnds); + 'shrd' -> shd_op_encode(16#AC, Opnds); + 'stc' -> nullary_op_encode(16#F9, Opnds); + 'std' -> nullary_op_encode(16#FD, Opnds); + 'sub' -> arith_binop_encode(2#101, Opnds); + 'test' -> test_encode(Opnds); + 'xor' -> arith_binop_encode(2#110, Opnds); + _ -> exit({?MODULE,insn_encode,Op}) + end. 
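+
+%% Illustrative note (a sketch): instructions without relocatable fields
+%% come back with an empty relocation list. For example
+%%   insn_encode('push', {{imm8,3}}, 0)
+%% dispatches to push_encode/1 above and returns {[16#6A,3], []},
+%% matching insn_sizeof('push', {{imm8,3}}) = 2 below.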
+ +insn_sizeof(Op, Opnds) -> + case Op of + 'adc' -> arith_binop_sizeof(Opnds); + 'add' -> arith_binop_sizeof(Opnds); + 'and' -> arith_binop_sizeof(Opnds); + 'bsf' -> bs_op_sizeof(Opnds); + 'bsr' -> bs_op_sizeof(Opnds); + 'bswap' -> bswap_sizeof(Opnds); + 'bt' -> bt_op_sizeof(Opnds); + 'btc' -> bt_op_sizeof(Opnds); + 'btr' -> bt_op_sizeof(Opnds); + 'bts' -> bt_op_sizeof(Opnds); + 'call' -> call_sizeof(Opnds); + 'cbw' -> cbw_sizeof(Opnds); + 'cdq' -> nullary_op_sizeof(Opnds); + 'clc' -> nullary_op_sizeof(Opnds); + 'cld' -> nullary_op_sizeof(Opnds); + 'cmc' -> nullary_op_sizeof(Opnds); + 'cmovcc' -> cmovcc_sizeof(Opnds); + 'cmp' -> arith_binop_sizeof(Opnds); + 'cwde' -> nullary_op_sizeof(Opnds); + 'dec' -> incdec_sizeof(Opnds); + 'div' -> arith_unop_sizeof(Opnds); + 'enter' -> enter_sizeof(Opnds); + 'fadd' -> fp_arith_sizeof(Opnds); + 'faddp' -> fp_arith_sizeof(Opnds); + 'fchs' -> fchs_sizeof(); + 'fdiv' -> fp_arith_sizeof(Opnds); + 'fdivp' -> fp_arith_sizeof(Opnds); + 'fdivr' -> fp_arith_sizeof(Opnds); + 'fdivrp' -> fp_arith_sizeof(Opnds); + 'ffree' -> ffree_sizeof(); + 'fild' -> fild_sizeof(Opnds); + 'fld' -> fld_sizeof(Opnds); + 'fmul' -> fp_arith_sizeof(Opnds); + 'fmulp' -> fp_arith_sizeof(Opnds); + 'fst' -> fst_sizeof(Opnds); + 'fstp' -> fst_sizeof(Opnds); + 'fsub' -> fp_arith_sizeof(Opnds); + 'fsubp' -> fp_arith_sizeof(Opnds); + 'fsubr' -> fp_arith_sizeof(Opnds); + 'fsubrp' -> fp_arith_sizeof(Opnds); + 'fwait' -> fwait_sizeof(); + 'fxch' -> fxch_sizeof(); + 'idiv' -> arith_unop_sizeof(Opnds); + 'imul' -> imul_sizeof(Opnds); + 'inc' -> incdec_sizeof(Opnds); + 'into' -> nullary_op_sizeof(Opnds); + 'jcc' -> jcc_sizeof(Opnds); + 'jecxz' -> jmp8_op_sizeof(Opnds); + 'jmp' -> jmp_sizeof(Opnds); + 'lea' -> lea_sizeof(Opnds); + 'leave' -> nullary_op_sizeof(Opnds); + 'loop' -> jmp8_op_sizeof(Opnds); + 'loope' -> jmp8_op_sizeof(Opnds); + 'loopne' -> jmp8_op_sizeof(Opnds); + 'mov' -> mov_sizeof(Opnds); + 'movsx' -> movx_op_sizeof(Opnds); + 'movzx' -> movx_op_sizeof(Opnds); + 'mul' -> arith_unop_sizeof(Opnds); + 'neg' -> arith_unop_sizeof(Opnds); + 'nop' -> nullary_op_sizeof(Opnds); + 'not' -> arith_unop_sizeof(Opnds); + 'or' -> arith_binop_sizeof(Opnds); + 'pop' -> pop_sizeof(Opnds); + 'prefix_fs' -> nullary_op_sizeof(Opnds); + 'push' -> push_sizeof(Opnds); + 'rcl' -> shift_op_sizeof(Opnds); + 'rcr' -> shift_op_sizeof(Opnds); + 'ret' -> ret_sizeof(Opnds); + 'rol' -> shift_op_sizeof(Opnds); + 'ror' -> shift_op_sizeof(Opnds); + 'sar' -> shift_op_sizeof(Opnds); + 'sbb' -> arith_binop_sizeof(Opnds); + 'setcc' -> setcc_sizeof(Opnds); + 'shl' -> shift_op_sizeof(Opnds); + 'shld' -> shd_op_sizeof(Opnds); + 'shr' -> shift_op_sizeof(Opnds); + 'shrd' -> shd_op_sizeof(Opnds); + 'stc' -> nullary_op_sizeof(Opnds); + 'std' -> nullary_op_sizeof(Opnds); + 'sub' -> arith_binop_sizeof(Opnds); + 'test' -> test_sizeof(Opnds); + 'xor' -> arith_binop_sizeof(Opnds); + _ -> exit({?MODULE,insn_sizeof,Op}) + end. + +%%===================================================================== +%% testing interface +%%===================================================================== + +-ifdef(DO_HIPE_X86_ENCODE_TEST). + +say(OS, Str) -> + file:write(OS, Str). + +digit16(Dig0) -> + Dig = Dig0 band 16#F, + if Dig >= 16#A -> $A + (Dig - 16#A); + true -> $0 + Dig + end. + +say_byte(OS, Byte) -> + say(OS, "0x"), + say(OS, [digit16(Byte bsr 4)]), + say(OS, [digit16(Byte)]). + +init(OS) -> + say(OS, "\t.text\n"). 
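+
+%% Illustrative note (a sketch): these helpers emit gas-style assembler
+%% text, e.g. say_byte(OS, 16#C3) writes "0xC3", so the ".byte" lines
+%% produced by the test driver below can be assembled and disassembled
+%% externally to cross-check the encoder.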
+ +say_bytes(OS, Byte0, Bytes0) -> + say_byte(OS, Byte0), + case Bytes0 of + [] -> + say(OS, "\n"); + [Byte1|Bytes1] -> + say(OS, ","), + say_bytes(OS, Byte1, Bytes1) + end. + +t(OS, Op, Opnds) -> + insn_sizeof(Op, Opnds), + {[Byte|Bytes],[]} = insn_encode(Op, Opnds, 0), + say(OS, "\t.byte "), + say_bytes(OS, Byte, Bytes). + +dotest1(OS) -> + init(OS), + % exercise all rm32 types + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32(16#87654321)}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_sib(sib(?ECX))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_sib(sib(?ECX,sindex(2#10,?EDI)))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sindex(16#87654321)}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sindex(16#87654321,sindex(2#10,?EDI))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_base(?ECX)}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_sib(16#03,sib(?ECX))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_sib(16#03,sib(?ECX,sindex(2#10,?EDI)))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_base(16#3,?ECX)}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sib(16#87654321,sib(?ECX))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sib(16#87654321,sib(?ECX,sindex(2#10,?EDI)))}}), + t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_base(16#87654321,?EBP)}}), + t(OS,call,{{rm32,rm_reg(?EAX)}}), + t(OS,call,{{rm32,rm_mem(ea_disp32_sindex(16#87654321,sindex(2#10,?EDI)))}}), + t(OS,call,{{rel32,-5}}), + % default parameters for the tests below + Word32 = 16#87654321, + Word16 = 16#F00F, + Word8 = 16#80, + Imm32 = {imm32,Word32}, + Imm16 = {imm16,Word16}, + Imm8 = {imm8,Word8}, + RM32 = {rm32,rm_reg(?EDX)}, + RM16 = {rm16,rm_reg(?EDX)}, + RM8 = {rm8,rm_reg(?EDX)}, + Rel32 = {rel32,Word32}, + Rel8 = {rel8,Word8}, + Moffs32 = {moffs32,Word32}, + Moffs16 = {moffs16,Word32}, + Moffs8 = {moffs8,Word32}, + CC = {cc,?CC_G}, + Reg32 = {reg32,?EAX}, + Reg16 = {reg16,?EAX}, + Reg8 = {reg8,?AH}, + EA = {ea,ea_base(?ECX)}, + % exercise each instruction definition + t(OS,'adc',{eax,Imm32}), + t(OS,'adc',{RM32,Imm32}), + t(OS,'adc',{RM32,Imm8}), + t(OS,'adc',{RM32,Reg32}), + t(OS,'adc',{Reg32,RM32}), + t(OS,'add',{eax,Imm32}), + t(OS,'add',{RM32,Imm32}), + t(OS,'add',{RM32,Imm8}), + t(OS,'add',{RM32,Reg32}), + t(OS,'add',{Reg32,RM32}), + t(OS,'and',{eax,Imm32}), + t(OS,'and',{RM32,Imm32}), + t(OS,'and',{RM32,Imm8}), + t(OS,'and',{RM32,Reg32}), + t(OS,'and',{Reg32,RM32}), + t(OS,'bsf',{Reg32,RM32}), + t(OS,'bsr',{Reg32,RM32}), + t(OS,'bswap',{Reg32}), + t(OS,'bt',{RM32,Reg32}), + t(OS,'bt',{RM32,Imm8}), + t(OS,'btc',{RM32,Reg32}), + t(OS,'btc',{RM32,Imm8}), + t(OS,'btr',{RM32,Reg32}), + t(OS,'btr',{RM32,Imm8}), + t(OS,'bts',{RM32,Reg32}), + t(OS,'bts',{RM32,Imm8}), + t(OS,'call',{Rel32}), + t(OS,'call',{RM32}), + t(OS,'cbw',{}), + t(OS,'cdq',{}), + t(OS,'clc',{}), + t(OS,'cld',{}), + t(OS,'cmc',{}), + t(OS,'cmovcc',{CC,Reg32,RM32}), + t(OS,'cmp',{eax,Imm32}), + t(OS,'cmp',{RM32,Imm32}), + t(OS,'cmp',{RM32,Imm8}), + t(OS,'cmp',{RM32,Reg32}), + t(OS,'cmp',{Reg32,RM32}), + t(OS,'cwde',{}), + t(OS,'dec',{RM32}), + t(OS,'dec',{Reg32}), + t(OS,'div',{RM32}), + t(OS,'enter',{Imm16,{imm8,3}}), + t(OS,'idiv',{RM32}), + t(OS,'imul',{RM32}), + t(OS,'imul',{Reg32,RM32}), + t(OS,'imul',{Reg32,RM32,Imm8}), + t(OS,'imul',{Reg32,RM32,Imm32}), + t(OS,'inc',{RM32}), + t(OS,'inc',{Reg32}), + t(OS,'into',{}), + t(OS,'jcc',{CC,Rel8}), + t(OS,'jcc',{CC,Rel32}), + t(OS,'jecxz',{Rel8}), + t(OS,'jmp',{Rel8}), + t(OS,'jmp',{Rel32}), + t(OS,'jmp',{RM32}), + t(OS,'lea',{Reg32,EA}), + t(OS,'leave',{}), + t(OS,'loop',{Rel8}), + t(OS,'loope',{Rel8}), + t(OS,'loopne',{Rel8}), + t(OS,'mov',{RM8,Reg8}), + t(OS,'mov',{RM16,Reg16}), + 
t(OS,'mov',{RM32,Reg32}), + t(OS,'mov',{Reg8,RM8}), + t(OS,'mov',{Reg16,RM16}), + t(OS,'mov',{Reg32,RM32}), + t(OS,'mov',{al,Moffs8}), + t(OS,'mov',{ax,Moffs16}), + t(OS,'mov',{eax,Moffs32}), + t(OS,'mov',{Moffs8,al}), + t(OS,'mov',{Moffs16,ax}), + t(OS,'mov',{Moffs32,eax}), + t(OS,'mov',{Reg8,Imm8}), + t(OS,'mov',{Reg16,Imm16}), + t(OS,'mov',{Reg32,Imm32}), + t(OS,'mov',{RM8,Imm8}), + t(OS,'mov',{RM16,Imm16}), + t(OS,'mov',{RM32,Imm32}), + t(OS,'movsx',{Reg16,RM8}), + t(OS,'movsx',{Reg32,RM8}), + t(OS,'movsx',{Reg32,RM16}), + t(OS,'movzx',{Reg16,RM8}), + t(OS,'movzx',{Reg32,RM8}), + t(OS,'movzx',{Reg32,RM16}), + t(OS,'mul',{RM32}), + t(OS,'neg',{RM32}), + t(OS,'nop',{}), + t(OS,'not',{RM32}), + t(OS,'or',{eax,Imm32}), + t(OS,'or',{RM32,Imm32}), + t(OS,'or',{RM32,Imm8}), + t(OS,'or',{RM32,Reg32}), + t(OS,'or',{Reg32,RM32}), + t(OS,'pop',{RM32}), + t(OS,'pop',{Reg32}), + t(OS,'push',{RM32}), + t(OS,'push',{Reg32}), + t(OS,'push',{Imm8}), + t(OS,'push',{Imm32}), + t(OS,'rcl',{RM32,1}), + t(OS,'rcl',{RM32,cl}), + t(OS,'rcl',{RM32,Imm8}), + t(OS,'rcl',{RM16,Imm8}), + t(OS,'rcr',{RM32,1}), + t(OS,'rcr',{RM32,cl}), + t(OS,'rcr',{RM32,Imm8}), + t(OS,'rcr',{RM16,Imm8}), + t(OS,'ret',{}), + t(OS,'ret',{Imm16}), + t(OS,'rol',{RM32,1}), + t(OS,'rol',{RM32,cl}), + t(OS,'rol',{RM32,Imm8}), + t(OS,'rol',{RM16,Imm8}), + t(OS,'ror',{RM32,1}), + t(OS,'ror',{RM32,cl}), + t(OS,'ror',{RM32,Imm8}), + t(OS,'ror',{RM16,Imm8}), + t(OS,'sar',{RM32,1}), + t(OS,'sar',{RM32,cl}), + t(OS,'sar',{RM32,Imm8}), + t(OS,'sar',{RM16,Imm8}), + t(OS,'sbb',{eax,Imm32}), + t(OS,'sbb',{RM32,Imm32}), + t(OS,'sbb',{RM32,Imm8}), + t(OS,'sbb',{RM32,Reg32}), + t(OS,'sbb',{Reg32,RM32}), + t(OS,'setcc',{CC,RM8}), + t(OS,'shl',{RM32,1}), + t(OS,'shl',{RM32,cl}), + t(OS,'shl',{RM32,Imm8}), + t(OS,'shl',{RM16,Imm8}), + t(OS,'shld',{RM32,Reg32,Imm8}), + t(OS,'shld',{RM32,Reg32,cl}), + t(OS,'shr',{RM32,1}), + t(OS,'shr',{RM32,cl}), + t(OS,'shr',{RM32,Imm8}), + t(OS,'shr',{RM16,Imm8}), + t(OS,'shrd',{RM32,Reg32,Imm8}), + t(OS,'shrd',{RM32,Reg32,cl}), + t(OS,'stc',{}), + t(OS,'std',{}), + t(OS,'sub',{eax,Imm32}), + t(OS,'sub',{RM32,Imm32}), + t(OS,'sub',{RM32,Imm8}), + t(OS,'sub',{RM32,Reg32}), + t(OS,'sub',{Reg32,RM32}), + t(OS,'test',{eax,Imm32}), + t(OS,'test',{RM32,Imm32}), + t(OS,'test',{RM32,Reg32}), + t(OS,'xor',{eax,Imm32}), + t(OS,'xor',{RM32,Imm32}), + t(OS,'xor',{RM32,Imm8}), + t(OS,'xor',{RM32,Reg32}), + t(OS,'xor',{Reg32,RM32}), + t(OS,'prefix_fs',{}), t(OS,'add',{{reg32,?EAX},{rm32,rm_mem(ea_disp32(16#20))}}), + []. + +dotest() -> dotest1(group_leader()). % stdout == group_leader + +dotest(File) -> + {ok,OS} = file:open(File, [write]), + dotest1(OS), + file:close(OS). +-endif. diff --git a/lib/hipe/x86/hipe_x86_encode.txt b/lib/hipe/x86/hipe_x86_encode.txt new file mode 100644 index 0000000000..13746e2a47 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_encode.txt @@ -0,0 +1,213 @@ +$Id$ + +hipe_x86_encode USAGE GUIDE +Revision 0.4, 2001-10-09 + +This document describes how to use the hipe_x86_encode.erl module. + +Preliminaries +------------- +This is not a tutorial on the x86 architecture. The reader +should be familiar with both the programming model and +the general syntax of instructions and their operands. + +The hipe_x86_encode module follows the conventions in the +"Intel Architecture Software Developer's Manual, Volume 2: +Instruction Set Reference" document. In particular, the +order of source and destination operands in instructions +follows Intel's conventions: "add eax,edx" adds edx to eax. 
+The GNU Assembler "gas" follows the so-called AT&T syntax
+which reverses the order of the source and destination operands.
+
+Basic Functionality
+-------------------
+The hipe_x86_encode module implements the mapping from symbolic x86
+instructions to their binary representation, as lists of bytes.
+
+Instructions and operands have to match actual x86 instructions
+and operands exactly. The mapping from "abstract" instructions
+to correct x86 instructions has to be done before the instructions
+are passed to the hipe_x86_encode module. (In HiPE, this mapping
+is done by the hipe_x86_assemble module.)
+
+The hipe_x86_encode module handles arithmetic operations on 32-bit
+integers, data movement of 8, 16, and 32-bit words, and most
+control flow operations. A 32-bit address and operand size process
+mode is assumed, which is what Unix and Linux systems use.
+
+Operations and registers related to MMX, SIMD, 3dnow!, or operating
+system control are not implemented. A limited set of x87 floating-point
+instructions is supported by the module but is not covered by this
+guide. Segment registers are supported minimally: a 'prefix_fs'
+pseudo-instruction can be used to insert an FS segment register
+override prefix.
+
+Instruction Syntax
+------------------
+The function hipe_x86_encode:insn_encode/1 takes an instruction in
+symbolic form and translates it to its binary representation,
+as a list of bytes.
+
+Symbolic instructions are Erlang terms in the following syntax:
+
+        Insn    ::= {Op,Opnds}
+        Op      ::= (an Erlang atom)
+        Opnds   ::= {Opnd1,...,Opndn}   (n >= 0)
+        Opnd    ::= eax | ax | al | 1 | cl
+                  | {imm32,Imm32} | {imm16,Imm16} | {imm8,Imm8}
+                  | {rm32,RM32} | {rm16,RM16} | {rm8,RM8}
+                  | {rel32,Rel32} | {rel8,Rel8}
+                  | {moffs32,Moffs32} | {moffs16,Moffs16} | {moffs8,Moffs8}
+                  | {cc,CC}
+                  | {reg32,Reg32} | {reg16,Reg16} | {reg8,Reg8}
+                  | {ea,EA}
+        Imm32   ::= (a 32-bit integer; immediate value)
+        Imm16   ::= (a 16-bit integer; immediate value)
+        Imm8    ::= (an 8-bit integer; immediate value)
+        Rel32   ::= (a 32-bit integer; jump offset)
+        Rel8    ::= (an 8-bit integer; jump offset)
+        Moffs32 ::= (a 32-bit integer; address of 32-bit word)
+        Moffs16 ::= (a 32-bit integer; address of 16-bit word)
+        Moffs8  ::= (a 32-bit integer; address of 8-bit word)
+        CC      ::= (a 4-bit condition code)
+        Reg32   ::= (a 3-bit register number of a 32-bit register)
+        Reg16   ::= (same as Reg32, but the register size is 16 bits)
+        Reg8    ::= (a 3-bit register number of an 8-bit register)
+        EA      ::= (general operand; a memory cell)
+        RM32    ::= (general operand; a 32-bit register or memory cell)
+        RM16    ::= (same as RM32, but the operand size is 16 bits)
+        RM8     ::= (general operand; an 8-bit register or memory cell)
+
+To construct these terms, the hipe_x86_encode module exports several
+helper functions:
+
+cc/1
+        Converts an atom to a 4-bit condition code.
+
+al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0
+        Returns a 3-bit register number for an 8-bit register.
+
+eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0
+        Returns a 3-bit register number for a 32- or 16-bit register.
+
+A general operand can be a register or a memory operand.
+An x86 memory operand is expressed as an "effective address":
+
+        Displacement(Base register,Index register,Scale)
+or
+        [base register] + [(index register) * (scale)] + [displacement]
+
+where the base register is any of the 8 integer registers,
+the index register is any of the 8 integer registers except ESP,
+scale is 0, 1, 2, or 3 (multiply the index by 1, 2, 4, or 8),
+and displacement is an 8- or 32-bit offset.
+Most components are optional.
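+
+For example, the operand 8(%ebp,%ecx,4), with base register EBP, index
+register ECX scaled by 4 (scale 2), and displacement 8, denotes the
+address EBP + ECX*4 + 8. Using the constructor functions described
+below, one way to build it is (a sketch):
+
+        hipe_x86_encode:ea_disp8_sib(8,
+            hipe_x86_encode:sib(hipe_x86_encode:ebp(),
+                                hipe_x86_encode:sindex(2, hipe_x86_encode:ecx())))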
+
+An effective address is constructed by calling one of the following
+nine functions:
+
+ea_base/1
+        ea_base(Reg32), where Reg32 is not ESP or EBP,
+        constructs the EA "(Reg32)", i.e. Reg32.
+ea_disp32/1
+        ea_disp32(Disp32) constructs the EA "Disp32".
+ea_disp32_base/2
+        ea_disp32_base(Disp32, Reg32), where Reg32 is not ESP,
+        constructs the EA "Disp32(Reg32)", i.e. Reg32+Disp32.
+ea_disp8_base/2
+        This is like ea_disp32_base/2, except the displacement
+        is 8 bits instead of 32 bits. The CPU will _sign-extend_
+        the 8-bit displacement to 32 bits before using it.
+ea_disp32_sindex/1
+        ea_disp32_sindex(Disp32) constructs the EA "Disp32",
+        but uses a longer encoding than ea_disp32/1.
+        Hint: Don't use this one.
+
+The last four forms use index registers with or without scaling
+factors and base registers, so-called "SIBs". To build these, call:
+
+sindex/2
+        sindex(Scale, Index), where Scale is 0, 1, 2, or 3, and
+        Index is a 32-bit integer register except ESP, constructs
+        part of a SIB representing "Index * 2^Scale".
+sib/1
+        sib(Reg32) constructs a SIB containing only a base register
+        and no scaled index, "(Reg32)", i.e. "Reg32".
+sib/2
+        sib(Reg32, sindex(Scale, Index)) constructs a SIB
+        "(Reg32,Index,Scale)", i.e. "Reg32 + (Index * 2^Scale)".
+
+ea_sib/1
+        ea_sib(SIB), where SIB's base register is not EBP,
+        constructs an EA which is that SIB, i.e. "(Base)" or
+        "(Base,Index,Scale)".
+ea_disp32_sib/2
+        ea_disp32_sib(Disp32, SIB) constructs the EA "Disp32(SIB)",
+        i.e. "Base+Disp32" or "Base+(Index*2^Scale)+Disp32".
+ea_disp32_sindex/2
+        ea_disp32_sindex(Disp32, Sindex) constructs the EA
+        "Disp32(,Index,Scale)", i.e. "(Index*2^Scale)+Disp32".
+ea_disp8_sib/2
+        This is just like ea_disp32_sib/2, except the displacement
+        is 8 bits (with sign-extension).
+
+To construct a general operand, call one of these two functions:
+
+rm_reg/1
+        rm_reg(Reg) constructs a general operand which is that register.
+rm_mem/1
+        rm_mem(EA) constructs a general operand which is the memory
+        cell addressed by EA.
+
+A symbolic instruction with name "Op" and the n operands "Opnd1"
+to "Opndn" is represented as the tuple
+
+        {Op, {Opnd1, ..., Opndn}}
+
+Usage
+-----
+Once a symbolic instruction "Insn" has been constructed, it can be
+translated to binary by calling
+
+        insn_encode(Insn)
+
+which returns a list of bytes.
+
+Since x86 instructions have varying size (as opposed to most
+RISC machines), there is also a function
+
+        insn_sizeof(Insn)
+
+which returns the number of bytes the binary encoding will occupy.
+insn_sizeof(Insn) equals length(insn_encode(Insn)), but insn_sizeof
+is cheaper to compute. This is useful for two purposes: (1) when
+compiling to memory, one needs to know in advance how many bytes of
+memory to allocate for a piece of code, and (2) when computing the
+relative distance between a jump or call instruction and its target
+label.
+
+Examples
+--------
+1. nop
+is constructed as
+        {nop, {}}
+
+2. add eax,edx  (eax := eax + edx)
+can be constructed as
+        {add, {{rm32, hipe_x86_encode:rm_reg(hipe_x86_encode:eax())}, {reg32, hipe_x86_encode:edx()}}}
+or as
+        Reg32 = {reg32, hipe_x86_encode:eax()},
+        RM32 = {rm32, hipe_x86_encode:rm_reg(hipe_x86_encode:edx())},
+        {add, {Reg32, RM32}}
+
+3. mov edx,(eax)  (edx := MEM[eax])
+is constructed as
+        Reg32 = {reg32, hipe_x86_encode:edx()},
+        RM32 = {rm32, hipe_x86_encode:rm_mem(hipe_x86_encode:ea_base(hipe_x86_encode:eax()))},
+        {mov, {Reg32, RM32}}
+
+Addendum
+--------
+The hipe_x86_encode.erl source code is the authoritative reference
+for the hipe_x86_encode module.
+ +Please report errors in either hipe_x86_encode.erl or this guide +to mikpe@it.uu.se. diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl new file mode 100644 index 0000000000..0a3317a369 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_frame.erl @@ -0,0 +1,687 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% x86 stack frame handling +%%% +%%% - map non-register temps to stack slots +%%% - add explicit stack management code to prologue and epilogue, +%%% and at calls and tailcalls +%%% +%%% TODO: +%%% - Compute max stack in a pre-pass? (get rid of ref cell updates) +%%% - Merge all_temps and defun_minframe to a single +%%% pass, for compile-time efficiency reasons. + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_FRAME, hipe_amd64_frame). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_LIVENESS, hipe_amd64_liveness). +-define(LEAF_WORDS, ?AMD64_LEAF_WORDS). +-else. +-define(HIPE_X86_FRAME, hipe_x86_frame). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_LIVENESS, hipe_x86_liveness). +-define(LEAF_WORDS, ?X86_LEAF_WORDS). +-endif. + +-module(?HIPE_X86_FRAME). +-export([frame/2]). +-include("../x86/hipe_x86.hrl"). +-include("../rtl/hipe_literals.hrl"). + +frame(Defun, _Options) -> + Formals = fix_formals(hipe_x86:defun_formals(Defun)), + Temps0 = all_temps(hipe_x86:defun_code(Defun), Formals), + MinFrame = defun_minframe(Defun), + Temps = ensure_minframe(MinFrame, Temps0), + CFG0 = hipe_x86_cfg:init(Defun), + Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), + CFG1 = do_body(CFG0, Liveness, Formals, Temps), + hipe_x86_cfg:linearise(CFG1). + +fix_formals(Formals) -> + fix_formals(?HIPE_X86_REGISTERS:nr_args(), Formals). + +fix_formals(0, Rest) -> Rest; +fix_formals(N, [_|Rest]) -> fix_formals(N-1, Rest); +fix_formals(_, []) -> []. + +do_body(CFG0, Liveness, Formals, Temps) -> + Context = mk_context(Liveness, Formals, Temps), + CFG1 = do_blocks(CFG0, Context), + do_prologue(CFG1, Context). + +do_blocks(CFG, Context) -> + Labels = hipe_x86_cfg:labels(CFG), + do_blocks(Labels, CFG, Context). + +do_blocks([Label|Labels], CFG, Context) -> + Liveness = context_liveness(Context), + LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label), + Block = hipe_x86_cfg:bb(CFG, Label), + Code = hipe_bb:code(Block), + NewCode = do_block(Code, LiveOut, Context), + NewBlock = hipe_bb:code_update(Block, NewCode), + NewCFG = hipe_x86_cfg:bb_add(CFG, Label, NewBlock), + do_blocks(Labels, NewCFG, Context); +do_blocks([], CFG, _) -> + CFG. + +do_block(Insns, LiveOut, Context) -> + do_block(Insns, LiveOut, Context, context_framesize(Context), []). 
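+
+%% Illustrative note (a sketch): do_block/5 below threads FPoff, the byte
+%% offset currently added to every frame-slot reference. It starts at the
+%% frame size, grows by word_size() at each push and shrinks again when a
+%% call consumes the pushed arguments; the final clause checks that it is
+%% back at the frame size when the block ends.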
+ +do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) -> + {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0), + do_block(Insns, LiveOut, Context, FPoff1, lists:reverse(NewIs, RevCode)); +do_block([], _, Context, FPoff, RevCode) -> + FPoff0 = context_framesize(Context), + if FPoff =:= FPoff0 -> []; + true -> exit({?MODULE,do_block,FPoff}) + end, + lists:reverse(RevCode, []). + +do_insn(I, LiveOut, Context, FPoff) -> + case I of + #alu{} -> + {[do_alu(I, Context, FPoff)], FPoff}; + #cmp{} -> + {[do_cmp(I, Context, FPoff)], FPoff}; + #fp_unop{} -> + {do_fp_unop(I, Context, FPoff), FPoff}; + #fp_binop{} -> + {do_fp_binop(I, Context, FPoff), FPoff}; + #fmove{} -> + {[do_fmove(I, Context, FPoff)], FPoff}; + #imul{} -> + {[do_imul(I, Context, FPoff)], FPoff}; + #move{} -> + {[do_move(I, Context, FPoff)], FPoff}; + #movsx{} -> + {[do_movsx(I, Context, FPoff)], FPoff}; + #movzx{} -> + {[do_movzx(I, Context, FPoff)], FPoff}; + #pseudo_call{} -> + do_pseudo_call(I, LiveOut, Context, FPoff); + #pseudo_tailcall{} -> + {do_pseudo_tailcall(I, Context), context_framesize(Context)}; + #push{} -> + {[do_push(I, Context, FPoff)], FPoff+word_size()}; + #ret{} -> + {do_ret(I, Context, FPoff), context_framesize(Context)}; + #shift{} -> + {[do_shift(I, Context, FPoff)], FPoff}; + _ -> % comment, jmp, label, pseudo_jcc, pseudo_tailcall_prepare + {[I], FPoff} + end. + +%%% +%%% Convert any pseudo-temp operand in a binary (alu, cmp, move) +%%% or unary (push) instruction to an explicit x86_mem operand. +%%% + +do_alu(I, Context, FPoff) -> + #alu{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#alu{src=Src,dst=Dst}. + +do_cmp(I, Context, FPoff) -> + #cmp{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#cmp{src=Src,dst=Dst}. + +do_fp_unop(I, Context, FPoff) -> + #fp_unop{arg=Arg0} = I, + Arg = conv_opnd(Arg0, FPoff, Context), + [I#fp_unop{arg=Arg}]. + +do_fp_binop(I, Context, FPoff) -> + #fp_binop{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + [I#fp_binop{src=Src,dst=Dst}]. + +do_fmove(I, Context, FPoff) -> + #fmove{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#fmove{src=Src,dst=Dst}. + +do_imul(I, Context, FPoff) -> + #imul{src=Src0} = I, + Src = conv_opnd(Src0, FPoff, Context), + I#imul{src=Src}. + +do_move(I, Context, FPoff) -> + #move{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#move{src=Src,dst=Dst}. + +do_movsx(I, Context, FPoff) -> + #movsx{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#movsx{src=Src,dst=Dst}. + +do_movzx(I, Context, FPoff) -> + #movzx{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#movzx{src=Src,dst=Dst}. + +do_push(I, Context, FPoff) -> + #push{src=Src0} = I, + Src = conv_opnd(Src0, FPoff, Context), + I#push{src=Src}. + +do_shift(I, Context, FPoff) -> + #shift{src=Src0,dst=Dst0} = I, + Src = conv_opnd(Src0, FPoff, Context), + Dst = conv_opnd(Dst0, FPoff, Context), + I#shift{src=Src,dst=Dst}. + +conv_opnd(Opnd, FPoff, Context) -> + case opnd_is_pseudo(Opnd) of + false -> + Opnd; + true -> + conv_pseudo(Opnd, FPoff, Context) + end. + +conv_pseudo(Temp, FPoff, Context) -> + Off = FPoff + context_offset(Context, Temp), + conv_pseudo(Temp, Off). 
+ +conv_pseudo(Temp, Off) -> + hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(Off), hipe_x86:temp_type(Temp)). + +%%% +%%% Return - deallocate frame and emit 'ret $N' insn. +%%% + +do_ret(_I, Context, FPoff) -> + %% XXX: this conses up a new ret insn, ignoring the one rtl->x86 made + adjust_sp(FPoff, [hipe_x86:mk_ret(word_size()*context_arity(Context))]). + +adjust_sp(N, Rest) -> + if N =:= 0 -> + Rest; + true -> + [hipe_x86:mk_alu('add', hipe_x86:mk_imm(N), mk_sp()) | Rest] + end. + +%%% +%%% Recursive calls. +%%% + +do_pseudo_call(I, LiveOut, Context, FPoff0) -> + #x86_sdesc{exnlab=ExnLab,arity=OrigArity} = hipe_x86:pseudo_call_sdesc(I), + Fun0 = hipe_x86:pseudo_call_fun(I), + Fun1 = conv_opnd(Fun0, FPoff0, Context), + LiveTemps = [Temp || Temp <- LiveOut, temp_is_pseudo(Temp)], + SDesc = mk_sdesc(ExnLab, Context, LiveTemps), + ContLab = hipe_x86:pseudo_call_contlab(I), + Linkage = hipe_x86:pseudo_call_linkage(I), + CallCode = [hipe_x86:mk_pseudo_call(Fun1, SDesc, ContLab, Linkage)], + %% +word_size() for our RA and +word_size() for callee's RA should + %% it need to call inc_stack + StkArity = erlang:max(0, OrigArity - ?HIPE_X86_REGISTERS:nr_args()), + context_need_stack(Context, stack_need(FPoff0 + 2*word_size(), StkArity, Fun1)), + ArgsBytes = word_size() * StkArity, + {CallCode, FPoff0 - ArgsBytes}. + +stack_need(FPoff, StkArity, Fun) -> + case Fun of + #x86_prim{} -> FPoff; + #x86_mfa{m=M,f=F,a=A} -> + case erlang:is_builtin(M, F, A) of + true -> FPoff; + false -> stack_need_general(FPoff, StkArity) + end; + #x86_temp{} -> stack_need_general(FPoff, StkArity); + #x86_mem{} -> stack_need_general(FPoff, StkArity) + end. + +stack_need_general(FPoff, StkArity) -> + erlang:max(FPoff, FPoff + (?LEAF_WORDS - 2 - StkArity) * word_size()). + +%%% +%%% Create stack descriptors for call sites. +%%% + +mk_sdesc(ExnLab, Context, Temps) -> % for normal calls + Temps0 = only_tagged(Temps), + Live = mk_live(Context, Temps0), + Arity = context_arity(Context), + FSize = context_framesize(Context), + hipe_x86:mk_sdesc(ExnLab, FSize div word_size(), Arity, + list_to_tuple(Live)). + +only_tagged(Temps)-> + [X || X <- Temps, hipe_x86:temp_type(X) =:= 'tagged']. + +mk_live(Context, Temps) -> + lists:sort([temp_to_slot(Context, Temp) || Temp <- Temps]). + +temp_to_slot(Context, Temp) -> + (context_framesize(Context) + context_offset(Context, Temp)) + div word_size(). + +mk_minimal_sdesc(Context) -> % for inc_stack_0 calls + hipe_x86:mk_sdesc([], 0, context_arity(Context), {}). + +%%% +%%% Tailcalls. +%%% + +do_pseudo_tailcall(I, Context) -> % always at FPoff=context_framesize(Context) + Arity = context_arity(Context), + Args = hipe_x86:pseudo_tailcall_stkargs(I) ++ [context_ra(Context)], + Fun0 = hipe_x86:pseudo_tailcall_fun(I), + {Insns, FPoff1, Fun1} = do_tailcall_args(Args, Context, Fun0), + context_need_stack(Context, FPoff1), + FPoff2 = FPoff1 + word_size()+word_size()*Arity - word_size()*length(Args), + %% +word_size() for callee's inc_stack RA + StkArity = length(hipe_x86:pseudo_tailcall_stkargs(I)), + context_need_stack(Context, stack_need(FPoff2 + word_size(), StkArity, Fun1)), + I2 = hipe_x86:mk_jmp_fun(Fun1, hipe_x86:pseudo_tailcall_linkage(I)), + Insns ++ adjust_sp(FPoff2, [I2]). 
+ +do_tailcall_args(Args, Context, Fun0) -> + FPoff0 = context_framesize(Context), + Arity = context_arity(Context), + FrameTop = word_size() + word_size()*Arity, + DangerOff = FrameTop - word_size()*length(Args), + Moves = mk_moves(Args, FrameTop, []), + {Stores, Simple, Conflict} = + split_moves(Moves, Context, DangerOff, [], [], []), + %% sanity check (shouldn't trigger any more) + if DangerOff < -FPoff0 -> + exit({?MODULE,do_tailcall_args,DangerOff,-FPoff0}); + true -> [] + end, + FPoff1 = FPoff0, + %% + {Pushes, MoreSimple, FPoff2} = split_conflict(Conflict, FPoff1, [], []), + %% + {PushFun0, FPoff3, LoadFun1, Fun1} = + case opnd_is_pseudo(Fun0) of + false -> + {[], FPoff2, [], Fun0}; + true -> + Type = hipe_x86:temp_type(Fun0), + Temp1 = mk_temp1(Type), + Fun0Off = context_offset(Context, Fun0), + MEM0 = conv_pseudo(Fun0, FPoff2 + Fun0Off), + if Fun0Off >= DangerOff -> + Fun1Off = hipe_x86:mk_imm(0), + MEM1 = hipe_x86:mk_mem(mk_sp(), Fun1Off, Type), + {[hipe_x86:mk_push(MEM0)], + FPoff2 + word_size(), + [hipe_x86:mk_move(MEM1, Temp1)], + Temp1}; + true -> + {[], FPoff2, [hipe_x86:mk_move(MEM0, Temp1)], Temp1} + end + end, + %% + RegTemp0 = ?HIPE_X86_REGISTERS:temp0(), + TempReg = + case hipe_x86:is_temp(Fun1) of + true -> + RegFun1 = hipe_x86:temp_reg(Fun1), + if RegFun1 =/= RegTemp0 -> RegTemp0; + true -> ?HIPE_X86_REGISTERS:temp1() + end; + false -> + RegTemp0 + end, + %% + {Pushes ++ PushFun0 ++ + store_moves(Stores, FPoff3, LoadFun1 ++ + simple_moves(Simple, FPoff3, TempReg, + simple_moves(MoreSimple, FPoff3, TempReg, + []))), + FPoff3, Fun1}. + +mk_moves([Arg|Args], Off, Moves) -> + Off1 = Off - word_size(), + mk_moves(Args, Off1, [{Arg,Off1}|Moves]); +mk_moves([], _, Moves) -> + Moves. + +split_moves([Move|Moves], Context, DangerOff, Stores, Simple, Conflict) -> + {Src,DstOff} = Move, + case src_is_pseudo(Src) of + false -> + split_moves(Moves, Context, DangerOff, [Move|Stores], + Simple, Conflict); + true -> + SrcOff = context_offset(Context, Src), + Type = typeof_src(Src), + if SrcOff =:= DstOff -> + split_moves(Moves, Context, DangerOff, Stores, + Simple, Conflict); + SrcOff >= DangerOff -> + split_moves(Moves, Context, DangerOff, Stores, + Simple, [{SrcOff,DstOff,Type}|Conflict]); + true -> + split_moves(Moves, Context, DangerOff, Stores, + [{SrcOff,DstOff,Type}|Simple], Conflict) + end + end; +split_moves([], _, _, Stores, Simple, Conflict) -> + {Stores, Simple, Conflict}. + +split_conflict([{SrcOff,DstOff,Type}|Conflict], FPoff, Pushes, Simple) -> + Push = hipe_x86:mk_push( + hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(FPoff+SrcOff), Type)), + split_conflict(Conflict, FPoff+word_size(), [Push|Pushes], + [{-(FPoff+word_size()),DstOff,Type}|Simple]); +split_conflict([], FPoff, Pushes, Simple) -> + {lists:reverse(Pushes), Simple, FPoff}. + +simple_moves([{SrcOff,DstOff,Type}|Moves], FPoff, TempReg, Rest) -> + Temp = hipe_x86:mk_temp(TempReg, Type), + SP = mk_sp(), + LoadOff = hipe_x86:mk_imm(FPoff+SrcOff), + LD = hipe_x86:mk_move(hipe_x86:mk_mem(SP, LoadOff, Type), Temp), + StoreOff = hipe_x86:mk_imm(FPoff+DstOff), + ST = hipe_x86:mk_move(Temp, hipe_x86:mk_mem(SP, StoreOff, Type)), + simple_moves(Moves, FPoff, TempReg, [LD, ST | Rest]); +simple_moves([], _, _, Rest) -> + Rest. + +store_moves([{Src,DstOff}|Moves], FPoff, Rest) -> + Type = typeof_src(Src), + SP = mk_sp(), + StoreOff = hipe_x86:mk_imm(FPoff+DstOff), + ST = hipe_x86:mk_move(Src, hipe_x86:mk_mem(SP, StoreOff, Type)), + store_moves(Moves, FPoff, [ST | Rest]); +store_moves([], _, Rest) -> + Rest. 
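+
+%% Illustrative note (a sketch, assuming the 4-byte x86 word size): the
+%% temp-to-slot map built by mk_temp_map/3 below places the return address
+%% at offset 0, the stack-passed formals above it at offsets 4, 8, ...,
+%% and spilled word-sized temps below it at offsets -4, -8, ...; the frame
+%% size is minus the lowest spill offset, and conv_pseudo above turns each
+%% pseudo temp into an explicit (FPoff + offset)(%esp) memory operand.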
+ +%%% +%%% Contexts +%%% + +-record(context, {liveness, framesize, arity, map, ra, ref_maxstack}). + +mk_context(Liveness, Formals, Temps) -> + RA = hipe_x86:mk_new_temp('untagged'), + {Map, MinOff} = mk_temp_map(Formals, RA, Temps), + FrameSize = (-MinOff), + RefMaxStack = hipe_bifs:ref(FrameSize), + Context = #context{liveness=Liveness, + framesize=FrameSize, arity=length(Formals), + map=Map, ra=RA, ref_maxstack=RefMaxStack}, + Context. + +context_need_stack(#context{ref_maxstack=RM}, N) -> + M = hipe_bifs:ref_get(RM), + if N > M -> hipe_bifs:ref_set(RM, N); + true -> [] + end. + +context_maxstack(#context{ref_maxstack=RM}) -> + hipe_bifs:ref_get(RM). + +context_arity(#context{arity=Arity}) -> + Arity. + +context_framesize(#context{framesize=FrameSize}) -> + FrameSize. + +context_liveness(#context{liveness=Liveness}) -> + Liveness. + +context_offset(#context{map=Map}, Temp) -> + tmap_lookup(Map, Temp). + +context_ra(#context{ra=RA}) -> + RA. + +mk_temp_map(Formals, RA, Temps) -> + {Map, _} = enter_vars(Formals, word_size() * (length(Formals)+1), + tmap_bind(tmap_empty(), RA, 0)), + enter_vars(tset_to_list(Temps), 0, Map). + +enter_vars([V|Vs], PrevOff, Map) -> + Off = + case hipe_x86:temp_type(V) of + 'double' -> PrevOff - float_size(); + _ -> PrevOff - word_size() + end, + enter_vars(Vs, Off, tmap_bind(Map, V, Off)); +enter_vars([], Off, Map) -> + {Map, Off}. + +tmap_empty() -> + gb_trees:empty(). + +tmap_bind(Map, Key, Val) -> + gb_trees:insert(Key, Val, Map). + +tmap_lookup(Map, Key) -> + gb_trees:get(Key, Map). + +%%% +%%% do_prologue: prepend stack frame allocation code. +%%% +%%% NewStart: +%%% temp0 = sp - MaxStack +%%% if( temp0 < SP_LIMIT(P) ) goto IncStack else goto AllocFrame +%%% AllocFrame: +%%% sp -= FrameSize +%%% goto OldStart +%%% OldStart: +%%% ... +%%% IncStack: +%%% call inc_stack +%%% goto NewStart + +do_prologue(CFG, Context) -> + do_check_stack(do_alloc_frame(CFG, Context), Context). + +do_alloc_frame(CFG, Context) -> + case context_framesize(Context) of + 0 -> + CFG; + FrameSize -> + OldStartLab = hipe_x86_cfg:start_label(CFG), + AllocFrameLab = hipe_gensym:get_next_label(x86), + SP = mk_sp(), + AllocFrameCode = + [hipe_x86:mk_alu('sub', hipe_x86:mk_imm(FrameSize), SP), + hipe_x86:mk_jmp_label(OldStartLab)], + CFG1 = hipe_x86_cfg:bb_add(CFG, AllocFrameLab, + hipe_bb:mk_bb(AllocFrameCode)), + hipe_x86_cfg:start_label_update(CFG1, AllocFrameLab) + end. 
+ +do_check_stack(CFG, Context) -> + MaxStack = context_maxstack(Context), + Arity = context_arity(Context), + Guaranteed = erlang:max(0, (?LEAF_WORDS - 1 - Arity) * word_size()), + if MaxStack =< Guaranteed -> + %% io:format("~w: MaxStack ~w =< Guaranteed ~w :-)\n", [?MODULE,MaxStack,Guaranteed]), + CFG; + true -> + %% io:format("~w: MaxStack ~w > Guaranteed ~w :-(\n", [?MODULE,MaxStack,Guaranteed]), + AllocFrameLab = hipe_x86_cfg:start_label(CFG), + NewStartLab = hipe_gensym:get_next_label(x86), + IncStackLab = hipe_gensym:get_next_label(x86), + %% + Type = 'untagged', + Preg = ?HIPE_X86_REGISTERS:proc_pointer(), + Pbase = hipe_x86:mk_temp(Preg, Type), + SP_LIMIT_OFF = hipe_x86:mk_imm( + ?HIPE_X86_REGISTERS:sp_limit_offset()), + Temp0 = mk_temp0(Type), + SP = mk_sp(), + NewStartCode = + %% hopefully this lea is faster than the mov;sub it replaced + [hipe_x86:mk_lea( + hipe_x86:mk_mem(SP, hipe_x86:mk_imm(-MaxStack), 'untagged'), + Temp0), + hipe_x86:mk_cmp( + hipe_x86:mk_mem(Pbase, SP_LIMIT_OFF, Type), Temp0), + hipe_x86:mk_pseudo_jcc('b', IncStackLab, AllocFrameLab, 0.01)], + IncStackCode = + [hipe_x86:mk_call(hipe_x86:mk_prim('inc_stack_0'), + mk_minimal_sdesc(Context), not_remote), + hipe_x86:mk_jmp_label(NewStartLab)], + %% + CFG1 = hipe_x86_cfg:bb_add(CFG, NewStartLab, + hipe_bb:mk_bb(NewStartCode)), + CFG2 = hipe_x86_cfg:bb_add(CFG1, IncStackLab, + hipe_bb:mk_bb(IncStackCode)), + hipe_x86_cfg:start_label_update(CFG2, NewStartLab) + end. + +%%% typeof_src -- what's src's type? + +typeof_src(Src) -> + case Src of + #x86_imm{} -> + 'untagged'; + #x86_temp{} -> + hipe_x86:temp_type(Src); + #x86_mem{} -> + hipe_x86:mem_type(Src) + end. + +%%% Cons up an '%sp' Temp. + +mk_sp() -> + hipe_x86:mk_temp(?HIPE_X86_REGISTERS:sp(), 'untagged'). + +%%% Cons up a '%temp0' Temp. + +mk_temp0(Type) -> + hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), Type). + +%%% Cons up a '%temp1' Temp. + +mk_temp1(Type) -> + hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), Type). + +%%% Check if an operand is a pseudo-Temp. + +src_is_pseudo(Src) -> + opnd_is_pseudo(Src). + +opnd_is_pseudo(Opnd) -> + case hipe_x86:is_temp(Opnd) of + true -> temp_is_pseudo(Opnd); + false -> false + end. + +temp_is_pseudo(Temp) -> + case hipe_x86:is_temp(Temp) of + true -> + not(?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp))); + false -> + false + end. + + +%%% +%%% Build the set of all temps used in a Defun's body. +%%% + +all_temps(Code, Formals) -> + S0 = find_temps(Code, tset_empty()), + S1 = tset_del_list(S0, Formals), + S2 = tset_filter(S1, fun(T) -> temp_is_pseudo(T) end), + S2. + +find_temps([I|Insns], S0) -> + S1 = tset_add_list(S0, hipe_x86_defuse:insn_def(I)), + S2 = tset_add_list(S1, hipe_x86_defuse:insn_use(I)), + find_temps(Insns, S2); +find_temps([], S) -> + S. + +tset_empty() -> + gb_sets:new(). + +tset_size(S) -> + gb_sets:size(S). + +tset_insert(S, T) -> + gb_sets:add_element(T, S). + +tset_add_list(S, Ts) -> + gb_sets:union(S, gb_sets:from_list(Ts)). + +tset_del_list(S, Ts) -> + gb_sets:subtract(S, gb_sets:from_list(Ts)). + +tset_filter(S, F) -> + gb_sets:filter(F, S). + +tset_to_list(S) -> + gb_sets:to_list(S). + +%%% +%%% Compute minimum permissible frame size, ignoring spilled temps. +%%% This is done to ensure that we won't have to adjust the frame size +%%% in the middle of a tailcall. +%%% + +defun_minframe(Defun) -> + MaxTailArity = body_mta(hipe_x86:defun_code(Defun), 0), + MyArity = length(fix_formals(hipe_x86:defun_formals(Defun))), + erlang:max(MaxTailArity - MyArity, 0). 
+ +body_mta([I|Code], MTA) -> + body_mta(Code, insn_mta(I, MTA)); +body_mta([], MTA) -> + MTA. + +insn_mta(I, MTA) -> + case I of + #pseudo_tailcall{arity=Arity} -> + erlang:max(MTA, Arity - ?HIPE_X86_REGISTERS:nr_args()); + _ -> MTA + end. + +%%% +%%% Ensure that we have enough temps to satisfy the minimum frame size, +%%% if necessary by prepending unused dummy temps. +%%% + +ensure_minframe(MinFrame, Temps) -> + ensure_minframe(MinFrame, tset_size(Temps), Temps). + +ensure_minframe(MinFrame, Frame, Temps) -> + if MinFrame > Frame -> + Temp = hipe_x86:mk_new_temp('untagged'), + ensure_minframe(MinFrame, Frame+1, tset_insert(Temps, Temp)); + true -> Temps + end. + +word_size() -> + ?HIPE_X86_REGISTERS:wordsize(). + +float_size() -> + ?HIPE_X86_REGISTERS:float_size(). diff --git a/lib/hipe/x86/hipe_x86_liveness.erl b/lib/hipe/x86/hipe_x86_liveness.erl new file mode 100644 index 0000000000..6874b05a59 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_liveness.erl @@ -0,0 +1,57 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% x86_liveness -- compute register liveness for x86 CFGs + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_LIVENESS, hipe_amd64_liveness). +-define(HIPE_X86_DEFUSE, hipe_amd64_defuse). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-else. +-define(HIPE_X86_LIVENESS, hipe_x86_liveness). +-define(HIPE_X86_DEFUSE, hipe_x86_defuse). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-endif. + +-module(?HIPE_X86_LIVENESS). + +-export([analyse/1]). +-export([liveout/2]). +-export([uses/1, defines/1]). % used in hipe_*_spill_restore modules + +-include("../x86/hipe_x86.hrl"). % ../x86/ is needed when included in amd64 +-include("../flow/liveness.inc"). + +analyse(CFG) -> analyze(CFG). +cfg_bb(CFG, L) -> hipe_x86_cfg:bb(CFG, L). +cfg_postorder(CFG) -> hipe_x86_cfg:postorder(CFG). +cfg_succ(CFG, L) -> hipe_x86_cfg:succ(CFG, L). +uses(Insn) -> ?HIPE_X86_DEFUSE:insn_use(Insn). +defines(Insn) -> ?HIPE_X86_DEFUSE:insn_def(Insn). +liveout_no_succ() -> + ordsets:from_list(lists:map(fun({Reg,Type}) -> + hipe_x86:mk_temp(Reg, Type) + end, + ?HIPE_X86_REGISTERS:live_at_return())). + +-ifdef(DEBUG_LIVENESS). +cfg_labels(CFG) -> hipe_x86_cfg:labels(CFG). +cfg_bb_add(CFG,L,NewBB) -> hipe_x86_cfg:bb_add(CFG,L,NewBB). +mk_comment(Text) -> hipe_x86:mk_comment(Text). +-endif. diff --git a/lib/hipe/x86/hipe_x86_main.erl b/lib/hipe/x86/hipe_x86_main.erl new file mode 100644 index 0000000000..f45a49ca0a --- /dev/null +++ b/lib/hipe/x86/hipe_x86_main.erl @@ -0,0 +1,70 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. 
You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_MAIN, hipe_amd64_main). +-define(RTL_TO_X86, rtl_to_amd64). % XXX: kill this crap +-define(HIPE_RTL_TO_X86, hipe_rtl_to_amd64). +-define(HIPE_X86_RA, hipe_amd64_ra). +-define(HIPE_X86_FRAME, hipe_amd64_frame). +-define(HIPE_X86_PP, hipe_amd64_pp). +-define(X86TAG, amd64). % XXX: kill this crap +-define(X86STR, "amd64"). +-define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore). +-else. +-define(HIPE_X86_MAIN, hipe_x86_main). +-define(RTL_TO_X86, rtl_to_x86). % XXX: kill this crap +-define(HIPE_RTL_TO_X86, hipe_rtl_to_x86). +-define(HIPE_X86_RA, hipe_x86_ra). +-define(HIPE_X86_FRAME, hipe_x86_frame). +-define(HIPE_X86_PP, hipe_x86_pp). +-define(X86TAG, x86). % XXX: kill this crap +-define(X86STR, "x86"). +-define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore). +-endif. + +-module(?HIPE_X86_MAIN). +-export([?RTL_TO_X86/3]). % XXX: change to 'from_rtl' to avoid $ARCH substring + +-ifndef(DEBUG). +-define(DEBUG,1). +-endif. +-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. +-include("../main/hipe.hrl"). + +?RTL_TO_X86(MFA, RTL, Options) -> + Translated = ?option_time(?HIPE_RTL_TO_X86:translate(RTL), + "RTL-to-"?X86STR, Options), + SpillRest = + case proplists:get_bool(caller_save_spill_restore, Options) of + true -> + ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(Translated, Options), + ?X86STR" spill restore", Options); + false -> + Translated + end, + Allocated = ?option_time(?HIPE_X86_RA:ra(SpillRest, Options), + ?X86STR" register allocation", Options), + Framed = ?option_time(?HIPE_X86_FRAME:frame(Allocated, Options), + ?X86STR" frame", Options), + Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options), + ?X86STR" finalise", Options), + ?HIPE_X86_PP:optional_pp(Finalised, MFA, Options), + {native, ?X86TAG, {unprofiled, Finalised}}. diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl new file mode 100644 index 0000000000..34e3d7a11b --- /dev/null +++ b/lib/hipe/x86/hipe_x86_postpass.erl @@ -0,0 +1,276 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%%---------------------------------------------------------------------- +%%% File : hipe_x86_postpass.erl +%%% Author : Christoffer Vikström +%%% Purpose : Contain postpass optimisations for x86-assembler code. 
+%%% Created : 5 Aug 2003 by Christoffer Vikström +%%%---------------------------------------------------------------------- + +-ifndef(HIPE_X86_POSTPASS). +-define(HIPE_X86_POSTPASS, hipe_x86_postpass). +-endif. + +-module(?HIPE_X86_POSTPASS). +-export([postpass/2]). +-include("../x86/hipe_x86.hrl"). + +%%>----------------------------------------------------------------------< +% Procedure : postpass/2 +% Purpose : Function that performs a nr of postpass optimizations on +% the hipe x86-assembler code before it is encoded and loaded. +%%>----------------------------------------------------------------------< +postpass(#defun{code=Code0}=Defun, Options) -> + Code1 = pseudo_insn_expansion(Code0), + Code2 = case proplists:get_bool(peephole, Options) of + true -> peephole_optimization(Code1); + false -> Code1 + end, + Code3 = trivial_goto_elimination(Code2), + Defun#defun{code=Code3}. + + +%%>----------------------------------------------------------------------< +% Procedure : peep/1 +% Purpose : Function that does peephole optimizations. It works by +% moving a window over the code and looking at a sequence of +% a few instructions. Replaces long sequences of instructions +% with shorter ones and removes unnecesary ones. +% Arguments : Insns - List of pseudo x86-assembler records. +% Res - Returned list of pseudo x86-assembler records. +% Kept reversed, until it is returned. +% Return : An optimized list of pseudo x86-assembler records with +% (hopefully) fewer or faster instructions. +%%>----------------------------------------------------------------------< +peephole_optimization(Insns) -> + peep(Insns, [], []). + +%% MoveSelf related peep-opts +%% ------------------------------ +peep([#fmove{src=Src, dst=Src} | Insns], Res,Lst) -> + peep(Insns, Res, [moveSelf1|Lst]); +peep([I=#fmove{src=Src, dst=Dst}, + #fmove{src=Dst, dst=Src} | Insns], Res,Lst) -> + peep(Insns, [I|Res], [moveSelf2|Lst]); +peep([#movsx{src=Src, dst=Src} | Insns], Res,Lst) -> + peep(Insns, Res, [moveSelf3|Lst]); +peep([I=#movsx{src=Src, dst=Dst}, + #movsx{src=Dst, dst=Src} | Insns], Res,Lst) -> + peep(Insns, [I|Res], [moveSelf4|Lst]); +peep([#movzx{src=Src, dst=Src} | Insns], Res,Lst) -> + peep(Insns, Res, [moveSelf5|Lst]); +peep([I=#movzx{src=Src, dst=Dst}, + #movzx{src=Dst, dst=Src} | Insns], Res,Lst) -> + peep(Insns, [I|Res], [moveSelf6|Lst]); +peep([#cmovcc{src=Src, dst=Src} | Insns], Res,Lst) -> + peep(Insns, Res, [moveSelf7|Lst]); +peep([I=#cmovcc{src=Src, dst=Dst}, + #cmovcc{src=Dst, dst=Src}|Insns], Res,Lst) -> + peep(Insns, [I|Res], [moveSelf8|Lst]); +peep([#move{src=#x86_temp{reg=X}, + dst=#x86_temp{reg=X}} | Insns], Res,Lst) -> + peep(Insns, Res, [moveSelf9|Lst]); +peep([I=#move{src=#x86_temp{reg=Src}, dst=#x86_temp{reg=Dst}}, + #move{src=#x86_temp{reg=Dst}, dst=#x86_temp{reg=Src}} | Insns], Res,Lst) -> + peep(Insns, [I|Res], [moveSelf0|Lst]); + + +%% ElimBinALMDouble +%% ---------------- +peep([Move=#move{src=Src, dst=Dst}, Alu=#alu{src=Src, dst=Dst}|Insns], Res, Lst) -> + peep([Alu#alu{src=Dst}|Insns], [Move|Res], [elimBinALMDouble|Lst]); + + +%% ElimFBinDouble +%% -------------- +peep([Move=#fmove{src=Src, dst=Dst}, + BinOp=#fp_binop{src=Src, dst=Dst}|Insns], Res, Lst) -> + peep([BinOp#fp_binop{src=Dst}|Insns], [Move|Res], [elimFBinDouble|Lst]); + + +%% CommuteBinALMD +%% -------------- +peep([#move{src=Src1, dst=Dst}, + #alu{aluop=Op,src=Src2,dst=Dst}|Insns], Res, Lst) + when (Src1 =:= #x86_imm{}) and (Src2 =/= #x86_imm{}) and + ((Op =:= 'add') or (Op =:= 'and') or (Op =:= 'or') or (Op =:= 'xor')) -> + 
peep(Insns, [#alu{aluop=Op,src=Src1,dst=Dst}, + #move{src=Src2, dst=Dst}|Res], + [commuteBinALMD|Lst]); + + +%% ElimCmp0 +%% -------- +peep([C=#cmp{src=Src, dst=Dst},J=#jcc{cc=Cond, label=Lab}|Insns],Res,Lst) -> + case (((Src =:= #x86_imm{value=0}) or (Dst =:= #x86_imm{value=0})) and + ((Cond =:= 'eq') or (Cond =:= 'neq'))) of + true -> + Src2 = case Src of #x86_imm{value=0} -> Src; _ -> Dst end, + Cond2 = case Cond of 'eq' -> 'z'; 'neq' -> 'nz' end, + Test = #test{src=Src2, dst=#x86_imm{value=0}}, + Jump = #jcc{cc=Cond2, label=Lab}, + peep(Insns, [Jump, Test|Res], [elimCmp0|Lst]); + _ -> + peep(Insns, [J,C|Res], Lst) + end; + + +%% ElimCmpTest +%% ----------- +peep([I|Insns],Res,Lst) when (I =:= #cmp{}) or (I =:= #test{}) -> + case check(Insns) of + #jcc{} -> + peep(Insns, [I|Res], Lst); + #jmp_fun{} -> + peep(Insns, [I|Res], Lst); + #jmp_label{} -> + peep(Insns, [I|Res], Lst); + #jmp_switch{} -> + peep(Insns, [I|Res], Lst); + #cmovcc{} -> + peep(Insns, [I|Res], Lst); + #ret{} -> + peep(Insns, [I|Res], Lst); + _ -> + peep(Insns, Res, [elimCmpTest|Lst]) + end; + + +%% ElimPushPop +%% ----------- +peep([#push{src=Opr}, #pop{dst=Opr} | Insns], Res, Lst) -> + peep(Insns, Res, [elimPushPop|Lst]); + + +% %% ElimIFF +% %% ------- +peep([#jcc{label=Lab}, I=#label{label=Lab}|Insns], Res, Lst) -> + peep(Insns, [I, #jmp_label{label=Lab}|Res], [elimIFF|Lst]); + + +%% ElimSet0 +%% -------- +peep([#move{src=#x86_imm{value=0},dst=Dst}|Insns],Res,Lst) +when (Dst==#x86_temp{}) -> + peep(Insns, [#alu{aluop='xor', src=Dst, dst=Dst}|Res], [elimSet0|Lst]); + +%% ElimMDPow2 +%% ---------- +peep([B = #alu{aluop=Op,src=#x86_imm{value=Val},dst=Dst}|Insns], Res, Lst) -> + {IsLog2, Size, Sign} = log2(Val), + case ((Op =:= imul) or (Op =:= idiv)) and IsLog2 of + true -> + Sh = case Sign of positive -> 'bsl'; negative -> 'bsr' end, + peep(Insns, + [#shift{shiftop=Sh, src=#x86_imm{value=Size}, dst=Dst}|Res], + [elimMDPow2|Lst]); + false -> + peep(Insns, [B|Res], Lst) + end; + +%% SubToDec +%% This rule turns "subl $1,Dst; jl Lab" into "decl Dst; jl Lab", which +%% changes reduction counter tests to use decl instead of subl. +%% However, on Athlon64 this leads to a small but measurable decrease +%% in performance. The use of dec is also not recommended on P4, so +%% this transformation is disabled. +%% peep([#alu{aluop='sub',src=#x86_imm{value=1},dst=Dst},J=#jcc{cc='l'}|Insns], Res, Lst) -> +%% peep(Insns, [J, #dec{dst=Dst} | Res], [subToDec|Lst]); + +%% Standard list recursion clause +%% ------------------------------ +peep([I | Insns], Res, Lst) -> + peep(Insns, [I|Res], Lst); +peep([], Res, _Lst) -> + lists:reverse(Res). + +%% Simple goto elimination +%% ----------------------- +trivial_goto_elimination(Insns) -> goto_elim(Insns, []). + +goto_elim([#jmp_label{label=Label}, I = #label{label=Label}|Insns], Res) -> + goto_elim([I|Insns], Res); +goto_elim([I | Insns], Res) -> + goto_elim(Insns, [I|Res]); +goto_elim([], Res) -> + lists:reverse(Res). + + +%%>----------------------------------------------------------------------< +%% Procedure : expand/1 +%% Purpose : Expands pseudo instructions. +%% Arguments : Insns - An x86-instruction list. +%% Return : An expanded instruction list. +%% Notes : +%%>----------------------------------------------------------------------< +pseudo_insn_expansion(Insns) -> expand(Insns, []). 
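+
+%% Illustrative note (a sketch): expand/2 below rewrites, for example, a
+%% #pseudo_jcc{cc=Cc, true_label=T, false_label=F} into the real pair
+%% "jcc Cc, T" followed by "jmp F", turns a #pseudo_call{} into a call
+%% followed by a jmp to its continuation label, and drops
+%% #pseudo_tailcall_prepare{} altogether.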
+expand([I|Tail], Res) -> + case I of + #pseudo_jcc{cc=Cc,true_label=TrueLab,false_label=FalseLab} -> + expand(Tail, [hipe_x86:mk_jmp_label(FalseLab), + hipe_x86:mk_jcc(Cc, TrueLab) | Res]); + #pseudo_tailcall_prepare{} -> + expand(Tail, Res); + #pseudo_call{'fun'=Fun,sdesc=SDesc,contlab=ContLab,linkage=Linkage} -> + expand(Tail, [hipe_x86:mk_jmp_label(ContLab), + hipe_x86:mk_call(Fun, SDesc, Linkage) | Res]); + _ -> + expand(Tail, [I|Res]) + end; +expand([], Res) -> lists:reverse(Res). + +%% Log2 function +%% ------------- +%% Used by ElimMDPow2 clause of peep(..) +log2(Nr) -> log2(Nr, 0). +log2(0, _) -> {false, 0, positive}; +log2(Nr, I) -> + case (Nr band 1) =:= 1 of + true -> + case Nr of + 1 -> + {true, I, positive}; + -1 -> + {true, I, negative}; + _ -> + {false, 0, positive} + end; + false -> + log2((Nr bsr 1), I+1) + end. + +%% Skips through all comments and move instructions and returns the next one +%% ------------------------------------------------------------------------- +%% Used by ElimCmpTest above. +check([I|Ins]) -> + case I of + #comment{} -> + check(Ins); + #move{} -> + check(Ins); + #fmove{} -> + check(Ins); + #movsx{} -> + check(Ins); + #movzx{} -> + check(Ins); + OtherI -> + OtherI + end. diff --git a/lib/hipe/x86/hipe_x86_pp.erl b/lib/hipe/x86/hipe_x86_pp.erl new file mode 100644 index 0000000000..555e21a446 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_pp.erl @@ -0,0 +1,350 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% x86 pretty-printer + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_PP, hipe_amd64_pp). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-else. +-define(HIPE_X86_PP, hipe_x86_pp). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-endif. + +-module(?HIPE_X86_PP). +-export([% pp/1, pp/2, + pp_insn/1, optional_pp/3]). +-include("../x86/hipe_x86.hrl"). + +optional_pp(Defun, MFA, Options) -> + case proplists:get_value(pp_native, Options) of + true -> + pp(Defun); + {only,Lst} when is_list(Lst) -> + case lists:member(MFA, Lst) of + true -> pp(Defun); + false -> ok + end; + {only,MFA} -> + pp(Defun); + {file,FileName} -> + {ok, File} = file:open(FileName, [write,append]), + pp(File, Defun), + ok = file:close(File); + _ -> + ok + end. + +pp(Defun) -> + pp(standard_io, Defun). + +pp(Dev, #defun{mfa={M,F,A}, code=Code, data=Data}) -> + Fname = atom_to_list(M)++"_"++atom_to_list(F)++"_"++integer_to_list(A), + io:format(Dev, "\t.text\n", []), + io:format(Dev, "\t.align 4\n", []), + io:format(Dev, "\t.global ~s\n", [Fname]), + io:format(Dev, "~s:\n", [Fname]), + pp_insns(Dev, Code, Fname), + io:format(Dev, "\t.rodata\n", []), + io:format(Dev, "\t.align 4\n", []), + hipe_data_pp:pp(Dev, Data, x86, Fname), + io:format(Dev, "\n", []). + +pp_insns(Dev, [I|Is], Fname) -> + pp_insn(Dev, I, Fname), + pp_insns(Dev, Is, Fname); +pp_insns(_, [], _) -> + ok. 
+ +pp_insn(I) -> + pp_insn(standard_io, I, ""). + +pp_insn(Dev, I, Pre) -> + case I of + #alu{aluop=AluOp, src=Src, dst=Dst} -> + io:format(Dev, "\t~s ", [alu_op_name(AluOp)]), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + #call{'fun'=Fun, sdesc=SDesc, linkage=Linkage} -> + io:format(Dev, "\tcall ", []), + pp_fun(Dev, Fun), + io:format(Dev, " #", []), + pp_sdesc(Dev, Pre, SDesc), + io:format(Dev, " ~w\n", [Linkage]); + #cmovcc{cc=Cc, src=Src, dst=Dst} -> + io:format(Dev, "\tcmov~s ", [cc_name(Cc)]), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + #cmp{src=Src, dst=Dst} -> + io:format(Dev, "\tcmp ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + #comment{term=Term} -> + io:format(Dev, "\t# ~p\n", [Term]); + #imul{imm_opt=ImmOpt, src=Src, temp=Temp} -> + io:format(Dev, "\timul ", []), + case ImmOpt of + [] -> ok; + Imm -> + pp_imm(Dev, Imm, true), + io:format(Dev, ", ", []) + end, + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_temp(Dev, Temp), + io:format(Dev, "\n", []); + #jcc{cc=Cc, label=Label} -> + io:format(Dev, "\tj~s .~s_~w\n", [cc_name(Cc), Pre, Label]); + #jmp_fun{'fun'=Fun, linkage=Linkage} -> + io:format(Dev, "\tjmp ", []), + pp_fun(Dev, Fun), + io:format(Dev, " ~w\n", [Linkage]); + #jmp_label{label=Label} -> + io:format(Dev, "\tjmp .~s_~w\n", [Pre, Label]); + #jmp_switch{temp=Temp, jtab=JTab, labels=Labels} -> + io:format(Dev, "\tjmp *{constant,~w}(,", [JTab]), + pp_temp(Dev, Temp), + io:format(Dev, ",4) #", []), + pp_labels(Dev, Labels, Pre), + io:format(Dev, "\n", []); + #label{label=Label} -> + io:format(Dev, ".~s_~w:~n", [Pre, Label]); + #lea{mem=Mem, temp=Temp} -> + io:format(Dev, "\tlea ", []), + pp_mem(Dev, Mem), + io:format(Dev, ", ", []), + pp_temp(Dev, Temp), + io:format(Dev, "\n", []); + #move{src=Src, dst=Dst} -> + io:format(Dev, "\tmov ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + #move64{} -> + pp_move64(Dev, I); + #movsx{src=Src, dst=Dst} -> + io:format(Dev, "\tmovsx ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + #movzx{src=Src, dst=Dst} -> + io:format(Dev, "\tmovzx ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + #pseudo_call{'fun'=Fun, sdesc=SDesc, contlab=ContLab, linkage=Linkage} -> + io:format(Dev, "\tpseudo_call ", []), + pp_fun(Dev, Fun), + io:format(Dev, " # contlab .~s_~w", [Pre, ContLab]), + pp_sdesc(Dev, Pre, SDesc), + io:format(Dev, " ~w\n", [Linkage]); + #pseudo_jcc{cc=Cc, true_label=TrueLab, false_label=FalseLab, pred=Pred} -> + io:format(Dev, "\tpseudo_j~s ", [cc_name(Cc)]), + io:format(Dev, ".~s_~w # .~s_~w ~.2f\n", + [Pre, TrueLab, Pre, FalseLab, Pred]); + #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage} -> + io:format(Dev, "\tpseudo_tailcall ", []), + pp_fun(Dev, Fun), + io:format(Dev, "~w (", [Arity]), + pp_args(Dev, StkArgs), + io:format(Dev, ") ~w\n", [Linkage]); + #pseudo_tailcall_prepare{} -> + io:format(Dev, "\tpseudo_tailcall_prepare\n", []); + #push{src=Src} -> + io:format(Dev, "\tpush ", []), + pp_src(Dev, Src), + io:format(Dev, "\n", []); + #ret{npop=NPop} -> + io:format(Dev, "\tret $~s\n", [to_hex(NPop)]); + #shift{shiftop=ShiftOp, src=Src, dst=Dst} -> + io:format(Dev, "\t~s ", [alu_op_name(ShiftOp)]), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + 
io:format(Dev, "\n", []); + #fp_binop{src=Src, dst=Dst, op=Op} -> + io:format(Dev, "\t~s ", [Op]), + pp_dst(Dev, Dst), + io:format(Dev, ", ", []), + pp_src(Dev, Src), + io:format(Dev, "\n", []); + #fp_unop{arg=Arg, op=Op} -> + io:format(Dev, "\t~s ", [Op]), + case Arg of + []-> + io:format(Dev, "\n", []); + _ -> + pp_args(Dev, [Arg]), + io:format(Dev, "\n", []) + end; + #fmove{src=Src, dst=Dst} -> + io:format(Dev, "\tfmove ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []); + _ -> + exit({?MODULE, pp_insn, {"unknown x86 instruction", I}}) + end. + +-ifdef(HIPE_AMD64). +pp_move64(Dev, I) -> + #move64{imm=Src, dst=Dst} = I, + io:format(Dev, "\tmov64 ", []), + pp_src(Dev, Src), + io:format(Dev, ", ", []), + pp_dst(Dev, Dst), + io:format(Dev, "\n", []). +-else. +pp_move64(_Dev, I) -> exit({?MODULE, I}). +-endif. + +to_hex(N) -> + io_lib:format("~.16x", [N, "0x"]). + +pp_sdesc(Dev, Pre, #x86_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live}) -> + pp_sdesc_exnlab(Dev, Pre, ExnLab), + io:format(Dev, " ~s ~w [", [to_hex(FSize), Arity]), + pp_sdesc_live(Dev, Live), + io:format(Dev, "]", []). + +pp_sdesc_exnlab(Dev, _, []) -> io:format(Dev, " []", []); +pp_sdesc_exnlab(Dev, Pre, ExnLab) -> io:format(Dev, " .~s_~w", [Pre, ExnLab]). + +pp_sdesc_live(_, {}) -> ok; +pp_sdesc_live(Dev, Live) -> pp_sdesc_live(Dev, Live, 1). + +pp_sdesc_live(Dev, Live, I) -> + io:format(Dev, "~s", [to_hex(element(I, Live))]), + if I < tuple_size(Live) -> + io:format(Dev, ",", []), + pp_sdesc_live(Dev, Live, I+1); + true -> ok + end. + +pp_labels(Dev, [Label|Labels], Pre) -> + io:format(Dev, " .~s_~w", [Pre, Label]), + pp_labels(Dev, Labels, Pre); +pp_labels(_, [], _) -> + ok. + +pp_fun(Dev, Fun) -> + case Fun of + #x86_mfa{m=M, f=F, a=A} -> + io:format(Dev, "~w:~w/~w", [M, F, A]); + #x86_prim{prim=Prim} -> + io:format(Dev, "~w", [Prim]); + _ -> % temp or mem + io:format(Dev, "*", []), + pp_dst(Dev, Fun) + end. + +alu_op_name(Op) -> Op. + +cc_name(Cc) -> Cc. + +pp_hard_reg(Dev, Reg) -> + io:format(Dev, "~s", [?HIPE_X86_REGISTERS:reg_name(Reg)]). + +type_tag('tagged') -> "t"; +type_tag('untagged') -> "u"; +type_tag('double') -> "d". + +pp_temp(Dev, #x86_temp{reg=Reg, type=Type}) -> + case Type of + double -> + Tag = type_tag(Type), + io:format(Dev, "~s~w", [Tag, Reg]); + _ -> + case ?HIPE_X86_REGISTERS:is_precoloured(Reg) of + true -> + pp_hard_reg(Dev, Reg); + false -> + Tag = type_tag(Type), + io:format(Dev, "~s~w", [Tag, Reg]) + end + end. + +pp_fpreg(Dev, #x86_fpreg{reg=Reg, pseudo=Pseudo})-> + case Pseudo of + true -> io:format(Dev, "pseudo_fp(~w)", [Reg]); + _ -> io:format(Dev, "st(~w)", [Reg]) + end. + +pp_imm(Dev, #x86_imm{value=Value}, Dollar) -> + if Dollar =:= true -> io:format(Dev, [$$], []); + true -> ok + end, + if is_integer(Value) -> io:format(Dev, "~s", [to_hex(Value)]); + true -> io:format(Dev, "~w", [Value]) + end. + +pp_mem(Dev, #x86_mem{base=Base, off=Off}) -> + pp_off(Dev, Off), + case Base of + [] -> + ok; + _ -> + io:format(Dev, "(", []), + pp_temp(Dev, Base), + io:format(Dev, ")", []) + end. + +pp_off(Dev, Off) -> + pp_src(Dev, Off, false). + +pp_src(Dev, Src) -> + pp_src(Dev, Src, true). + +pp_src(Dev, Src, Dollar) -> + case Src of + #x86_temp{} -> + pp_temp(Dev, Src); + #x86_imm{} -> + pp_imm(Dev, Src, Dollar); + #x86_mem{} -> + pp_mem(Dev, Src); + #x86_fpreg{} -> + pp_fpreg(Dev, Src) + end. + +pp_dst(Dev, Dst) -> + pp_src(Dev, Dst). + +pp_args(Dev, [A|As]) -> + pp_src(Dev, A), + pp_comma_args(Dev, As); +pp_args(_, []) -> + ok. 
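+
+%% Output examples (illustrative): to_hex(4100) yields "0x1004", so a
+%% #ret{npop=8} is printed as "ret $0x8".  Temps are printed with a type-tag
+%% prefix, e.g. an untagged pseudo temp with reg 42 prints as "u42" and a
+%% tagged one as "t42", while precoloured registers print as their hardware
+%% names such as "%eax".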
+ +pp_comma_args(Dev, [A|As]) -> + io:format(Dev, ", ", []), + pp_src(Dev, A), + pp_comma_args(Dev, As); +pp_comma_args(_, []) -> + ok. diff --git a/lib/hipe/x86/hipe_x86_ra.erl b/lib/hipe/x86/hipe_x86_ra.erl new file mode 100644 index 0000000000..d50b9aabad --- /dev/null +++ b/lib/hipe/x86/hipe_x86_ra.erl @@ -0,0 +1,99 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_RA, hipe_amd64_ra). +-define(HIPE_X86_PP, hipe_amd64_pp). +-define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). +-define(HIPE_X86_RA_NAIVE, hipe_amd64_ra_naive). +-define(HIPE_X86_RA_FINALISE, hipe_amd64_ra_finalise). +-define(HIPE_X86_SPECIFIC, hipe_amd64_specific). +-else. +-define(HIPE_X86_RA, hipe_x86_ra). +-define(HIPE_X86_PP, hipe_x86_pp). +-define(HIPE_X86_RA_LS, hipe_x86_ra_ls). +-define(HIPE_X86_RA_NAIVE, hipe_x86_ra_naive). +-define(HIPE_X86_RA_FINALISE, hipe_x86_ra_finalise). +-define(HIPE_X86_SPECIFIC, hipe_x86_specific). +-endif. + +-module(?HIPE_X86_RA). +-export([ra/2]). + +%%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. +-include("../main/hipe.hrl"). + +ra(Defun0, Options) -> + %% ?HIPE_X86_PP:pp(Defun0), + {Defun1, Coloring_fp, SpillIndex} = ra_fp(Defun0, Options), + %% ?HIPE_X86_PP:pp(Defun1), + ?start_ra_instrumentation(Options, + length(hipe_x86:defun_code(Defun1)), + element(2,hipe_x86:defun_var_range(Defun1))), + {Defun2, Coloring} + = case proplists:get_value(regalloc, Options, coalescing) of + coalescing -> + ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc); + optimistic -> + ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc); + graph_color -> + ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc); + linear_scan -> + ?HIPE_X86_RA_LS:ra(Defun1, SpillIndex, Options); + naive -> + ?HIPE_X86_RA_NAIVE:ra(Defun1, Coloring_fp, Options); + _ -> + exit({unknown_regalloc_compiler_option, + proplists:get_value(regalloc,Options)}) + end, + ?stop_ra_instrumentation(Options, + length(hipe_x86:defun_code(Defun2)), + element(2,hipe_x86:defun_var_range(Defun2))), + %% ?HIPE_X86_PP:pp(Defun2), + ?HIPE_X86_RA_FINALISE:finalise(Defun2, Coloring, Coloring_fp, Options). + +ra(Defun, SpillIndex, Options, RegAllocMod) -> + hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, ?HIPE_X86_SPECIFIC). + +-ifdef(HIPE_AMD64). +ra_fp(Defun, Options) -> + case proplists:get_bool(inline_fp, Options) and + (proplists:get_value(regalloc, Options) =/= naive) of + true -> + case proplists:get_bool(x87, Options) of + true -> + hipe_amd64_ra_x87_ls:ra(Defun, Options); + false -> + hipe_regalloc_loop:ra_fp(Defun, Options, + hipe_coalescing_regalloc, + hipe_amd64_specific_sse2) + end; + false -> + {Defun,[],0} + end. +-else. +ra_fp(Defun, Options) -> + case proplists:get_bool(inline_fp, Options) of + true -> + hipe_x86_ra_x87_ls:ra(Defun, Options); + false -> + {Defun,[],0} + end. 
+-endif. diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl new file mode 100644 index 0000000000..10b4df05d2 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl @@ -0,0 +1,335 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% +%%% - apply temp -> reg/spill map from RA + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_RA_FINALISE, hipe_amd64_ra_finalise). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_X87, hipe_amd64_x87). +-else. +-define(HIPE_X86_RA_FINALISE, hipe_x86_ra_finalise). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_X87, hipe_x86_x87). +-endif. + +-module(?HIPE_X86_RA_FINALISE). +-export([finalise/4]). +-include("../x86/hipe_x86.hrl"). + +finalise(Defun, TempMap, FpMap, Options) -> + Defun1 = finalise_ra(Defun, TempMap, FpMap, Options), + case proplists:get_bool(x87, Options) of + true -> + ?HIPE_X86_X87:map(Defun1); + _ -> + Defun1 + end. + +%%% +%%% Finalise the temp->reg/spill mapping. +%%% (XXX: maybe this should be merged with the main pass, +%%% but I just want this to work now) +%%% + +finalise_ra(Defun, [], [], _Options) -> + Defun; +finalise_ra(Defun, TempMap, FpMap, Options) -> + Code = hipe_x86:defun_code(Defun), + {_, SpillLimit} = hipe_x86:defun_var_range(Defun), + Map = mk_ra_map(TempMap, SpillLimit), + FpMap0 = mk_ra_map_fp(FpMap, SpillLimit, Options), + NewCode = ra_code(Code, Map, FpMap0), + Defun#defun{code=NewCode}. + +ra_code(Code, Map, FpMap) -> + [ra_insn(I, Map, FpMap) || I <- Code]. 
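+
+%% Sketch (illustrative): the register allocator delivers a coloring such as
+%%   [{42, {reg, 3}}, {43, {spill, 0}}]
+%% which mk_ra_map/2 below converts into a gb_tree.  ra_insn/3 then rewrites
+%% every temp in the code: temp 42 is renamed to hard register 3, while
+%% temp 43 is renamed to the fresh pseudo SpillLimit+0+1, which later passes
+%% treat as an ordinary unallocated pseudo and map to a stack slot.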
+ +ra_insn(I, Map, FpMap) -> + case I of + #alu{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#alu{src=Src,dst=Dst}; + #call{} -> + I; + #cmovcc{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#cmovcc{src=Src,dst=Dst}; + #cmp{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#cmp{src=Src,dst=Dst}; + #comment{} -> + I; + #fmove{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map, FpMap), + Dst = ra_opnd(Dst0, Map, FpMap), + I#fmove{src=Src,dst=Dst}; + #fp_unop{arg=Arg0} -> + Arg = ra_opnd(Arg0, Map, FpMap), + I#fp_unop{arg=Arg}; + #fp_binop{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map, FpMap), + Dst = ra_opnd(Dst0, Map, FpMap), + I#fp_binop{src=Src,dst=Dst}; + #imul{src=Src0,temp=Temp0} -> + Src = ra_opnd(Src0, Map), + Temp = ra_temp(Temp0, Map), + I#imul{src=Src,temp=Temp}; + #jcc{} -> + I; + #jmp_fun{'fun'=Fun0} -> + Fun = ra_opnd(Fun0, Map), + I#jmp_fun{'fun'=Fun}; + #jmp_label{} -> + I; + #jmp_switch{temp=Temp0,jtab=JTab0} -> + Temp = ra_opnd(Temp0, Map), + JTab = ra_opnd(JTab0, Map), + I#jmp_switch{temp=Temp,jtab=JTab}; + #label{} -> + I; + #lea{mem=Mem0,temp=Temp0} -> + Mem = ra_mem(Mem0, Map), + Temp = ra_temp(Temp0, Map), + I#lea{mem=Mem,temp=Temp}; + #move{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#move{src=Src,dst=Dst}; + #move64{dst=Dst0} -> + Dst = ra_opnd(Dst0, Map), + I#move64{dst=Dst}; + #movsx{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#movsx{src=Src,dst=Dst}; + #movzx{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#movzx{src=Src,dst=Dst}; + #pseudo_call{'fun'=Fun0} -> + Fun = ra_opnd(Fun0, Map), + I#pseudo_call{'fun'=Fun}; + #pseudo_jcc{} -> + I; + #pseudo_tailcall{'fun'=Fun0,stkargs=StkArgs0} -> + Fun = ra_opnd(Fun0, Map), + StkArgs = ra_args(StkArgs0, Map), + I#pseudo_tailcall{'fun'=Fun,stkargs=StkArgs}; + #pseudo_tailcall_prepare{} -> + I; + #push{src=Src0} -> + Src = ra_opnd(Src0, Map), + I#push{src=Src}; + #ret{} -> + I; + #shift{src=Src0,dst=Dst0} -> + Src = ra_opnd(Src0, Map), + Dst = ra_opnd(Dst0, Map), + I#shift{src=Src,dst=Dst}; + _ -> + exit({?MODULE,ra_insn,I}) + end. + +ra_args(Args, Map) -> + [ra_opnd(Opnd, Map) || Opnd <- Args]. + +ra_opnd(Opnd, Map) -> + ra_opnd(Opnd, Map, gb_trees:empty()). +ra_opnd(Opnd, Map, FpMap) -> + case Opnd of + #x86_temp{} -> ra_temp(Opnd, Map, FpMap); + #x86_mem{} -> ra_mem(Opnd, Map); + _ -> Opnd + end. + +ra_mem(Mem, Map) -> + #x86_mem{base=Base0,off=Off0} = Mem, + Base = ra_opnd(Base0, Map), + Off = ra_opnd(Off0, Map), + Mem#x86_mem{base=Base,off=Off}. + +ra_temp(Temp, Map) -> + ra_temp(Temp, Map, gb_trees:empty()). + +ra_temp(Temp, Map, FpMap) -> + Reg = hipe_x86:temp_reg(Temp), + case hipe_x86:temp_type(Temp) of + double -> + ra_temp_double(Temp, Reg, FpMap); + _-> + case ?HIPE_X86_REGISTERS:is_precoloured(Reg) of + true -> + Temp; + _ -> + case gb_trees:lookup(Reg, Map) of + {value,NewReg} -> Temp#x86_temp{reg=NewReg}; + _ -> Temp + end + end + end. + +-ifdef(HIPE_AMD64). +ra_temp_double(Temp, Reg, FpMap) -> + case hipe_amd64_registers:is_precoloured_sse2(Reg) of + true -> + Temp; + _ -> + case gb_trees:lookup(Reg, FpMap) of + {value,NewReg} -> Temp#x86_temp{reg=NewReg}; + _ -> Temp + end + end. +-else. 
+ra_temp_double(Temp, Reg, FpMap) -> + case gb_trees:lookup(Reg, FpMap) of + {value,NewReg} -> + case hipe_x86_registers:is_precoloured_x87(NewReg) of + true -> hipe_x86:mk_fpreg(NewReg); + false -> + Temp#x86_temp{reg=NewReg} + end; + _ -> + Temp + end. +-endif. + +mk_ra_map(TempMap, SpillLimit) -> + %% Build a partial map from pseudo to reg or spill. + %% Spills are represented as pseudos with indices above SpillLimit. + %% (I'd prefer to use negative indices, but that breaks + %% ?HIPE_X86_REGISTERS:is_precoloured/1.) + %% The frame mapping proper is unchanged, since spills look just like + %% ordinary (un-allocated) pseudos. + lists:foldl(fun(MapLet, Map) -> + {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, + is_precoloured), + gb_trees:insert(Key, Val, Map) + end, + gb_trees:empty(), + TempMap). + +conv_ra_maplet(MapLet = {From,To}, SpillLimit, IsPrecoloured) -> + %% From should be a pseudo, or a hard reg mapped to itself. + if is_integer(From), From =< SpillLimit -> + case ?HIPE_X86_REGISTERS:IsPrecoloured(From) of + false -> []; + _ -> + case To of + {reg, From} -> []; + _ -> exit({?MODULE,conv_ra_maplet,MapLet}) + end + end; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of From check + case To of + {reg, NewReg} -> + %% NewReg should be a hard reg, or a pseudo mapped + %% to itself (formals are handled this way). + if is_integer(NewReg) -> + case ?HIPE_X86_REGISTERS:IsPrecoloured(NewReg) of + true -> []; + _ -> if From =:= NewReg -> []; + true -> + exit({?MODULE,conv_ra_maplet,MapLet}) + end + end; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of NewReg check + {From, NewReg}; + {spill, SpillIndex} -> + %% SpillIndex should be >= 0. + if is_integer(SpillIndex), SpillIndex >= 0 -> []; + true -> exit({?MODULE,conv_ra_maplet,MapLet}) + end, + %% end of SpillIndex check + ToTempNum = SpillLimit+SpillIndex+1, + MaxTempNum = hipe_gensym:get_var(x86), + if MaxTempNum >= ToTempNum -> ok; + true -> hipe_gensym:set_var(x86, ToTempNum) + end, + {From, ToTempNum}; + _ -> exit({?MODULE,conv_ra_maplet,MapLet}) + end. + +mk_ra_map_x87(FpMap, SpillLimit) -> + lists:foldl(fun(MapLet, Map) -> + {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, + is_precoloured_x87), + gb_trees:insert(Key, Val, Map) + end, + gb_trees:empty(), + FpMap). + +-ifdef(HIPE_AMD64). +mk_ra_map_sse2(FpMap, SpillLimit) -> + lists:foldl(fun(MapLet, Map) -> + {Key,Val} = conv_ra_maplet(MapLet, SpillLimit, + is_precoloured_sse2), + gb_trees:insert(Key, Val, Map) + end, + gb_trees:empty(), + FpMap). + +mk_ra_map_fp(FpMap, SpillLimit, Options) -> + case proplists:get_bool(x87, Options) of + true -> mk_ra_map_x87(FpMap, SpillLimit); + false -> mk_ra_map_sse2(FpMap, SpillLimit) + end. +-else. +mk_ra_map_fp(FpMap, SpillLimit, _Options) -> + mk_ra_map_x87(FpMap, SpillLimit). +-endif. + +-ifdef(notdef). +conv_ra_maplet_fp(MapLet = {From,To}, SpillLimit) -> + %% From should be a pseudo + if is_integer(From), From =< SpillLimit -> []; + true -> exit({?MODULE,conv_ra_maplet_fp,MapLet}) + end, + %% end of From check + case To of + {reg, NewReg} -> + case hipe_x86_registers:is_precoloured_x87(NewReg) of + true-> []; + false -> exit({?MODULE,conv_ra_maplet_fp,MapLet}) + end, + %% end of NewReg check. + {From, NewReg}; + {spill, SpillIndex} -> + %% SpillIndex should be >= 0. 
+ if is_integer(SpillIndex), SpillIndex >= 0 -> []; + true -> exit({?MODULE,conv_ra_maplet_fp,MapLet}) + end, + %% end of SpillIndex check + ToTempNum = SpillLimit+SpillIndex+1, + MaxTempNum = hipe_gensym:get_var(x86), + if MaxTempNum >= ToTempNum -> []; + true -> hipe_gensym:set_var(x86, ToTempNum) + end, + {From, ToTempNum}; + _ -> exit({?MODULE,conv_ra_maplet_fp,MapLet}) + end. +-endif. diff --git a/lib/hipe/x86/hipe_x86_ra_ls.erl b/lib/hipe/x86/hipe_x86_ra_ls.erl new file mode 100644 index 0000000000..ab7b6708ad --- /dev/null +++ b/lib/hipe/x86/hipe_x86_ra_ls.erl @@ -0,0 +1,85 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% Linear Scan register allocator for x86 + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). +-define(HIPE_X86_PP, hipe_amd64_pp). +-define(HIPE_X86_RA_POSTCONDITIONS, hipe_amd64_ra_postconditions). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_SPECIFIC, hipe_amd64_specific). +-else. +-define(HIPE_X86_RA_LS, hipe_x86_ra_ls). +-define(HIPE_X86_PP, hipe_x86_pp). +-define(HIPE_X86_RA_POSTCONDITIONS, hipe_x86_ra_postconditions). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_SPECIFIC, hipe_x86_specific). +-endif. + +-module(?HIPE_X86_RA_LS). +-export([ra/3,regalloc/7]). +-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation. +-include("../main/hipe.hrl"). + +ra(Defun, SpillIndex, Options) -> + NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options), + CFG = hipe_x86_cfg:init(NewDefun), + + SpillLimit = ?HIPE_X86_SPECIFIC:number_of_temporaries( + CFG), + ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), + alloc(NewDefun, SpillIndex, SpillLimit, Options). + + +alloc(Defun, SpillIndex, SpillLimit, Options) -> + ?inc_counter(ra_iteration_counter,1), + %% ?HIPE_X86_PP:pp(Defun), + CFG = hipe_x86_cfg:init(Defun), + {Coloring, NewSpillIndex} = + regalloc( + CFG, + ?HIPE_X86_REGISTERS:allocatable()-- + [?HIPE_X86_REGISTERS:temp1(), + ?HIPE_X86_REGISTERS:temp0()], + [hipe_x86_cfg:start_label(CFG)], + SpillIndex, SpillLimit, Options, + ?HIPE_X86_SPECIFIC), + {NewDefun, _DidSpill} = + ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite( + Defun, Coloring, 'linearscan'), + %% ?HIPE_X86_PP:pp(NewDefun), + TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), + {TempMap2,NewSpillIndex2} = + hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options, + ?HIPE_X86_SPECIFIC, TempMap), + Coloring2 = + hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2), + case proplists:get_bool(verbose_spills, Options) of + true -> + ?msg("Stack slot size: ~p~n",[NewSpillIndex2-SpillIndex]); + false -> + ok + end, + ?add_spills(Options, NewSpillIndex), + {NewDefun, Coloring2}. 
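+
+%% Note (illustrative; the reasoning is not stated explicitly in the code):
+%% temp0 and temp1 are removed from the allocatable set above because the
+%% 'linearscan' post-pass rewrite (check_and_rewrite) may need them as fixed
+%% scratch registers for operands that ended up spilled, so they must not be
+%% handed out to ordinary temps by the linear-scan allocator.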
+ +regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, DontSpill, Options, Target) -> + hipe_ls_regalloc:regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, + DontSpill, Options, Target). diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl new file mode 100644 index 0000000000..e9b99cd2c5 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_ra_naive.erl @@ -0,0 +1,409 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% simple local x86 regalloc + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_RA_NAIVE, hipe_amd64_ra_naive). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_SPECIFIC_FP, hipe_amd64_specific_sse2). +-define(ECX, rcx). +-else. +-define(HIPE_X86_RA_NAIVE, hipe_x86_ra_naive). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_SPECIFIC_FP, hipe_x86_specific_x87). +-define(ECX, ecx). +-endif. + +-module(?HIPE_X86_RA_NAIVE). +-export([ra/3]). + +-include("../x86/hipe_x86.hrl"). +-define(HIPE_INSTRUMENT_COMPILER, true). % enable instrumentation +-include("../main/hipe.hrl"). + +ra(X86Defun, Coloring_fp, Options) -> + #defun{code=Code0} = X86Defun, + Code1 = do_insns(Code0), + NofSpilledFloats = count_non_float_spills(Coloring_fp), + NofFloats = length(Coloring_fp), + ?add_spills(Options, hipe_gensym:get_var(x86) - + ?HIPE_X86_REGISTERS:first_virtual()- + NofSpilledFloats - + NofFloats), + TempMap = [], + {X86Defun#defun{code=Code1, + var_range={0, hipe_gensym:get_var(x86)}}, + TempMap}. + +count_non_float_spills(Coloring_fp) -> + count_non_float_spills(Coloring_fp, 0). + +count_non_float_spills([{_,To}|Tail], Num) -> + case ?HIPE_X86_SPECIFIC_FP:is_precoloured(To) of + true -> + count_non_float_spills(Tail, Num); + false -> + count_non_float_spills(Tail, Num+1) + end; +count_non_float_spills([], Num) -> + Num. + +do_insns([I|Insns]) -> + do_insn(I) ++ do_insns(Insns); +do_insns([]) -> + []. + +do_insn(I) -> % Insn -> Insn list + case I of + #alu{} -> + do_alu(I); + #cmp{} -> + do_cmp(I); + #imul{} -> + do_imul(I); + #jmp_switch{} -> + do_jmp_switch(I); + #lea{} -> + do_lea(I); + #move{} -> + do_move(I); + #move64{} -> + do_move64(I); + #movzx{} -> + do_movx(I); + #movsx{} -> + do_movx(I); + #fmove{} -> + do_fmove(I); + #fp_unop{} -> + do_fp_unop(I); + #fp_binop{} -> + do_fp_binop(I); + #shift{} -> + do_shift(I); + #label{} -> + [I]; + #pseudo_jcc{} -> + [I]; + #pseudo_call{} -> + [I]; + #ret{} -> + [I]; + #pseudo_tailcall_prepare{} -> + [I]; + #pseudo_tailcall{} -> + [I]; + #push{} -> + [I]; + #jmp_label{} -> + [I]; + #comment{} -> + [I]; + _ -> + io:format("Unknown Instruction = ~w\n", [I]), + exit({?MODULE, unknown_instruction, I}) + end. + +%%% Fix an alu op. + +do_alu(I) -> + #alu{src=Src0,dst=Dst0} = I, + {FixSrc,Src,FixDst,Dst} = do_binary(Src0, Dst0), + FixSrc ++ FixDst ++ [I#alu{src=Src,dst=Dst}]. + +%%% Fix a cmp op. 
+ +do_cmp(I) -> + #cmp{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0), + FixSrc ++ FixDst ++ [I#cmp{src=Src,dst=Dst}]. + +%%% Fix an imul op. + +do_imul(I) -> + #imul{imm_opt=ImmOpt,src=Src0,temp=Temp0} = I, + {FixSrc,Src} = fix_src_operand(Src0), % may use temp0 + {FixTempSrc,Temp,FixTempDst} = + case temp_is_pseudo(Temp0) of + false -> + {[], Temp0, []}; + true -> + Reg = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), 'untagged'), + {case ImmOpt of + [] -> [hipe_x86:mk_move(Temp0, Reg)]; % temp *= src + _ -> [] % temp = src * imm + end, + Reg, + [hipe_x86:mk_move(Reg, Temp0)]} + end, + FixSrc ++ FixTempSrc ++ [I#imul{src=Src,temp=Temp}] ++ FixTempDst. + +%%% Fix a jmp_switch op. + +-ifdef(HIPE_AMD64). +do_jmp_switch(I) -> + #jmp_switch{temp=Temp, jtab=Tab} = I, + case temp_is_pseudo(Temp) of + false -> + case temp_is_pseudo(Tab) of + false -> + [I]; + true -> + Reg = hipe_x86:mk_temp(hipe_amd64_registers:temp0(), 'untagged'), + [hipe_x86:mk_move(Temp, Reg), I#jmp_switch{jtab=Reg}] + end; + true -> + Reg = hipe_x86:mk_temp(hipe_amd64_registers:temp1(), 'untagged'), + case temp_is_pseudo(Tab) of + false -> + [hipe_x86:mk_move(Temp, Reg), I#jmp_switch{temp=Reg}]; + true -> + Reg2 = hipe_x86:mk_temp(hipe_amd64_registers:temp0(), 'untagged'), + [hipe_x86:mk_move(Temp, Reg), + hipe_x86:mk_move(Tab, Reg2), + I#jmp_switch{temp=Reg, jtab=Reg2}] + end + end. +-else. +do_jmp_switch(I) -> + #jmp_switch{temp=Temp} = I, + case temp_is_pseudo(Temp) of + false -> + [I]; + true -> + Reg = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), 'untagged'), + [hipe_x86:mk_move(Temp, Reg), I#jmp_switch{temp=Reg}] + end. +-endif. + +%%% Fix a lea op. + +do_lea(I) -> + #lea{temp=Temp} = I, + case temp_is_pseudo(Temp) of + false -> + [I]; + true -> + Reg = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), 'untagged'), + [I#lea{temp=Reg}, hipe_x86:mk_move(Reg, Temp)] + end. + +%%% Fix a move op. + +do_move(I) -> + #move{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0), + FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}]. + +-ifdef(HIPE_AMD64). +do_move64(I) -> + #move64{dst=Dst} = I, + case is_mem_opnd(Dst) of + false -> + [I]; + true -> + Reg = hipe_amd64_registers:temp1(), + NewDst = clone(Dst, Reg), + [I#move64{dst=NewDst}, hipe_x86:mk_move(NewDst, Dst)] + end. +-else. +do_move64(I) -> exit({?MODULE, I}). +-endif. + +do_movx(I) -> + {{FixSrc, Src}, {FixDst, Dst}} = + case I of + #movsx{src=Src0,dst=Dst0} -> + {fix_src_operand(Src0), fix_dst_operand(Dst0)}; + #movzx{src=Src0,dst=Dst0} -> + {fix_src_operand(Src0), fix_dst_operand(Dst0)} + end, + Reg = ?HIPE_X86_REGISTERS:temp0(), + Dst2 = clone(Dst, Reg), + I2 = case is_mem_opnd(Dst) of + true -> + Reg = ?HIPE_X86_REGISTERS:temp0(), + Dst2 = clone(Dst, Reg), + case I of + #movsx{} -> + [hipe_x86:mk_movsx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)]; + #movzx{} -> + [hipe_x86:mk_movzx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)] + end; + false -> + case I of + #movsx{} -> + [hipe_x86:mk_movsx(Src, Dst)]; + #movzx{} -> + [hipe_x86:mk_movzx(Src, Dst)] + end + end, + FixSrc ++ FixDst ++ I2. + + +%%% Fix a fmove op. 
+%% conv_to_float +do_fmove(I=#fmove{src=#x86_temp{type=untagged}, + dst=#x86_temp{type=double}}) -> + #fmove{src=Src0,dst=Dst0} = I, + Src = clone(Src0, ?HIPE_X86_REGISTERS:temp0()), + Dst = clone(Dst0, ?HIPE_X86_REGISTERS:temp1()), + [hipe_x86:mk_move(Src0, Src), + I#fmove{src=Src, dst=Dst}, + hipe_x86:mk_fmove(Dst, Dst0)]; +%% fmove +do_fmove(I) -> + #fmove{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0), + FixSrc ++ FixDst ++ [I#fmove{src=Src,dst=Dst}]. + +do_fp_unop(I) -> + #fp_unop{arg=Arg} = I, + case is_mem_opnd(Arg) of + false -> + [I]; + true -> + Reg = ?HIPE_X86_REGISTERS:temp1(), + NewArg = clone(Arg, Reg), + [hipe_x86:mk_fmove(Arg, NewArg), + I#fp_unop{arg=NewArg}, + hipe_x86:mk_fmove(NewArg, Arg)] + end. + +do_fp_binop(I) -> + #fp_binop{src=Src0, dst=Dst0} = I, + {FixSrc, Src} = fix_src_operand(Src0), + {FixDst, Dst} = fix_dst_operand(Dst0), + Reg = ?HIPE_X86_REGISTERS:temp1(), + Dst2 = clone(Dst, Reg), + FixSrc ++ FixDst ++ [hipe_x86:mk_fmove(Dst, Dst2), + I#fp_binop{src=Src, dst=Dst2}, + hipe_x86:mk_fmove(Dst2, Dst)]. + +do_shift(I) -> + #shift{src=Src0,dst=Dst0} = I, + {FixDst, Dst} = fix_dst_operand(Dst0), + Reg = ?HIPE_X86_REGISTERS:?ECX(), + case Src0 of + #x86_imm{} -> + FixDst ++ [I#shift{dst=Dst}]; + #x86_temp{reg=Reg} -> + FixDst ++ [I#shift{dst=Dst}] + end. + +%%% Fix the operands of a binary op. +%%% 1. remove pseudos from any explicit memory operands +%%% 2. if both operands are (implicit or explicit) memory operands, +%%% move src to a reg and use reg as src in the original insn + +do_binary(Src0, Dst0) -> + {FixSrc, Src} = fix_src_operand(Src0), + {FixDst, Dst} = fix_dst_operand(Dst0), + {FixSrc3, Src3} = + case is_mem_opnd(Src) of + false -> + {FixSrc, Src}; + true -> + case is_mem_opnd(Dst) of + false -> + {FixSrc, Src}; + true -> + Reg = ?HIPE_X86_REGISTERS:temp0(), + Src2 = clone(Src, Reg), + FixSrc2 = FixSrc ++ [mk_move(Src, Src2)], + {FixSrc2, Src2} + end + end, + {FixSrc3, Src3, FixDst, Dst}. + +%%% Fix any x86_mem operand to not refer to any pseudos. +%%% The fixup may use additional instructions and registers. +%%% 'src' operands may clobber '%temp0'. +%%% 'dst' operands may clobber '%temp1'. + +fix_src_operand(Opnd) -> + fix_mem_operand(Opnd, ?HIPE_X86_REGISTERS:temp0()). + +fix_dst_operand(Opnd) -> + fix_mem_operand(Opnd, ?HIPE_X86_REGISTERS:temp1()). + +fix_mem_operand(Opnd, Reg) -> % -> {[fixupcode], newop} + case Opnd of + #x86_mem{base=Base,off=Off} -> + case is_mem_opnd(Base) of + false -> + case src_is_pseudo(Off) of + false -> + {[], Opnd}; + true -> % pseudo(reg) + Temp = clone(Off, Reg), + {[hipe_x86:mk_move(Off, Temp)], + Opnd#x86_mem{off=Temp}} + end; + true -> + Temp = clone(Base, Reg), + case src_is_pseudo(Off) of + false -> % imm/reg(pseudo) + {[hipe_x86:mk_move(Base, Temp)], + Opnd#x86_mem{base=Temp}}; + true -> % pseudo1(pseudo0) + {[hipe_x86:mk_move(Base, Temp), + hipe_x86:mk_alu('add', Off, Temp)], + Opnd#x86_mem{base=Temp, off=hipe_x86:mk_imm(0)}} + end + end; + _ -> + {[], Opnd} + end. + +%%% Check if an operand denotes a memory cell (mem or pseudo). + +is_mem_opnd(Opnd) -> + case Opnd of + #x86_mem{} -> true; + #x86_temp{} -> temp_is_pseudo(Opnd); + _ -> false + end. + +%%% Check if an operand is a pseudo-Temp. + +src_is_pseudo(Src) -> + case hipe_x86:is_temp(Src) of + true -> temp_is_pseudo(Src); + false -> false + end. + +temp_is_pseudo(Temp) -> + not(?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp))). + +%%% Make Reg a clone of Dst (attach Dst's type to Reg). 
+ +clone(Dst, Reg) -> + Type = + case Dst of + #x86_mem{} -> hipe_x86:mem_type(Dst); + #x86_temp{} -> hipe_x86:temp_type(Dst) + end, + hipe_x86:mk_temp(Reg, Type). + +mk_move(Src, Dst=#x86_temp{type=double}) -> + hipe_x86:mk_fmove(Src, Dst); +mk_move(Src, Dst) -> + hipe_x86:mk_move(Src, Dst). diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl new file mode 100644 index 0000000000..0b70764daf --- /dev/null +++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl @@ -0,0 +1,452 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_RA_POSTCONDITIONS, hipe_amd64_ra_postconditions). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(HIPE_X86_SPECIFIC, hipe_amd64_specific). +-define(ECX, rcx). +-else. +-define(HIPE_X86_RA_POSTCONDITIONS, hipe_x86_ra_postconditions). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(HIPE_X86_SPECIFIC, hipe_x86_specific). +-define(ECX, ecx). +-endif. + +-module(?HIPE_X86_RA_POSTCONDITIONS). + +-export([check_and_rewrite/3]). + +-include("../x86/hipe_x86.hrl"). +-define(HIPE_INSTRUMENT_COMPILER, true). +-include("../main/hipe.hrl"). +-define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)). + +check_and_rewrite(Defun, Coloring, Strategy) -> + %% io:format("Converting\n"), + TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC), + %% io:format("Rewriting\n"), + #defun{code=Code0} = Defun, + {Code1, DidSpill} = do_insns(Code0, TempMap, Strategy, [], false), + {Defun#defun{code=Code1,var_range={0,hipe_gensym:get_var(x86)}}, + DidSpill}. + +do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) -> + {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy), + do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1); +do_insns([], _TempMap, _Strategy, Accum, DidSpill) -> + {lists:reverse(Accum), DidSpill}. + +do_insn(I, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill} + case I of + #alu{} -> + do_alu(I, TempMap, Strategy); + #cmp{} -> + do_cmp(I, TempMap, Strategy); + #imul{} -> + do_imul(I, TempMap, Strategy); + #jmp_switch{} -> + do_jmp_switch(I, TempMap, Strategy); + #lea{} -> + do_lea(I, TempMap, Strategy); + #move{} -> + do_move(I, TempMap, Strategy); + #move64{} -> + do_move64(I, TempMap, Strategy); + #movsx{} -> + do_movx(I, TempMap, Strategy); + #movzx{} -> + do_movx(I, TempMap, Strategy); + #fmove{} -> + do_fmove(I, TempMap, Strategy); + #shift{} -> + do_shift(I, TempMap, Strategy); + _ -> + %% comment, jmp*, label, pseudo_call, pseudo_jcc, pseudo_tailcall, + %% pseudo_tailcall_prepare, push, ret + {[I], false} + end. + +%%% Fix an alu op. + +do_alu(I, TempMap, Strategy) -> + #alu{src=Src0,dst=Dst0} = I, + {FixSrc,Src,FixDst,Dst,DidSpill} = + do_binary(Src0, Dst0, TempMap, Strategy), + {FixSrc ++ FixDst ++ [I#alu{src=Src,dst=Dst}], DidSpill}. 
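+
+%% Example (illustrative): x86 has no memory-to-memory ALU forms, so when both
+%% operands of an add were spilled, do_binary/4 above rewrites
+%%   #alu{aluop='add', src=SpilledSrc, dst=SpilledDst}
+%% into
+%%   #move{src=SpilledSrc, dst=NewTemp},
+%%   #alu{aluop='add', src=NewTemp, dst=SpilledDst}
+%% and returns DidSpill = true so the surrounding allocation loop knows that a
+%% new temp was introduced.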
+ +%%% Fix a cmp op. + +do_cmp(I, TempMap, Strategy) -> + #cmp{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst, DidSpill} = + do_binary(Src0, Dst0, TempMap, Strategy), + {FixSrc ++ FixDst ++ [I#cmp{src=Src,dst=Dst}], DidSpill}. + +%%% Fix an imul op. + +do_imul(I, TempMap, Strategy) -> + #imul{imm_opt=ImmOpt,src=Src0,temp=Temp0} = I, + {FixSrc,Src,DidSpill1} = fix_src_operand(Src0, TempMap, Strategy), % temp1 + {FixTempSrc,Temp,FixTempDst,DidSpill2} = + case is_spilled(Temp0, TempMap) of + false -> + {[], Temp0, [], false}; + true -> + Reg = spill_temp0('untagged', Strategy), + {case ImmOpt of + [] -> [hipe_x86:mk_move(Temp0, Reg)]; % temp *= src + _ -> [] % temp = src * imm + end, + Reg, + [hipe_x86:mk_move(Reg, Temp0)], + true} + end, + {FixSrc ++ FixTempSrc ++ [I#imul{src=Src,temp=Temp}] ++ FixTempDst, + DidSpill1 or DidSpill2}. + +%%% Fix a jmp_switch op. + +-ifdef(HIPE_AMD64). +do_jmp_switch(I, TempMap, Strategy) -> + #jmp_switch{temp=Temp, jtab=Tab} = I, + case is_spilled(Temp, TempMap) of + false -> + case is_spilled(Tab, TempMap) of + false -> + {[I], false}; + true -> + NewTab = spill_temp('untagged', Strategy), + {[hipe_x86:mk_move(Tab, NewTab), I#jmp_switch{jtab=Tab}], + true} + end; + true -> + case is_spilled(Tab, TempMap) of + false -> + NewTmp = spill_temp('untagged', Strategy), + {[hipe_x86:mk_move(Temp, NewTmp), I#jmp_switch{temp=NewTmp}], + true}; + true -> + NewTmp = spill_temp('untagged', Strategy), + NewTab = spill_temp0('untagged', Strategy), + {[hipe_x86:mk_move(Temp, NewTmp), + hipe_x86:mk_move(Tab, NewTab), + I#jmp_switch{temp=NewTmp, jtab=NewTab}], + true} + end + end. +-else. % not AMD64 +do_jmp_switch(I, TempMap, Strategy) -> + #jmp_switch{temp=Temp} = I, + case is_spilled(Temp, TempMap) of + false -> + {[I], false}; + true -> + NewTmp = spill_temp('untagged', Strategy), + {[hipe_x86:mk_move(Temp, NewTmp), I#jmp_switch{temp=NewTmp}], + true} + end. +-endif. % not AMD64 + +%%% Fix a lea op. + +do_lea(I, TempMap, Strategy) -> + #lea{temp=Temp} = I, + case is_spilled(Temp, TempMap) of + false -> + {[I], false}; + true -> + NewTmp = spill_temp('untagged', Strategy), + {[I#lea{temp=NewTmp}, hipe_x86:mk_move(NewTmp, Temp)], + true} + end. + +%%% Fix a move op. + +do_move(I, TempMap, Strategy) -> + #move{src=Src0,dst=Dst0} = I, + {FixSrc, Src, FixDst, Dst, DidSpill} = + do_check_byte_move(Src0, Dst0, TempMap, Strategy), + {FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}], + DidSpill}. + +-ifdef(HIPE_AMD64). + +%%% AMD64 has no issues with byte moves. +do_check_byte_move(Src0, Dst0, TempMap, Strategy) -> + do_binary(Src0, Dst0, TempMap, Strategy). + +-else. % not AMD64 + +%%% x86 can only do byte moves to a subset of the integer registers. +do_check_byte_move(Src0, Dst0, TempMap, Strategy) -> + case Dst0 of + #x86_mem{type=byte} -> + do_byte_move(Src0, Dst0, TempMap, Strategy); + _ -> + do_binary(Src0, Dst0, TempMap, Strategy) + end. + +do_byte_move(Src0, Dst0, TempMap, Strategy) -> + {FixSrc, Src, DidSpill1} = fix_src_operand(Src0, TempMap, Strategy), + {FixDst, Dst, DidSpill2} = fix_dst_operand(Dst0, TempMap, Strategy), + Reg = hipe_x86_registers:eax(), + {FixSrc3, Src3} = % XXX: this just checks Src, the result is known! + case Src of + #x86_imm{} -> + {FixSrc, Src}; + #x86_temp{reg=Reg} -> % small moves must start from reg 1->4 + {FixSrc, Src} % so variable sources are always put in eax + end, + {FixSrc3, Src3, FixDst, Dst, + DidSpill2 or DidSpill1}. + +-endif. % not AMD64 + +%%% Fix a move64 op. 
+ +do_move64(I, TempMap, Strategy) -> + #move64{dst=Dst} = I, + case is_spilled(Dst, TempMap) of + false -> + {[I], false}; + true -> + Reg = clone(Dst, Strategy), + {[I#move64{dst=Reg}, hipe_x86:mk_move(Reg, Dst)], true} + end. + +%%% Fix a movx op. + +do_movx(I, TempMap, Strategy) -> + {{FixSrc, Src, DidSpill1}, {FixDst, Dst, DidSpill2}} = + case I of + #movsx{src=Src0,dst=Dst0} -> + {fix_src_operand(Src0, TempMap, Strategy), + fix_dst_operand(Dst0, TempMap, Strategy)}; + #movzx{src=Src0,dst=Dst0} -> + {fix_src_operand(Src0, TempMap, Strategy), + fix_dst_operand(Dst0, TempMap, Strategy)} + end, + {I3, DidSpill3} = + case is_spilled(Dst, TempMap) of + false -> + I2 = case I of + #movsx{} -> + [hipe_x86:mk_movsx(Src, Dst)]; + #movzx{} -> + [hipe_x86:mk_movzx(Src, Dst)] + end, + {I2, false}; + true -> + Dst2 = clone(Dst, Strategy), + I2 = + case I of + #movsx{} -> + [hipe_x86:mk_movsx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)]; + #movzx{} -> + [hipe_x86:mk_movzx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)] + end, + {I2, true} + end, + {FixSrc++FixDst++I3, + DidSpill3 or DidSpill2 or DidSpill1}. + +%%% Fix an fmove op. + +do_fmove(I, TempMap, Strategy) -> + #fmove{src=Src0,dst=Dst0} = I, + {FixSrc, Src, DidSpill1} = fix_src_operand(Src0, TempMap, Strategy), + {FixDst, Dst, DidSpill2} = fix_dst_operand(Dst0, TempMap, Strategy), + %% fmoves from memory position to memory position is handled + %% by the f.p. register allocator. + {FixSrc ++ FixDst ++ [I#fmove{src=Src,dst=Dst}], + DidSpill1 or DidSpill2}. + +%%% Fix a shift operation. +%%% 1. remove pseudos from any explicit memory operands +%%% 2. if the source is a register or memory position +%%% make sure to move it to %ecx + +do_shift(I, TempMap, Strategy) -> + #shift{src=Src0,dst=Dst0} = I, + {FixDst, Dst, DidSpill} = fix_dst_operand(Dst0, TempMap, Strategy), + Reg = ?HIPE_X86_REGISTERS:?ECX(), + case Src0 of + #x86_imm{} -> + {FixDst ++ [I#shift{dst=Dst}], DidSpill}; + #x86_temp{reg=Reg} -> + {FixDst ++ [I#shift{dst=Dst}], DidSpill} + end. + +%%% Fix the operands of a binary op. +%%% 1. remove pseudos from any explicit memory operands +%%% 2. if both operands are (implicit or explicit) memory operands, +%%% move src to a reg and use reg as src in the original insn + +do_binary(Src0, Dst0, TempMap, Strategy) -> + {FixSrc, Src, DidSpill1} = fix_src_operand(Src0, TempMap, Strategy), + {FixDst, Dst, DidSpill2} = fix_dst_operand(Dst0, TempMap, Strategy), + {FixSrc3, Src3, DidSpill3} = + case is_mem_opnd(Src, TempMap) of + false -> + {FixSrc, Src, false}; + true -> + case is_mem_opnd(Dst, TempMap) of + false -> + {FixSrc, Src, false}; + true -> + Src2 = clone(Src, Strategy), + FixSrc2 = FixSrc ++ [hipe_x86:mk_move(Src, Src2)], + {FixSrc2, Src2, true} + end + end, + {FixSrc3, Src3, FixDst, Dst, + DidSpill3 or DidSpill2 or DidSpill1}. + +%%% Fix any x86_mem operand to not refer to any spilled temps. + +fix_src_operand(Opnd, TmpMap, Strategy) -> + fix_mem_operand(Opnd, TmpMap, temp1(Strategy)). + +temp1('normal') -> []; +temp1('linearscan') -> ?HIPE_X86_REGISTERS:temp1(). + +fix_dst_operand(Opnd, TempMap, Strategy) -> + fix_mem_operand(Opnd, TempMap, temp0(Strategy)). + +temp0('normal') -> []; +temp0('linearscan') -> ?HIPE_X86_REGISTERS:temp0(). 
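+
+%% Note (illustrative inference): with the 'normal' strategy the rewrites above
+%% create fresh pseudos (hipe_x86:mk_new_temp/1) and leave their placement to
+%% another iteration of the allocator, whereas the 'linearscan' strategy maps
+%% directly onto the reserved scratch registers temp0/temp1, which linear scan
+%% never allocates to ordinary temps.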
+ +fix_mem_operand(Opnd, TempMap, RegOpt) -> % -> {[fixupcode], newop, DidSpill} + case Opnd of + #x86_mem{base=Base,off=Off} -> + case is_mem_opnd(Base, TempMap) of + false -> + case is_mem_opnd(Off, TempMap) of + false -> + {[], Opnd, false}; + true -> + Temp = clone2(Off, RegOpt), + {[hipe_x86:mk_move(Off, Temp)], + Opnd#x86_mem{off=Temp}, + true} + end; + true -> + Temp = clone2(Base, RegOpt), + case is_mem_opnd(Off, TempMap) of + false -> % imm/reg(pseudo) + {[hipe_x86:mk_move(Base, Temp)], + Opnd#x86_mem{base=Temp}, + true}; + true -> % pseudo(pseudo) + {[hipe_x86:mk_move(Base, Temp), + hipe_x86:mk_alu('add', Off, Temp)], + Opnd#x86_mem{base=Temp, off=hipe_x86:mk_imm(0)}, + true} + end + end; + _ -> + {[], Opnd, false} + end. + +%%% Check if an operand denotes a memory cell (mem or pseudo). + +is_mem_opnd(Opnd, TempMap) -> + R = + case Opnd of + #x86_mem{} -> true; + #x86_temp{} -> + Reg = hipe_x86:temp_reg(Opnd), + case hipe_x86:temp_is_allocatable(Opnd) of + true -> + case tuple_size(TempMap) > Reg of + true -> + case + hipe_temp_map:is_spilled(Reg, TempMap) of + true -> + ?count_temp(Reg), + true; + false -> false + end; + _ -> + %% impossible, but was true in ls post and false in normal post + exit({?MODULE,is_mem_opnd,Reg}), + false + end; + false -> true + end; + _ -> false + end, + %% io:format("Op ~w mem: ~w\n",[Opnd,R]), + R. + +%%% Check if an operand is a spilled Temp. + +is_spilled(Temp, TempMap) -> + case hipe_x86:temp_is_allocatable(Temp) of + true -> + Reg = hipe_x86:temp_reg(Temp), + case tuple_size(TempMap) > Reg of + true -> + case hipe_temp_map:is_spilled(Reg, TempMap) of + true -> + ?count_temp(Reg), + true; + false -> + false + end; + false -> + false + end; + false -> true + end. + +%%% Make Reg a clone of Dst (attach Dst's type to Reg). + +clone(Dst, Strategy) -> + Type = + case Dst of + #x86_mem{} -> hipe_x86:mem_type(Dst); + #x86_temp{} -> hipe_x86:temp_type(Dst) + end, + spill_temp(Type, Strategy). + +spill_temp0(Type, 'normal') -> + hipe_x86:mk_new_temp(Type); +spill_temp0(Type, 'linearscan') -> + hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), Type). + +spill_temp(Type, 'normal') -> + hipe_x86:mk_new_temp(Type); +spill_temp(Type, 'linearscan') -> + hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), Type). + +%%% Make a certain reg into a clone of Dst + +clone2(Dst, RegOpt) -> + Type = + case Dst of + #x86_mem{} -> hipe_x86:mem_type(Dst); + #x86_temp{} -> hipe_x86:temp_type(Dst) + end, + case RegOpt of + [] -> hipe_x86:mk_new_temp(Type); + Reg -> hipe_x86:mk_temp(Reg, Type) + end. diff --git a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl b/lib/hipe/x86/hipe_x86_ra_x87_ls.erl new file mode 100644 index 0000000000..6bdb08c6fb --- /dev/null +++ b/lib/hipe/x86/hipe_x86_ra_x87_ls.erl @@ -0,0 +1,63 @@ +%% $Id$ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2006-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +%% Linear Scan register allocator for x87 + +-ifdef(HIPE_AMD64). 
+-define(HIPE_X86_RA_X87_LS, hipe_amd64_ra_x87_ls). +-define(HIPE_X86_SPECIFIC_X87, hipe_amd64_specific_x87). +-define(HIPE_X86_PP, hipe_amd64_pp). +-define(HIPE_X86_RA_LS, hipe_amd64_ra_ls). +-else. +-define(HIPE_X86_RA_X87_LS, hipe_x86_ra_x87_ls). +-define(HIPE_X86_SPECIFIC_X87, hipe_x86_specific_x87). +-define(HIPE_X86_PP, hipe_x86_pp). +-define(HIPE_X86_RA_LS, hipe_x86_ra_ls). +-endif. + +-module(?HIPE_X86_RA_X87_LS). +-export([ra/2]). + +%%-define(DEBUG,1). + +-define(HIPE_INSTRUMENT_COMPILER, false). %% Turn off instrumentation. +-include("../main/hipe.hrl"). + +ra(Defun, Options) -> + ?inc_counter(ra_calls_counter,1), + CFG = hipe_x86_cfg:init(Defun), + %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)), + SpillIndex = 0, + SpillLimit = ?HIPE_X86_SPECIFIC_X87:number_of_temporaries(CFG), + ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))), + + ?inc_counter(ra_iteration_counter,1), + %% ?HIPE_X86_PP:pp(Defun), + Cfg = hipe_x86_cfg:init(Defun), % XXX: didn't we just compute this above? + + {Coloring,NewSpillIndex} = + ?HIPE_X86_RA_LS:regalloc(Cfg, + ?HIPE_X86_SPECIFIC_X87:allocatable(), + [hipe_x86_cfg:start_label(Cfg)], + SpillIndex, SpillLimit, Options, + ?HIPE_X86_SPECIFIC_X87), + + ?add_spills(Options, NewSpillIndex), + {Defun, Coloring, NewSpillIndex}. diff --git a/lib/hipe/x86/hipe_x86_registers.erl b/lib/hipe/x86/hipe_x86_registers.erl new file mode 100644 index 0000000000..1cfa095995 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_registers.erl @@ -0,0 +1,254 @@ +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% +%%% TODO: +%%% - Do we need a pseudo reg for the condition codes? + +-module(hipe_x86_registers). + +-export([reg_name/1, + first_virtual/0, + is_precoloured/1, + is_precoloured_x87/1, + all_precoloured/0, + eax/0, + ecx/0, + temp0/0, + temp1/0, + sp/0, + proc_pointer/0, + heap_limit/0, + fcalls/0, + proc_offset/1, + sp_limit_offset/0, + is_fixed/1, + %% fixed/0, + allocatable/0, + allocatable_x87/0, + nr_args/0, + arg/1, + is_arg/1, + args/1, + nr_rets/0, + ret/1, + call_clobbered/0, + tailcall_clobbered/0, + live_at_return/0, + float_size/0, + wordsize/0, + alignment/0]). + +-include("../rtl/hipe_literals.hrl"). + +-ifdef(X86_HP_IN_ESI). +-export([heap_pointer/0]). +-endif. + +-define(EAX, 0). +-define(ECX, 1). +-define(EDX, 2). +-define(EBX, 3). +-define(ESP, 4). +-define(EBP, 5). +-define(ESI, 6). +-define(EDI, 7). +-define(FCALLS, 8). % proc field alias +-define(HEAP_LIMIT, 9). % proc field alias +-define(LAST_PRECOLOURED, 9). + +-define(ARG0, ?EAX). +-define(ARG1, ?EDX). +-define(ARG2, ?ECX). +-define(ARG3, ?EBX). +-define(ARG4, ?EDI). + +-define(RET0, ?EAX). +-define(RET1, ?EDX). +-define(RET2, ?ECX). +-define(RET3, ?EBX). +-define(RET4, ?EDI). + +-define(TEMP0, ?EBX). % XXX: was EAX +-define(TEMP1, ?EDI). % XXX: was EDX then EDI + +-define(PROC_POINTER, ?EBP). 
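+
+%% Illustration of the conventions encoded above: up to ?X86_NR_ARG_REGS
+%% arguments are passed in registers, taken in the order %eax, %edx, %ecx,
+%% %ebx, %edi (ARG0..ARG4); any further arguments go on the stack.  Return
+%% values use the same registers (RET0..RET4), %ebp is reserved as the process
+%% pointer, and %ebx/%edi double as the scratch registers TEMP0/TEMP1.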
+ +reg_name(R) -> + case R of + ?EAX -> "%eax"; + ?ECX -> "%ecx"; + ?EDX -> "%edx"; + ?EBX -> "%ebx"; + ?ESP -> "%esp"; + ?EBP -> "%ebp"; + ?ESI -> "%esi"; + ?EDI -> "%edi"; + ?FCALLS -> "%fcalls"; + ?HEAP_LIMIT -> "%hplim"; + Other -> "%r" ++ integer_to_list(Other) + end. + +first_virtual() -> ?LAST_PRECOLOURED + 1. + +is_precoloured(X) -> X =< ?LAST_PRECOLOURED. + +is_precoloured_x87(X) -> X =< 6. + +all_precoloured() -> + [?EAX, + ?ECX, + ?EDX, + ?EBX, + ?ESP, + ?EBP, + ?ESI, + ?EDI, + ?FCALLS, + ?HEAP_LIMIT]. + +eax() -> ?EAX. +ecx() -> ?ECX. +temp0() -> ?TEMP0. +temp1() -> ?TEMP1. +sp() -> ?ESP. +proc_pointer() -> ?PROC_POINTER. +fcalls() -> ?FCALLS. +heap_limit() -> ?HEAP_LIMIT. + +-ifdef(X86_HP_IN_ESI). +-define(ESI_IS_FIXED,1). +-define(HEAP_POINTER, ?ESI). +heap_pointer() -> ?HEAP_POINTER. +is_heap_pointer(?HEAP_POINTER) -> true; +is_heap_pointer(_) -> false. +-define(LIST_HP_FIXED,[?HEAP_POINTER]). +-define(LIST_HP_LIVE_AT_RETURN,[{?HEAP_POINTER,untagged}]). +-else. +is_heap_pointer(_) -> false. +-define(LIST_HP_FIXED,[]). +-define(LIST_HP_LIVE_AT_RETURN,[]). +-endif. + +-ifdef(ESI_IS_FIXED). +-define(LIST_ESI_ALLOCATABLE,[]). +-define(LIST_ESI_CALL_CLOBBERED,[]). +-else. +-define(LIST_ESI_ALLOCATABLE,[?ESI]). +-define(LIST_ESI_CALL_CLOBBERED,[{?ESI,tagged},{?ESI,untagged}]). +-endif. + +proc_offset(?FCALLS) -> ?P_FCALLS; +proc_offset(?HEAP_LIMIT) -> ?P_HP_LIMIT; +proc_offset(_) -> false. + +sp_limit_offset() -> ?P_NSP_LIMIT. + +is_fixed(?ESP) -> true; +is_fixed(?PROC_POINTER) -> true; +is_fixed(?FCALLS) -> true; +is_fixed(?HEAP_LIMIT) -> true; +is_fixed(R) -> is_heap_pointer(R). + +%% fixed() -> +%% [?ESP, ?PROC_POINTER, ?FCALLS, ?HEAP_LIMIT | ?LIST_HP_FIXED]. + +allocatable() -> + [?EDX, ?ECX, ?EBX, ?EAX, ?EDI| ?LIST_ESI_ALLOCATABLE]. + +allocatable_x87() -> + [0,1,2,3,4,5,6]. + +nr_args() -> ?X86_NR_ARG_REGS. + +arg(N) -> + if N < ?X86_NR_ARG_REGS -> + case N of + 0 -> ?ARG0; + 1 -> ?ARG1; + 2 -> ?ARG2; + 3 -> ?ARG3; + 4 -> ?ARG4; + _ -> exit({?MODULE, arg, N}) + end; + true -> + exit({?MODULE, arg, N}) + end. + +is_arg(R) -> + case R of + ?ARG0 -> ?X86_NR_ARG_REGS > 0; + ?ARG1 -> ?X86_NR_ARG_REGS > 1; + ?ARG2 -> ?X86_NR_ARG_REGS > 2; + ?ARG3 -> ?X86_NR_ARG_REGS > 3; + ?ARG4 -> ?X86_NR_ARG_REGS > 4; + _ -> false + end. + +args(Arity) when is_integer(Arity), Arity >= 0 -> + N = erlang:min(Arity, ?X86_NR_ARG_REGS), + args(N-1, []). + +args(I, Rest) when I < 0 -> Rest; +args(I, Rest) -> args(I-1, [arg(I) | Rest]). + +nr_rets() -> ?X86_NR_RET_REGS. + +ret(N) -> + if N < ?X86_NR_RET_REGS -> + case N of + 0 -> ?RET0; + 1 -> ?RET1; + 2 -> ?RET2; + 3 -> ?RET3; + 4 -> ?RET4; + _ -> exit({?MODULE, ret, N}) + end; + true -> + exit({?MODULE, ret, N}) + end. + +call_clobbered() -> + [{?EAX,tagged},{?EAX,untagged}, % does the RA strip the type or not? + {?EDX,tagged},{?EDX,untagged}, + {?ECX,tagged},{?ECX,untagged}, + {?EBX,tagged},{?EBX,untagged}, + {?EDI,tagged},{?EDI,untagged} + | ?LIST_ESI_CALL_CLOBBERED] ++ all_x87_pseudos(). + +tailcall_clobbered() -> % tailcall crapola needs two temps + [{?TEMP0,tagged},{?TEMP0,untagged}, + {?TEMP1,tagged},{?TEMP1,untagged}] ++ all_x87_pseudos(). + +all_x87_pseudos() -> + [{0,double}, {1,double}, {2,double}, {3,double}, + {4,double}, {5,double}, {6,double}]. + +live_at_return() -> + [{?ESP,untagged} + ,{?PROC_POINTER,untagged} + ,{?FCALLS,untagged} + ,{?HEAP_LIMIT,untagged} + | ?LIST_HP_LIVE_AT_RETURN + ]. + +alignment() -> 4. + +float_size() -> 8. + +wordsize() -> 4. 
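+
+%% Worked example (assumes ?X86_NR_ARG_REGS is 3; the actual value comes from
+%% hipe_literals.hrl): args(5) returns [?ARG0, ?ARG1, ?ARG2], i.e. %eax, %edx
+%% and %ecx, the remaining two arguments being passed on the stack, and arg(4)
+%% exits with {?MODULE, arg, 4} because only three argument registers are
+%% configured.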
diff --git a/lib/hipe/x86/hipe_x86_spill_restore.erl b/lib/hipe/x86/hipe_x86_spill_restore.erl new file mode 100644 index 0000000000..e60c446e17 --- /dev/null +++ b/lib/hipe/x86/hipe_x86_spill_restore.erl @@ -0,0 +1,345 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% ==================================================================== +%% Authors : Dogan Yazar and Erdem Aksu (KT2 project of 2008) +%% ==================================================================== + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore). +-define(HIPE_X86_LIVENESS, hipe_amd64_liveness). +-define(HIPE_X86_SPECIFIC, hipe_amd64_specific). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-define(X86STR, "amd64"). +-else. +-define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore). +-define(HIPE_X86_LIVENESS, hipe_x86_liveness). +-define(HIPE_X86_SPECIFIC, hipe_x86_specific). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-define(X86STR, "x86"). +-endif. + +-module(?HIPE_X86_SPILL_RESTORE). + +-export([spill_restore/2]). + +%% controls which set library is used to keep temp variables. +-define(SET_MODULE, ordsets). + +%% Turn on instrumentation. +-define(HIPE_INSTRUMENT_COMPILER, true). + +-include("../main/hipe.hrl"). +-include("../x86/hipe_x86.hrl"). % Added for the definition of #pseudo_call{} +-include("../flow/cfg.hrl"). % Added for the definition of #cfg{} + +%% Main function +spill_restore(Defun, Options) -> + CFG = ?option_time(firstPass(Defun), ?X86STR" First Pass", Options), + CFGFinal = ?option_time(secondPass(CFG), ?X86STR" Second Pass", Options), + hipe_x86_cfg:linearise(CFGFinal). + +%% Performs the first pass of the algorithm. +%% By working bottom up, introduce the pseudo_spills. +firstPass(Defun) -> + CFG0 = ?HIPE_X86_SPECIFIC:defun_to_cfg(Defun), + %% get the labels bottom up + Labels = hipe_x86_cfg:postorder(CFG0), + Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), + %% spill around the function will be introduced below the move + %% formals, so get all labels except it. + LabelsExceptMoveFormals = lists:sublist(Labels, length(Labels)-1), + %% all work is done by the helper function firstPassHelper + %% saveTree keeps the all newly introduced spills. Keys are the labels. + {CFG1, SaveTree} = firstPassHelper(LabelsExceptMoveFormals, Liveness, CFG0), + case hipe_x86_cfg:reverse_postorder(CFG0) of + [Label1, Label2|_] -> + SaveTreeElement = saveTreeLookup(Label2, SaveTree), + %% FilteredSaveTreeElement is the to be spilled temps around the function call. + %% They are spilled just before move formals + FilteredSaveTreeElement = [Temp || Temp <- SaveTreeElement, temp_is_pseudo(Temp)], + Block = hipe_x86_cfg:bb(CFG1, Label1), + Code = hipe_bb:code(Block), + %% The following statements are tedious but work ok. + %% Put spills between move formals and the jump code. 
+      %% This clumsy splicing is needed because the spills must be
+      %% introduced after the move formals.
+      %% Another solution might be to introduce a separate block.
+      MoveCodes = lists:sublist(Code, length(Code)-1),
+      JumpCode = lists:last(Code),
+      hipe_x86_cfg:bb_add(CFG1, Label1, hipe_bb:mk_bb(MoveCodes ++ [hipe_x86:mk_pseudo_spill(FilteredSaveTreeElement)] ++ [JumpCode]));
+    _ ->
+      CFG1
+  end.
+
+%% Helper function of firstPass.
+
+%% Processes all labels recursively and decides which spills to insert.
+%% Spills are introduced before each function call (pseudo_call), and the
+%% save list for the global spill around the function is collected.
+firstPassHelper(Labels, Liveness, CFG) ->
+  firstPassHelper(Labels, Liveness, CFG, gb_trees:empty()).
+
+firstPassHelper([Label|Labels], Liveness, CFG, SaveTree) ->
+  LiveOut = from_list(?HIPE_X86_LIVENESS:liveout(Liveness, Label)),
+  Block = hipe_x86_cfg:bb(CFG, Label),
+  Code = hipe_bb:code(Block),
+  Succ = hipe_x86_cfg:succ(CFG, Label),
+  IntersectedSaveList = findIntersectedSaveList(Succ, SaveTree),
+  %% Call firstPassDoBlock, which gives the updated block code
+  %% (including spills) as well as the intersected save list that
+  %% should be passed on to the blocks above.
+  {_,NewIntersectedList,NewCode} =
+    firstPassDoBlock(Code, LiveOut, IntersectedSaveList),
+  NewBlock = hipe_bb:code_update(Block, NewCode),
+  NewCFG = hipe_x86_cfg:bb_add(CFG, Label, NewBlock),
+  SizeOfSet = setSize(NewIntersectedList),
+
+  %% If the intersected save list is not empty, insert it into the save tree.
+  if SizeOfSet =/= 0 ->
+      UpdatedSaveTree = gb_trees:insert(Label, NewIntersectedList, SaveTree),
+      firstPassHelper(Labels, Liveness, NewCFG, UpdatedSaveTree);
+     true ->
+      firstPassHelper(Labels, Liveness, NewCFG, SaveTree)
+  end;
+firstPassHelper([], _, CFG, SaveTree) ->
+  {CFG, SaveTree}.
+
+%% Handle each instruction in the block bottom up.
+firstPassDoBlock(Insts, LiveOut, IntersectedSaveList) ->
+  lists:foldr(fun firstPassDoInsn/2, {LiveOut,IntersectedSaveList,[]}, Insts).
+
+firstPassDoInsn(I, {LiveOut,IntersectedSaveList,PrevInsts}) ->
+  case I of
+    #pseudo_call{} ->
+      do_pseudo_call(I, {LiveOut,IntersectedSaveList,PrevInsts});
+    _ -> % other instructions
+      DefinedList = from_list(?HIPE_X86_LIVENESS:defines(I)),
+      UsedList = from_list(?HIPE_X86_LIVENESS:uses(I)),
+
+      NewLiveOut = subtract(union(LiveOut, UsedList), DefinedList),
+      NewIntersectedSaveList = subtract(IntersectedSaveList, DefinedList),
+
+      {NewLiveOut, NewIntersectedSaveList, [I|PrevInsts]}
+  end.
+
+do_pseudo_call(I, {LiveOut,IntersectedSaveList,PrevInsts}) ->
+  LiveTemps = [Temp || Temp <- to_list(LiveOut), temp_is_pseudo(Temp)],
+  NewIntersectedSaveList = union(IntersectedSaveList, LiveOut),
+  {LiveOut, NewIntersectedSaveList, [hipe_x86:mk_pseudo_spill(LiveTemps), I | PrevInsts]}.
+
+findIntersectedSaveList(LabelList, SaveTree) ->
+  findIntersectedSaveList([saveTreeLookup(Label, SaveTree) || Label <- LabelList]).
+
+findIntersectedSaveList([]) ->
+  [];
+findIntersectedSaveList([List1]) ->
+  List1;
+findIntersectedSaveList([List1,List2|Rest]) ->
+  findIntersectedSaveList([intersection(List1, List2)|Rest]).
+
+saveTreeLookup(Label, SaveTree) ->
+  case gb_trees:lookup(Label, SaveTree) of
+    {value, SaveList} ->
+      SaveList;
+    _ ->
+      []
+  end.
+
+%% Performs the second pass of the algorithm.
+%% It eliminates the unnecessary spills and introduces the restores.
+%% It works top down.
+secondPass(CFG0) ->
+  Labels = hipe_x86_cfg:reverse_postorder(CFG0),
+  Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0),
+  secondPassHelper(Labels, Liveness, CFG0).
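+
+%% Illustrative only (not used by the pass): the per-instruction update in
+%% firstPassDoInsn/2 above is the usual backward liveness transfer,
+%% live_before = (live_after U uses) \ defines, spelled out here with
+%% ordsets (the module behind ?SET_MODULE) and made-up temp names:
+%%
+%%   live_before(LiveAfter, Uses, Defines) ->
+%%     ordsets:subtract(ordsets:union(ordsets:from_list(LiveAfter),
+%%                                    ordsets:from_list(Uses)),
+%%                      ordsets:from_list(Defines)).
+%%
+%%   live_before([t1,t2], [t3], [t2]) =:= [t1,t3]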
+
+%% Helper function of secondPass.
+
+%% Recursively handles all the labels given.
+secondPassHelper(Labels, Liveness, CFG) ->
+  secondPassHelper(Labels, Liveness, CFG, gb_trees:empty(), CFG).
+
+%% AccumulatedCFG is the CFG to which the restore edges are added incrementally.
+%% UnmodifiedCFG is the CFG as created by the first pass.
+
+%% AccumulatedSaveTree is used to eliminate the unnecessary saves. The
+%% saves (spills) in the blocks above are propagated down (if still live
+%% and not redefined) and redundant saves are eliminated in the lower
+%% blocks.
+%% For memory efficiency, it may be better not to maintain the
+%% AccumulatedSaveTree but to traverse the tree recursively and pass the
+%% save lists to the children individually.
+%% But the current approach may be faster even though it needs more memory.
+
+secondPassHelper([Label|RestOfLabels], Liveness,
+                 AccumulatedCFG, AccumulatedSaveTree, UnmodifiedCFG) ->
+  LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label),
+  Block = hipe_x86_cfg:bb(AccumulatedCFG, Label),
+  Code = hipe_bb:code(Block),
+
+  %% UnmodifiedCFG is needed for getting the correct predecessors
+  %% (i.e. not the restore edge blocks).
+  PredList = hipe_x86_cfg:pred(UnmodifiedCFG, Label),
+  %% Find the spills coming from all the predecessors by intersecting.
+  InitialAccumulatedSaveList =
+    findIntersectedSaveList(PredList, AccumulatedSaveTree),
+  AccumulatedSaveList =
+    keepLiveVarsInAccumSaveList(InitialAccumulatedSaveList, LiveOut),
+
+  {NewCode, CFGUpdateWithRestores, NewAccumulatedSaveList} =
+    secondPassDoBlock(Label, Code, AccumulatedCFG, AccumulatedSaveList),
+
+  UpdatedAccumulatedSaveTree =
+    gb_trees:insert(Label, NewAccumulatedSaveList, AccumulatedSaveTree),
+  NewBlock = hipe_bb:code_update(Block, NewCode),
+  NewCFG = hipe_x86_cfg:bb_add(CFGUpdateWithRestores, Label, NewBlock),
+  secondPassHelper(RestOfLabels, Liveness, NewCFG,
+                   UpdatedAccumulatedSaveTree, UnmodifiedCFG);
+secondPassHelper([], _, AccumulatedCFG, _, _) ->
+  AccumulatedCFG.
+
+secondPassDoBlock(CurrentLabel, Insts, CFG, AccumulatedSaveList) ->
+  {NewAccumulatedSaveList,NewInsts,_,_,CFGUpdateWithRestores} =
+    lists:foldl(fun secondPassDoInsn/2, {AccumulatedSaveList,[],[],CurrentLabel,CFG}, Insts),
+  {NewInsts, CFGUpdateWithRestores, NewAccumulatedSaveList}.
+
+secondPassDoInsn(I, {AccumulatedSaveList,PrevInsts,SpillList,CurrentLabel,CFG}) ->
+  case I of
+    #pseudo_spill{} ->
+      %% Spill the variables that have not been accumulated from the top
+      %% down (i.e. those that are not already saved).
+      VariablesAlreadySaved = [X || {X,_} <- to_list(AccumulatedSaveList)],
+      VariablesToBeSpilled = I#pseudo_spill.args -- VariablesAlreadySaved,
+      NewSpillList = [{Temp, hipe_x86:mk_new_temp(Temp#x86_temp.type)} || Temp <- VariablesToBeSpilled],
+      %% Update the accumulated save list by adding the newly spilled variables.
+      NewAccumulatedSaveList = union(AccumulatedSaveList, from_list(NewSpillList)),
+      {NewAccumulatedSaveList, PrevInsts ++ secondPassDoPseudoSpill(NewSpillList), NewSpillList, CurrentLabel, CFG};
+    #pseudo_call{} ->
+      {CFGUpdateWithRestores, NewPseudoCall} =
+        secondPassDoPseudoCall(I, AccumulatedSaveList, CFG),
+      %% The spill list is emptied after use.
+      {AccumulatedSaveList, PrevInsts ++ [NewPseudoCall], [], CurrentLabel, CFGUpdateWithRestores};
+    _ ->
+      %% Remove the defined variables from the accumulated save
+      %% list since they need to be saved again on later occasions.
+ DefinedList = from_list(?HIPE_X86_LIVENESS:defines(I)), + NewAccumulatedSaveList = removeRedefVarsFromAccumSaveList(AccumulatedSaveList, DefinedList), + {NewAccumulatedSaveList, PrevInsts ++ [I], SpillList, CurrentLabel, CFG} + end. + +%% remove dead vars from accumulated save list so that they are not restored. +keepLiveVarsInAccumSaveList([], _) -> + []; +keepLiveVarsInAccumSaveList([{Var,Temp}|Rest], DefinedList) -> + IsDefined = is_element(Var, DefinedList), + case IsDefined of + true -> [{Var,Temp}|keepLiveVarsInAccumSaveList(Rest, DefinedList)]; + false -> keepLiveVarsInAccumSaveList(Rest, DefinedList) + end. + +%% remove the redefined variables from accumulated save list since +%% they are changed. +removeRedefVarsFromAccumSaveList([], _) -> + []; +removeRedefVarsFromAccumSaveList([{Var,Temp}|Rest], DefinedList) -> + IsDefined = is_element(Var, DefinedList), + case IsDefined of + true -> removeRedefVarsFromAccumSaveList(Rest, DefinedList); + false -> [{Var,Temp}|removeRedefVarsFromAccumSaveList(Rest, DefinedList)] + end. + +%% convert pseudo_spills to move instructions. +secondPassDoPseudoSpill(SpillList) -> + lists:foldl(fun convertPseudoSpillToMov/2, [], SpillList). + +%% if there are variables to be restored, then call addRestoreBlockToEdge to +%% place them in a new block on the edge of the blocks. +secondPassDoPseudoCall(I, RestoreList, CFG) -> + ContLabel = I#pseudo_call.contlab, + SizeOfSet = setSize(RestoreList), + if SizeOfSet =/= 0 -> + addRestoreBlockToEdge(I, ContLabel, CFG, RestoreList); + true -> + {CFG, I} + end. + +%% prepares the moves for the spills. +convertPseudoSpillToMov({Temp, NewTemp}, OtherMoves) -> + OtherMoves ++ [mkMove(Temp, NewTemp)]. + +%% prepares the moves for the restores. +%% Called by addRestoreBlockToEdge while introducing the restores. +convertPseudoRestoreToMov({Temp, NewTemp}, OtherMoves) -> + OtherMoves ++ [mkMove(NewTemp, Temp)]. + +%% makes the move record, special care is taken for doubles. +mkMove(NewTemp,Temp) -> + if Temp#x86_temp.type =:= 'double' -> + hipe_x86:mk_fmove(NewTemp, Temp); + true -> + hipe_x86:mk_move(NewTemp, Temp) + end. + +%% adds a new block (on the edge) that includes introduced restore moves. +addRestoreBlockToEdge(PseudoCall, ContLabel, CFG, TempArgsList) -> + NextLabel = hipe_gensym:get_next_label(x86), + NewCode = lists:foldl(fun convertPseudoRestoreToMov/2, [], TempArgsList) ++ [hipe_x86:mk_jmp_label(ContLabel)], + NewBlock = hipe_bb:mk_bb(NewCode), + NewPseudoCall = redirect_pseudo_call(PseudoCall, ContLabel, NextLabel), + NewCFG = hipe_x86_cfg:bb_add(CFG, NextLabel, NewBlock), + {NewCFG, NewPseudoCall}. + +%% used instead of hipe_x86_cfg:redirect_jmp since it does not handle pseudo_call calls. +redirect_pseudo_call(I = #pseudo_call{contlab=ContLabel}, Old, New) -> + case Old =:= ContLabel of + true -> I#pseudo_call{contlab=New}; + false -> I + end. + +temp_is_pseudo(Temp) -> + case hipe_x86:is_temp(Temp) of + true -> not(?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp))); + false -> false + end. + +%%--------------------------------------------------------------------- +%% Set operations where the module name is an easily changeable macro +%%--------------------------------------------------------------------- + +union(Set1,Set2) -> + ?SET_MODULE:union(Set1,Set2). + +setSize(Set) -> + ?SET_MODULE:size(Set). + +from_list(List) -> + ?SET_MODULE:from_list(List). + +to_list(Set) -> + ?SET_MODULE:to_list(Set). + +subtract(Set1, Set2) -> + ?SET_MODULE:subtract(Set1, Set2). 
+ +intersection(Set1, Set2) -> + ?SET_MODULE:intersection(Set1, Set2). + +is_element(Element, Set) -> + ?SET_MODULE:is_element(Element, Set). diff --git a/lib/hipe/x86/hipe_x86_x87.erl b/lib/hipe/x86/hipe_x86_x87.erl new file mode 100644 index 0000000000..6ef14abdbb --- /dev/null +++ b/lib/hipe/x86/hipe_x86_x87.erl @@ -0,0 +1,635 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% Floating point handling. + +-ifdef(HIPE_AMD64). +-define(HIPE_X86_X87, hipe_amd64_x87). +-define(HIPE_X86_DEFUSE, hipe_amd64_defuse). +-define(HIPE_X86_LIVENESS, hipe_amd64_liveness). +-define(HIPE_X86_REGISTERS, hipe_amd64_registers). +-else. +-define(HIPE_X86_X87, hipe_x86_x87). +-define(HIPE_X86_DEFUSE, hipe_x86_defuse). +-define(HIPE_X86_LIVENESS, hipe_x86_liveness). +-define(HIPE_X86_REGISTERS, hipe_x86_registers). +-endif. + +-module(?HIPE_X86_X87). + +-export([map/1]). + +-include("../x86/hipe_x86.hrl"). +-include("../main/hipe.hrl"). + +%%---------------------------------------------------------------------- + +map(Defun) -> + CFG0 = hipe_x86_cfg:init(Defun), + %% hipe_x86_cfg:pp(CFG0), + Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0), + StartLabel = hipe_x86_cfg:start_label(CFG0), + {CFG1,_} = do_blocks([], [StartLabel], CFG0, Liveness, [], gb_trees:empty()), + hipe_x86_cfg:linearise(CFG1). + +do_blocks(Pred, [Lbl|Lbls], CFG, Liveness, Map, BlockMap) -> + case gb_trees:lookup(Lbl, BlockMap) of + none -> + %% This block has not been visited. + Block = hipe_x86_cfg:bb(CFG, Lbl), + Succ = hipe_x86_cfg:succ(CFG, Lbl), + NewBlockMap = gb_trees:insert(Lbl, Map, BlockMap), + LiveOut = [X || X <- ?HIPE_X86_LIVENESS:liveout(Liveness, Lbl), + is_fp(X)], + Code = hipe_bb:code(Block), + ReverseCode = lists:reverse(Code), + {NewCode0, NewMap, NewBlockMap1, Dirty} = + do_block(ReverseCode, LiveOut, Map, NewBlockMap), + NewCFG1 = + case Dirty of + true -> + NewBlock = hipe_bb:code_update(Block, NewCode0), + hipe_x86_cfg:bb_add(CFG, Lbl, NewBlock); + _ -> + CFG + end, + {NewCFG3, NewBlockMap2} = + do_blocks(Lbl, Succ, NewCFG1, Liveness, NewMap, NewBlockMap1), + do_blocks(Pred, Lbls, NewCFG3, Liveness, Map, NewBlockMap2); + {value, fail} -> + %% Don't have to follow this trace any longer. + do_blocks(Pred,Lbls, CFG, Liveness, Map, BlockMap); + {value, ExistingMap} -> + %% This block belongs to a trace already handled. + %% The Map coming in must be identical to the one used + %% when the block was processed. + if ExistingMap =:= Map -> + do_blocks(Pred, Lbls, CFG, Liveness, Map, BlockMap); + true -> + NewCFG = do_shuffle(Pred, Lbl, CFG, Map, ExistingMap), + do_blocks(Pred, Lbls, NewCFG, Liveness, Map, BlockMap) + end + end; +do_blocks(_Pred, [], CFG, _Liveness, _Map, BlockMap) -> + {CFG, BlockMap}. + +do_block(Ins, LiveOut, Map, BlockMap) -> + do_block(Ins, LiveOut, Map, BlockMap, false). 
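+
+%% Illustrative sketch (hypothetical helper, not part of this module): the
+%% Map threaded through do_blocks/do_block models the x87 register stack as
+%% a plain list whose head is the operand currently in st(0). Finding where
+%% an operand lives is a positional lookup, as get_pos/3 and get_new_opnd/2
+%% further down do for real:
+%%
+%%   fp_location(X, Map) -> fp_location(X, Map, 0).
+%%
+%%   fp_location(X, [X|_], Pos) -> {st, Pos};    % on the fp stack
+%%   fp_location(X, [_|T], Pos) -> fp_location(X, T, Pos+1);
+%%   fp_location(_X, [], _Pos)  -> in_memory.    % spilled, not loaded yet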
+ +do_block([I|Is], LiveOut, Map, BlockMap, Dirty) -> + case handle_insn(I) of + false -> + {NewCode, NewMap, NewBlockMap, NewDirty} = + do_block(Is, LiveOut, Map, BlockMap, Dirty), + {NewCode++[I], NewMap, NewBlockMap, NewDirty}; + true -> + Def = ordsets:from_list(?HIPE_X86_DEFUSE:insn_def(I)), + Use = ordsets:from_list(?HIPE_X86_DEFUSE:insn_use(I)), + NewLiveOut = + ordsets:filter(fun(X) -> is_fp(X) end, + ordsets:union(ordsets:subtract(LiveOut, Def), Use)), + {NewCode, NewMap, NewBlockMap, NewDirty} = + do_block(Is, NewLiveOut, Map, BlockMap, Dirty), + {NewI, NewMap1, NewBlockMap1} = + do_insn(I, LiveOut, NewMap, NewBlockMap), + NewDirty1 = + if NewDirty =:= true -> true; + NewI =:= [I] -> false; + true -> true + end, + {NewCode++NewI, NewMap1, NewBlockMap1, NewDirty1} + end; +do_block([], LiveOut, Map, BlockMap, Dirty) -> + case [X || X <- Map, not lists:member(X, LiveOut)] of + [] -> + {[], Map, BlockMap, Dirty}; + Pop -> + {PopIns, NewMap} = pop_dead(Pop, Map), + {PopIns, NewMap, BlockMap, true} + end. + +do_shuffle(Pred, Lbl, CFG, OldMap, NewMap) -> + %% First make sure both maps have the same members. + Push = NewMap -- OldMap, + Pop = OldMap -- NewMap, + {PopInsn, OldMap0} = pop_dead(Pop, OldMap), + {PushInsn, OldMap1} = + case Push of + []-> {[], OldMap0}; + _-> push_list(lists:reverse(Push), OldMap0) + end, + Code = + if OldMap1 =:= NewMap -> + %% It was enough to push and pop. + PopInsn ++ PushInsn ++ [hipe_x86:mk_jmp_label(Lbl)]; + true -> + %% Shuffle the positions so the maps match + Cycles = find_swap_cycles(OldMap1, NewMap), + SwitchInsns = do_switching(Cycles), + PopInsn ++ PushInsn ++ SwitchInsns ++ [hipe_x86:mk_jmp_label(Lbl)] + end, + %% Update the CFG. + NewLabel = hipe_gensym:get_next_label(x86), + NewCFG1 = hipe_x86_cfg:bb_add(CFG, NewLabel, hipe_bb:mk_bb(Code)), + OldPred = hipe_x86_cfg:bb(NewCFG1, Pred), + PredCode = hipe_bb:code(OldPred), + NewLast = redirect(lists:last(PredCode), Lbl,NewLabel), + NewPredCode = butlast(PredCode) ++ [NewLast], + NewPredBB = hipe_bb:code_update(OldPred, NewPredCode), + hipe_x86_cfg:bb_add(NewCFG1, Pred, NewPredBB). + +find_swap_cycles(OldMap, NewMap) -> + Moves = [get_pos(X, NewMap, 1) || X <- OldMap], + find_swap_cycles(OldMap, Moves, lists:seq(1, length(OldMap)), []). + +find_swap_cycles(OldMap, Moves, NotHandled, Cycles) -> + if NotHandled =:= [] -> Cycles; + true -> + Cycle = find_cycle(Moves, [hd(NotHandled)]), + NewNotHandled = NotHandled -- Cycle, + case lists:member(1, Cycle) of + true -> + %% The cycle that contains the first element on the stack + %% must be processed last. + NewCycle = format_cycle(Cycle), + find_swap_cycles(OldMap, Moves, NewNotHandled, Cycles ++ [NewCycle]); + _ -> + NewCycle = format_cycle(Cycle), + find_swap_cycles(OldMap, Moves, NewNotHandled, [NewCycle|Cycles]) + end + end. + +find_cycle(Moves, Cycle) -> + To = lists:nth(lists:last(Cycle), Moves), + if To =:= hd(Cycle) -> Cycle; + true -> find_cycle(Moves, Cycle ++ [To]) + end. + +format_cycle(C) -> + %% The position numbers start with 1 - should start with 0. + %% If position 0 is in the cycle it will be permuted until + %% the 0 is first and then remove it. + %% Otherwise the first element is also added last. + NewCycle = [X - 1 || X <- C], + case lists:member(0, NewCycle) of + true -> format_cycle(NewCycle, []); + _ -> NewCycle ++ [hd(NewCycle)] + end. + +format_cycle([H|T], NewCycle) -> + case H of + 0 -> T ++ NewCycle; + _ -> format_cycle(T, NewCycle ++ [H]) + end. + +do_switching(Cycles) -> + do_switching(Cycles, []). 
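+
+%% Hand-worked example (illustrative only, with placeholder temps a, b, c):
+%% suppose the current stack is OldMap1 = [a,b,c] (a in st(0)) and the
+%% already processed successor expects NewMap = [b,c,a]. Then
+%%   find_swap_cycles([a,b,c], [b,c,a])  evaluates to  [[2,1]]
+%%   do_switching([[2,1]])               emits  fxch st(2), fxch st(1)
+%% and the two exchanges turn a,b,c on the stack into b,c,a as required.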
+ +do_switching([C|Cycles], Insns) -> + NewInsns = Insns ++ [hipe_x86:mk_fp_unop(fxch, mk_st(X)) || X <- C], + do_switching(Cycles, NewInsns); +do_switching([], Insns) -> + Insns. + +redirect(Insn, OldLbl, NewLbl) -> + case Insn of + #pseudo_call{contlab = ContLab, sdesc = SDesc} -> + #x86_sdesc{exnlab = ExnLab} = SDesc, + if ContLab =:= OldLbl -> + Insn#pseudo_call{contlab = NewLbl}; + ExnLab =:= OldLbl -> + Insn#pseudo_call{sdesc = SDesc#x86_sdesc{exnlab = NewLbl}} + end; + _ -> + hipe_x86_cfg:redirect_jmp(Insn, OldLbl, NewLbl) + end. + +do_insn(I, LiveOut, Map, BlockMap) -> + case I of + #pseudo_call{'fun' = Fun, contlab = ContLab} -> + case Fun of + %% We don't want to spill anything if an exception has been thrown. + {_, 'handle_fp_exception'} -> + NewBlockMap = + case gb_trees:lookup(ContLab, BlockMap) of + {value, fail} -> + BlockMap; + {value, _} -> + gb_trees:update(ContLab, fail, BlockMap); + none -> + gb_trees:insert(ContLab, fail, BlockMap) + end, + {[I], [], NewBlockMap}; + _ -> + {pop_all(Map)++[I],[],BlockMap} + end; + #fp_unop{op = 'fwait'} -> + Store = pseudo_pop(Map), + {Store ++ [I], Map, BlockMap}; + #fp_unop{} -> + {NewI, NewMap} = do_fp_unop(I, LiveOut, Map), + {NewI, NewMap, BlockMap}; + #fp_binop{} -> + {NewI, NewMap} = do_fp_binop(I, LiveOut, Map), + {NewI, NewMap, BlockMap}; + #fmove{src = Src, dst = Dst} -> + if Src =:= Dst -> + %% Don't need to keep this instruction! + %% However, we may need to pop from the stack. + case is_liveOut(Src, LiveOut) of + true-> + {[], Map, BlockMap}; + false -> + {SwitchInsn, NewMap0} = switch_first(Dst, Map), + NewMap = pop(NewMap0), + {SwitchInsn++pop_insn(), NewMap, BlockMap} + end; + true -> + {NewI, NewMap} = do_fmove(Src, Dst, LiveOut, Map), + {NewI, NewMap, BlockMap} + end; + _ -> + {[I], Map, BlockMap} + end. + +do_fmove(Src, Dst = #x86_mem{}, LiveOut, Map) -> + %% Storing a float from the stack into memory. + {SwitchInsn, NewMap0} = switch_first(Src, Map), + case is_liveOut(Src, LiveOut) of + true -> + {SwitchInsn ++ [hipe_x86:mk_fp_unop(fst, Dst)], NewMap0}; + _ -> + NewMap1 = pop(NewMap0), + {SwitchInsn ++ [hipe_x86:mk_fp_unop(fstp, Dst)], NewMap1} + end; +do_fmove(Src = #x86_mem{}, Dst, _LiveOut, Map) -> + %% Pushing a float into the stack. + case in_map(Dst, Map) of + true -> ?EXIT({loadingExistingFpVariable,{Src,Dst}}); + _ -> ok + end, + {PushOp, [_|NewMap0]} = push(Src, Map), + %% We want Dst in the map rather than Src. + NewMap = [Dst|NewMap0], + {PushOp, NewMap}; +do_fmove(Src, Dst, LiveOut, Map) -> + %% Copying a float that either is spilled or is on the fp stack, + %% or converting a fixnum in a temp to a float on the fp stack. + case in_map(Dst, Map) of + true -> ?EXIT({copyingToExistingFpVariable,{Src,Dst}}); + _ -> ok + end, + IsConv = + case Src of + #x86_temp{type = Type} -> Type =/= 'double'; + _ -> false + end, + case IsConv of + true -> + do_conv(Src, Dst, Map); + _ -> + %% Copying. + case {is_liveOut(Src, LiveOut), in_map(Src, Map)} of + {false, true} -> + %% Just remap Dst to Src + {Head, [_|T]} = lists:splitwith(fun(X) -> X =/= Src end, Map), + {[], Head ++ [Dst|T]}; + _ -> + {PushOp, [_|NewMap0]} = push(Src, Map), + %% We want Dst in the map rather than Src. + NewMap = [Dst|NewMap0], + {PushOp, NewMap} + end + end. + +do_conv(Src = #x86_temp{reg = Reg}, Dst, Map) -> + %% Converting. Src must not be a register, so we + %% might have to put it into memory in between. 
+ {Move, NewSrc} = + case ?HIPE_X86_REGISTERS:is_precoloured(Reg) of + true -> + Temp = hipe_x86:mk_new_temp('untagged'), + {[hipe_x86:mk_move(Src,Temp)], Temp}; + _ -> + {[], Src} + end, + {PushOp, [_|NewMap0]} = push(NewSrc, Map), + %% We want Dst in the map rather than NewSrc. + NewMap = [Dst|NewMap0], + case length(PushOp) of + 1 -> %% No popping of memory object on fpstack + {Move ++ [hipe_x86:mk_fp_unop(fild, NewSrc)], NewMap}; + _ -> %% H contains pop instructions. Must be kept! + Head = butlast(PushOp), + {Move ++ Head ++ [hipe_x86:mk_fp_unop(fild, NewSrc)], NewMap} + end. + +do_fp_unop(I = #fp_unop{arg = Arg, op = fchs}, Liveout, Map) -> + %% This is fchs, the only operation without a + %% popping version. Needs special handling. + case is_liveOut(Arg, Liveout) of + true -> + {SwitchIns, NewMap} = switch_first(Arg, Map), + {SwitchIns ++ [I#fp_unop{arg = []}], NewMap}; + false -> + %% Don't need to keep this instruction! + %% However, we may need to pop Src from the stack. + case in_map(Arg, Map) of + true -> + {SwitchInsn, NewMap0} = switch_first(Arg, Map), + NewMap = pop(NewMap0), + {SwitchInsn ++ pop_insn(), NewMap}; + _ -> + {[],Map} + end + end. + +do_fp_binop(#fp_binop{src = Src, dst = Dst, op = Op}, LiveOut, Map) -> + case {is_liveOut(Src, LiveOut), is_liveOut(Dst, LiveOut)} of + {true, true} -> + keep_both(Op, Src, Dst, Map); + {true, false} -> + keep_src(Op, Src, Dst, Map); + {false, true} -> + keep_dst(Op, Src, Dst, Map); + {false, false} -> + %% Both Dst and Src are popped. + keep_none(Op, Src, Dst, Map) + end. + +keep_both(Op, Src, Dst, Map) -> + %% Keep both Dst and Src if it is there. + {SwitchInsn, NewMap} = switch_first(Dst, Map), + NewSrc = get_new_opnd(Src, NewMap), + Insn = format_fp_binop(Op, NewSrc, mk_st(0)), + {SwitchInsn++Insn, NewMap}. + +keep_src(Op, Src, Dst, Map) -> + %% Pop Dst but keep Src in stack if it is there. + {SwitchInsn, NewMap0} = switch_first(Dst, Map), + NewSrc = get_new_opnd(Src, NewMap0), + NewMap = pop(NewMap0), + Insn = format_fp_binop(Op, NewSrc, mk_st(0)), + {SwitchInsn ++ Insn ++ pop_insn(), NewMap}. + +keep_dst(Op, Src, Dst, Map) -> + %% Keep Dst but pop Src. + %% Dst must be in stack. + DstInMap = in_map(Dst, Map), + SrcInMap = in_map(Src, Map), + case SrcInMap of + true -> + case DstInMap of + true -> + %% Src must be popped. If Dst is on top of the stack we can + %% alter the operation rather than shuffle the stack. + {SwitchInsn, Insn, NewMap} = + if hd(Map) =:= Dst -> + NewOp = mk_op_pop(reverse_op(Op)), + NewDst = get_new_opnd(Src, Map), + TmpMap = lists:map(fun(X) -> + if X =:= Src -> Dst; true -> X end + end, Map), + {[], format_fp_binop(NewOp, mk_st(0), NewDst), pop(TmpMap)}; + true -> + {SwitchInsn1, NewMap0} = switch_first(Src, Map), + NewDst = get_new_opnd(Dst,NewMap0), + NewOp = mk_op_pop(Op), + {SwitchInsn1,format_fp_binop(NewOp, mk_st(0), NewDst), pop(NewMap0)} + end, + {SwitchInsn ++ Insn, NewMap}; + _ -> + %% Src is on the stack, but Dst isn't. Use memory command to avoid + %% unnecessary loading instructions. + {SwitchInsn, NewMap0} = switch_first(Src, Map), + NewOp = reverse_op(Op), + NewMap = [Dst] ++ tl(NewMap0), + Insn = format_fp_binop(NewOp, Dst, mk_st(0)), + {SwitchInsn ++ Insn, NewMap} + end; + _ -> + %% Src isn't in the map so it doesn't have to be popped. + {SwitchInsn, NewMap} = switch_first(Dst, Map), + {SwitchInsn ++ [#fp_unop{arg = Src, op = Op}], NewMap} + end. + +keep_none(Op, Src, Dst, Map) -> + %% Dst must be on stack. 
+ {PushInsn, NewMap0} = + case in_map(Dst, Map) of + true -> {[], Map}; + _ -> push(Dst, Map) + end, + case in_map(Src, NewMap0) of + true -> + %% Src must be popped. + {SwitchInsn1, NewMap1} = switch_first(Src, NewMap0), + NewOp = mk_op_pop(Op), + NewDst = get_new_opnd(Dst,NewMap1), + NewMap2 = pop(NewMap1), + %% Then Dst has to be popped. + {PopInsn, NewMap} = pop_member(Dst, NewMap2), + Insn = format_fp_binop(NewOp, mk_st(0), NewDst), + {PushInsn ++ SwitchInsn1 ++ Insn ++ PopInsn, NewMap}; + _ -> + %% Src isn't in the map so it doesn't have to be popped. + {SwitchInsn, NewMap1} = switch_first(Dst, NewMap0), + NewMap = pop(NewMap1), + {SwitchInsn ++ [#fp_unop{arg = Src, op = Op}] ++ pop_insn(), NewMap} + end. + +format_fp_binop(Op, Src = #x86_temp{}, Dst = #x86_fpreg{reg = Reg}) -> + %% Handle that st(0) is sometimes implicit. + if Reg =:= 0 -> [hipe_x86:mk_fp_unop(Op, Src)]; + true -> [hipe_x86:mk_fp_binop(Op, Src, Dst)] + end; +format_fp_binop(Op, Src, Dst) -> + [hipe_x86:mk_fp_binop(Op, Src, Dst)]. + +in_map(X, Map) -> + lists:member(X, Map). + +push_list(L, Map) -> + push_list(L, Map, []). +push_list([H|T], Map, Acc) -> + {Insn, NewMap} = push(H,Map), + push_list(T, NewMap, Acc++Insn); +push_list([], Map, Acc) -> + {Acc, Map}. + +push(X, Map0) -> + {PopInsn, Map} = + if length(Map0) > 7 -> pop_a_temp(Map0); + true -> {[], Map0} + end, + NewX = get_new_opnd(X,Map), + NewMap = [X | Map], + PushOp = [hipe_x86:mk_fp_unop(fld, NewX)], + {PopInsn ++ PushOp, NewMap}. + +pop([_|Map]) -> + Map. + +pop_insn() -> + [hipe_x86:mk_fp_unop('fstp',mk_st(0))]. + +pop_dead(Dead, Map) -> + Dead0 = [X || X <- Map, lists:member(X,Dead)], + pop_dead(Dead0, Map, []). + +pop_dead([D|Dead], Map, Code) -> + {I, NewMap0} = switch_first(D, Map), + NewMap = pop(NewMap0), + Store = case D of + #x86_temp{} -> [hipe_x86:mk_fp_unop('fstp', D)]; + _ -> pop_insn() + end, + pop_dead(Dead, NewMap, Code++I++Store); +pop_dead([], Map, Code) -> + {Code,Map}. + +pop_all(Map) -> + {Code, _} = pop_dead(Map, Map), + Code. + +pop_member(Member, Map) -> + {Head,[_|T]} = lists:splitwith(fun(X)-> X =/= Member end, Map), + {[hipe_x86:mk_fp_unop('fstp', mk_st(get_pos(Member, Map, 0)))], + Head++T}. + +pop_a_temp(Map) -> + Temp = find_a_temp(Map), + {SwitchInsn, NewMap0} = switch_first(Temp, Map), + NewMap = pop(NewMap0), + {SwitchInsn ++ [hipe_x86:mk_fp_unop('fstp', Temp)], NewMap}. + +find_a_temp([H = #x86_temp{}|_]) -> + H; +find_a_temp([_|T]) -> + find_a_temp(T); +find_a_temp([]) -> + ?EXIT({noTempOnFPStack,{}}). + +switch_first(X, Map = [H|_]) -> + Pos = get_pos(X, Map, 0), + case Pos of + 0 -> + {[], Map}; + notFound -> + push(X, Map); + _ -> + {[_|Head], [_|Tail]} = lists:splitwith(fun(Y)-> Y =/= X end, Map), + NewMap = [X|Head] ++ [H|Tail], + Ins = hipe_x86:mk_fp_unop(fxch, mk_st(Pos)), + {[Ins], NewMap} + end; +switch_first(X, Map) -> + push(X, Map). + +get_pos(X, [H|T], Pos) -> + if X =:= H -> Pos; + true -> get_pos(X, T, Pos+1) + end; +get_pos(_, [], _) -> + notFound. + +get_new_opnd(X, Map) -> + I = get_pos(X, Map, 0), + case I of + notFound -> + %% The operand is probably a spilled float. + X; + _ -> + mk_st(I) + end. + +is_fp(#x86_fpreg{}) -> + true; +is_fp(#x86_mem{type = Type}) -> + Type =:= 'double'; +is_fp(#x86_temp{type = Type}) -> + Type =:= 'double'. + +handle_insn(I) -> + case I of + #fmove{} -> true; + #fp_unop{} -> true; + #fp_binop{} -> true; + #pseudo_call{} ->true; + %% #ret{} -> true; + _ -> false + end. + +is_liveOut(X, LiveOut) -> + ordsets:is_element(X, LiveOut). 
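+
+%% Hand-worked example of switch_first/2 (illustrative only; x, y, z and w
+%% are placeholder temps). With Map = [x,y,z], i.e. x in st(0):
+%%   switch_first(x, [x,y,z]) -> {[],            [x,y,z]}    already on top
+%%   switch_first(z, [x,y,z]) -> {[fxch st(2)],  [z,y,x]}    brought to the top
+%%   switch_first(w, [x,y,z]) -> {[fld w],       [w,x,y,z]}  not on the stack,
+%%                                                           so it is pushed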
+ +mk_st(X) -> + hipe_x86:mk_fpreg(X, false). + +reverse_op(Op) -> + case Op of + 'fsub' -> 'fsubr'; + 'fdiv' -> 'fdivr'; + 'fsubr'-> 'fsub'; + 'fdivr' -> 'fdiv'; + _ -> Op + end. + +mk_op_pop(Op) -> + case Op of + 'fadd'-> 'faddp'; + 'fdiv' -> 'fdivp'; + 'fdivr' -> 'fdivrp'; + 'fmul' -> 'fmulp'; + 'fsub' -> 'fsubp'; + 'fsubr' -> 'fsubrp'; + _ -> ?EXIT({operandHasNoPopVariant,{Op}}) + end. + +butlast([X|Xs]) -> butlast(Xs,X). + +butlast([],_) -> []; +butlast([X|Xs],Y) -> [Y|butlast(Xs,X)]. + +%%pp_insn(Op, Src, Dst) -> +%% pp([hipe_x86:mk_fp_binop(Op, Src, Dst)]). + +%%pp([I|Ins]) -> +%% hipe_x86_pp:pp_insn(I), +%% pp(Ins); +%%pp([]) -> +%% []. + +pseudo_pop(Map) when length(Map) > 0 -> + Dst = hipe_x86:mk_new_temp('double'), + pseudo_pop(Dst, length(Map), []); +pseudo_pop(_) -> + []. + +pseudo_pop(Dst, St, Acc) when St > 1 -> + %% Store all members of the stack to a single temporary to force + %% any floating point overflow exceptions to occur even though we + %% don't have overflow for the extended double precision in the x87. + pseudo_pop(Dst, St-1, + [hipe_x86:mk_fp_unop('fxch', mk_st(St-1)), + hipe_x86:mk_fp_unop('fst', Dst), + hipe_x86:mk_fp_unop('fxch', mk_st(St-1)) + |Acc]); +pseudo_pop(Dst, _St, Acc) -> + [hipe_x86:mk_fp_unop('fst', Dst)|Acc]. -- cgit v1.2.3