author     Erlang/OTP <[email protected]>  2009-11-20 14:54:40 +0000
committer  Erlang/OTP <[email protected]>  2009-11-20 14:54:40 +0000
commit     84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree       bff9a9c66adda4df2106dfd0e5c053ab182a12bd  /lib/hipe/x86
The R13B03 release.  (tag: OTP_R13B03)
Diffstat (limited to 'lib/hipe/x86')
-rw-r--r--  lib/hipe/x86/Makefile                         134
-rw-r--r--  lib/hipe/x86/NOTES.OPTIM                      200
-rw-r--r--  lib/hipe/x86/NOTES.RA                          32
-rw-r--r--  lib/hipe/x86/TODO                              31
-rw-r--r--  lib/hipe/x86/hipe_rtl_to_x86.erl              865
-rw-r--r--  lib/hipe/x86/hipe_x86.erl                     496
-rw-r--r--  lib/hipe/x86/hipe_x86.hrl                     116
-rw-r--r--  lib/hipe/x86/hipe_x86_assemble.erl           1014
-rw-r--r--  lib/hipe/x86/hipe_x86_cfg.erl                 147
-rw-r--r--  lib/hipe/x86/hipe_x86_defuse.erl              160
-rw-r--r--  lib/hipe/x86/hipe_x86_encode.erl             1302
-rw-r--r--  lib/hipe/x86/hipe_x86_encode.txt              213
-rw-r--r--  lib/hipe/x86/hipe_x86_frame.erl               687
-rw-r--r--  lib/hipe/x86/hipe_x86_liveness.erl             57
-rw-r--r--  lib/hipe/x86/hipe_x86_main.erl                 70
-rw-r--r--  lib/hipe/x86/hipe_x86_postpass.erl            276
-rw-r--r--  lib/hipe/x86/hipe_x86_pp.erl                  350
-rw-r--r--  lib/hipe/x86/hipe_x86_ra.erl                   99
-rw-r--r--  lib/hipe/x86/hipe_x86_ra_finalise.erl         335
-rw-r--r--  lib/hipe/x86/hipe_x86_ra_ls.erl                85
-rw-r--r--  lib/hipe/x86/hipe_x86_ra_naive.erl            409
-rw-r--r--  lib/hipe/x86/hipe_x86_ra_postconditions.erl   452
-rw-r--r--  lib/hipe/x86/hipe_x86_ra_x87_ls.erl            63
-rw-r--r--  lib/hipe/x86/hipe_x86_registers.erl           254
-rw-r--r--  lib/hipe/x86/hipe_x86_spill_restore.erl       345
-rw-r--r--  lib/hipe/x86/hipe_x86_x87.erl                 635
26 files changed, 8827 insertions, 0 deletions
diff --git a/lib/hipe/x86/Makefile b/lib/hipe/x86/Makefile
new file mode 100644
index 0000000000..065b56fce3
--- /dev/null
+++ b/lib/hipe/x86/Makefile
@@ -0,0 +1,134 @@
+#
+# %CopyrightBegin%
+#
+# Copyright Ericsson AB 2001-2009. All Rights Reserved.
+#
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+#
+# %CopyrightEnd%
+#
+
+ifndef EBIN
+EBIN = ../ebin
+endif
+
+ifndef DOCS
+DOCS = ../doc
+endif
+
+include $(ERL_TOP)/make/target.mk
+include $(ERL_TOP)/make/$(TARGET)/otp.mk
+
+# ----------------------------------------------------
+# Application version
+# ----------------------------------------------------
+include ../vsn.mk
+VSN=$(HIPE_VSN)
+
+# ----------------------------------------------------
+# Release directory specification
+# ----------------------------------------------------
+RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN)
+
+# ----------------------------------------------------
+# Target Specs
+# ----------------------------------------------------
+# Please keep this list sorted.
+MODULES=hipe_rtl_to_x86 \
+ hipe_x86 \
+ hipe_x86_assemble \
+ hipe_x86_cfg \
+ hipe_x86_defuse \
+ hipe_x86_encode \
+ hipe_x86_frame \
+ hipe_x86_liveness \
+ hipe_x86_main \
+ hipe_x86_postpass \
+ hipe_x86_pp \
+ hipe_x86_ra \
+ hipe_x86_ra_finalise \
+ hipe_x86_ra_ls \
+ hipe_x86_ra_naive \
+ hipe_x86_ra_postconditions \
+ hipe_x86_ra_x87_ls \
+ hipe_x86_registers \
+ hipe_x86_spill_restore \
+ hipe_x86_x87
+
+HRL_FILES=hipe_x86.hrl
+ERL_FILES=$(MODULES:%=%.erl)
+TARGET_FILES=$(MODULES:%=$(EBIN)/%.$(EMULATOR))
+DOC_FILES= $(MODULES:%=$(DOCS)/%.html)
+
+# APP_FILE=
+# APP_SRC=$(APP_FILE).src
+# APP_TARGET=$(EBIN)/$(APP_FILE)
+#
+# APPUP_FILE=
+# APPUP_SRC=$(APPUP_FILE).src
+# APPUP_TARGET=$(EBIN)/$(APPUP_FILE)
+
+# ----------------------------------------------------
+# FLAGS
+# ----------------------------------------------------
+
+include ../native.mk
+
+ERL_COMPILE_FLAGS += +warn_exported_vars
+
+# ----------------------------------------------------
+# Targets
+# ----------------------------------------------------
+
+debug opt: $(TARGET_FILES)
+
+docs: $(DOC_FILES)
+
+clean:
+ rm -f $(TARGET_FILES)
+ rm -f core
+
+$(DOCS)/%.html:%.erl
+ erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop
+
+# ----------------------------------------------------
+# Special Build Targets
+# ----------------------------------------------------
+
+# ----------------------------------------------------
+# Release Target
+# ----------------------------------------------------
+include $(ERL_TOP)/make/otp_release_targets.mk
+
+release_spec: opt
+ $(INSTALL_DIR) $(RELSYSDIR)/ebin
+ $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin
+
+release_docs_spec:
+
+# Please keep this list sorted.
+$(EBIN)/hipe_rtl_to_x86.beam: ../rtl/hipe_rtl.hrl
+$(EBIN)/hipe_x86_assemble.beam: ../main/hipe.hrl ../rtl/hipe_literals.hrl ../misc/hipe_sdi.hrl
+$(EBIN)/hipe_x86_cfg.beam: ../flow/cfg.hrl ../flow/cfg.inc
+$(EBIN)/hipe_x86_frame.beam: ../rtl/hipe_literals.hrl
+$(EBIN)/hipe_x86_liveness.beam: ../flow/liveness.inc
+$(EBIN)/hipe_x86_main.beam: ../main/hipe.hrl
+$(EBIN)/hipe_x86_ra: ../main/hipe.hrl
+$(EBIN)/hipe_x86_ra_dummy.beam: ../main/hipe.hrl
+$(EBIN)/hipe_x86_ra_ls.beam: ../main/hipe.hrl
+$(EBIN)/hipe_x86_ra_postconditions.beam: ../main/hipe.hrl
+$(EBIN)/hipe_x86_ra_x87_ls.beam: ../main/hipe.hrl
+$(EBIN)/hipe_x86_registers.beam: ../rtl/hipe_literals.hrl
+$(EBIN)/hipe_x86_spill_restore.beam: ../main/hipe.hrl ../flow/cfg.hrl
+$(EBIN)/hipe_x86_x87.beam: ../main/hipe.hrl
+
+$(TARGET_FILES): hipe_x86.hrl ../misc/hipe_consttab.hrl
diff --git a/lib/hipe/x86/NOTES.OPTIM b/lib/hipe/x86/NOTES.OPTIM
new file mode 100644
index 0000000000..4c241cacb4
--- /dev/null
+++ b/lib/hipe/x86/NOTES.OPTIM
@@ -0,0 +1,200 @@
+$Id$
+
+Partial x86 code optimisation guide
+===================================
+Priority should be given to P6 and P4, then K7,
+then P5, and last to K6.
+
+Rules that are blatantly obvious or irrelevant for HiPE are
+generally not listed. These include things like alignment
+of basic data types, store-forwarding rules when alignment
+or sizes don't match, and partial register stalls.
+
+Intel P4
+--------
+The P6 4-1-1 insn decode template no longer applies.
+
+Simple insns (add/sub/cmp/test/and/or/xor/neg/not/mov/sahf)
+are twice as fast as in P6.
+
+Shifts and "movsx" (sign-extend) are slower than in P6.
+
+Always avoid "inc" and "dec", use "add" and "sub" instead,
+due to condition-code dependency overhead.
+
+"fxch" is slightly more expensive than in P6, where it was free.
+
+Use "setcc" or "cmov" to eliminate unpredictable branches.
+
+For hot code executing out of the trace cache, alignment of
+branch targets is less of an issue compared to P6.
+
+Do use "fxch" to simulate a flat FP register file, but only
+for that purpose, not for manual scheduling for parallelism.
+
+Using "lea" is highly recommended.
+
+Eliminate redundant loads. Use regs as much as possible.
+
+Left shifts up to 3 have longer latencies than the equivalent
+sequence of adds.
+
+Do utilise the addressing modes, to save registers and trace
+cache bandwidth.
+
+"xor reg,reg" or "sub reg,reg" preferred over moving zero to reg.
+
+"test reg,reg" preferred over "cmp" with zero or "and".
+
+Avoid explicit cmp/test;jcc if the preceding insn (alu, but not
+mov or lea) set the condition codes.
+
+Load-execute alu insns (mem src) are Ok.
+
+Add-reg-to-mem slightly better than add-mem-to-reg.
+
+Add-reg-to-mem is better than load;add;store.
+
+Intel P6
+--------
+4-1-1 instruction decoding template: can decode one semi-complex
+(max 4 uops) and two simple (1 uop) insns per clock; follow a
+complex insn by two simple ones, otherwise the decoders will stall.
+
+Load-execute (mem src) alu insns are 2 uops.
+Read-modify-write (mem dst) alu insns are 4 uops.
+
+Insns longer than 7 bytes block parallel decoding.
+Avoid insns longer than 7 bytes.
+
+Lea is useful.
+
+"movzx" is preferred for zero-extension; the xor;mov alternative
+causes a partial register stall.
+
+Use "test" instead of "cmp" with zero.
+
+Pull address calculations into load and store insn addressing modes.
+
+Clear a reg with "xor", not by moving zero to it.
+
+Many alu insns set the condition codes. Replace "alu;cmp;jcc"
+with "alu;jcc". This is not applicable for "mov" or "lea".
+
+For FP code, simulate a flat register file on the x87 stack by
+using fxch to reorder it.
+
+AMD K7
+------
+Select DirectPath insns. Avoid VectorPath insns due to slower decode.
+
+Alu insns with mem src are very efficient.
+Alu insns with mem dst are very efficient.
+
+Fetches from I-cache are 16-byte aligned. Align functions and frequently
+used labels at or near the start of 16-byte aligned blocks.
+
+"movzx" preferred over "xor;mov" for zero-extension.
+
+"push mem" preferred over "load;push reg".
+
+"xor reg,reg" preferred over moving zero to the reg.
+
+"test" preferred over "cmp".
+
+"pop" insns are VectorPath. "pop mem" has latency 3, "pop reg" has
+latency 4.
+
+"push reg" and "push imm" are DirectPath, "push mem" is VectorPath.
+The latency is 3 clocks.
+
+Intel P5
+--------
+If a loop header is less than 8 bytes away from a 16-byte
+boundary, align it to the 16-byte boundary.
+
+If a return address is less than 8 bytes away from a 16-byte
+boundary, align it to the 16-byte boundary.
+
+Align function entry points to 16-byte boundaries.
+
+Ensure that doubles are 64-bit aligned.
+
+Data cache line size is 32 bytes. The whole line is brought
+in on a read miss.
+
+"push mem" is not pairable; loading a temp reg and pushing
+the reg pairs better -- this is also faster on the 486.
+
+No conditional move instruction.
+
+Insns longer than 7 bytes can't go down the V-pipe or share
+the insn FIFO with other insns.
+Avoid insns longer than 7 bytes.
+
+Lea is useful when it replaces several other add/shift insns.
+Lea is not a good replacement for a single shl since a scaled
+index requires a disp32 (or base), making the insn longer.
+
+"movzx" is worse than the xor;mov alternative -- the opcode
+prefix causes a slowdown and it is not pairable.
+
+Use "test" instead of "cmp" with zero.
+
+"test eax,imm" and "test reg,reg" are pairable, other forms are not.
+
+Pull address calculations into load and store insn addressing modes.
+
+Clear a reg with "xor", not by moving zero to it.
+
+Many alu insns set the condition codes. Replace "alu;cmp;jcc"
+with "alu;jcc". This is not applicable for "mov" or "lea".
+
+For FP code, simulate a flat register file on the x87 stack by
+using fxch to reorder it.
+
+"neg" and "not" are not pairable. "test imm,reg" and "test imm,mem"
+are not pairable. Shifts by "cl" are not pairable. Shifts by "1" or
+"imm" are pairable but only execute in the U-pipe.
+
+AMD K6
+------
+The insn size predecoder has a 3-byte window. Insns with both prefix
+and SIB bytes cannot be short-decoded.
+
+Use short and simple insns, including mem src alu insns.
+
+Avoid insns longer than 7 bytes. They cannot be short-decoded.
+Short-decode: max 7 bytes, max 2 uops.
+Long-decode: max 11 bytes, max 4 uops.
+Vector-decode: longer than 11 bytes or more than 4 uops.
+
+Prefer read-modify-write alu insns (mem dst) over "load;op;store"
+sequences, for code density and register pressure reasons.
+
+Avoid the "(esi)" addressing mode: it forces the insn to be vector-decoded.
+Use a different reg or add an explicit zero displacement.
+
+"add reg,reg" preferred over a shl by 1, it parallelises better.
+
+"movzx" preferred over "xor;mov" for zero-extension.
+
+Moving zero to a reg preferred over "xor reg,reg" due to dependencies
+and condition codes overhead.
+
+"push mem" preferred over "load;push reg" due to code density and
+register pressure. (Page 64.)
+Explicit moves preferred when pushing args for fn calls, due to
+%esp dependencies and random access possibility. (Page 58.)
+[hmm, these two are in conflict]
+
+There is no penalty for seg reg prefix unless there are multiple prefixes.
+
+Align function entries and frequent branch targets to 16-byte boundaries.
+
+Shifts by imm only go down one of the pipes.
+
+"test reg,reg" preferred over "cmp" with zero.
+"test reg,imm" is a long-decode insn.
+
+No conditional move insn.
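
To make the condition-code reuse rule above concrete ("alu;jcc" instead of
"alu;cmp;jcc"), here is a minimal sketch written with the pseudo-x86
constructors this commit adds later in hipe_x86.erl; Temp, TrueLab and
FalseLab are assumed to be bound elsewhere:

  %% Redundant: an explicit cmp with zero after an alu that already
  %% set the condition codes.
  Before = [hipe_x86:mk_alu('sub', hipe_x86:mk_imm(1), Temp),
            hipe_x86:mk_cmp(hipe_x86:mk_imm(0), Temp),
            hipe_x86:mk_pseudo_jcc('e', TrueLab, FalseLab, 0.1)],
  %% Preferred: reuse the condition codes already set by the 'sub'.
  After  = [hipe_x86:mk_alu('sub', hipe_x86:mk_imm(1), Temp),
            hipe_x86:mk_pseudo_jcc('e', TrueLab, FalseLab, 0.1)].
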
diff --git a/lib/hipe/x86/NOTES.RA b/lib/hipe/x86/NOTES.RA
new file mode 100644
index 0000000000..ce80411642
--- /dev/null
+++ b/lib/hipe/x86/NOTES.RA
@@ -0,0 +1,32 @@
+$Id$
+
+Register Allocation
+===================
+
+These are the rules that HiPE x86 register allocators must abide by.
+
+- Before RA, every Temp (precoloured or pseudo) is semantically
+ equivalent to Reg. Any operand may be Temp.
+
+- Before RA, only FIXED registers may occur in precoloured Temps.
+ Exception 1 is move: src or dst may be an argument register.
+ Exception 2 is call: the dst (if any) must be %eax.
+
+- After RA, an operand (src or dst) may refer to at most one memory cell.
+ Therefore, a pseudo-Temp MAY NOT occur as base or offset in an
+ explicit memory operand after RA.
+
+- After RA, a binary operation (alu, cmp, move) may refer to at most
+ one memory cell. Therefore, AT MOST ONE of src and dst may be a
+ pseudo-Temp after RA. If one of the operands (src or dst) is an
+ explicit memory operand, then the other operand MUST NOT be a
+ pseudo-Temp after RA.
+
+- After RA, the index in a jmp_switch must be a register.
+
+- After RA, the temp in a lea must be a register.
+
+- After RA, the temp in an imul must be a register.
+
+- After RA, a function's formal parameters must reside on the stack.
+ Therefore, the RA MUST NOT map the formals to actual registers.
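
A minimal sketch of the "at most one memory cell" rule, using the record
definitions this commit adds in hipe_x86.hrl (the header must be included
for the records to be known); the register numbers and offset are
illustrative assumptions only:

  %% -include("hipe_x86.hrl").
  %% Legal after RA: only the source operand refers to memory.
  Base  = #x86_temp{reg=3, type='untagged', allocatable=true},
  Legal = #alu{aluop='add',
               src=#x86_mem{base=Base, off=#x86_imm{value=8}, type='tagged'},
               dst=#x86_temp{reg=0, type='tagged', allocatable=true}}.
  %% Making dst an #x86_mem{} as well would mean two memory cells in one
  %% instruction, which the rule above forbids after RA.
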
diff --git a/lib/hipe/x86/TODO b/lib/hipe/x86/TODO
new file mode 100644
index 0000000000..7c93f7daf3
--- /dev/null
+++ b/lib/hipe/x86/TODO
@@ -0,0 +1,31 @@
+rtl_to_x86:
+* recognise alub(X,X,sub,1,lt,L1,L2,P) and turn it into 'dec',
+ this might improve the reduction test code slightly (X is
+ the pseudo for FCALLS)
+* recognise alu(Z,X,add,Y) and turn it into 'lea'.
+* rewrite tailcalls as parallel assignments before regalloc
+
+x86:
+* Use separate constructors for real regs (x86_reg) and pseudos (x86_temp).
+
+Frame:
+* drop tailcall rewrite
+
+Registers:
+* make the 2 regs now reserved for frame's tailcall rewrite available for arg passing
+
+Optimizations:
+* replace jcc cc,L1; jmp L0; L1: with jcc <not cc> L0; L1: (length:len/2)
+* Kill move X,X insns, either in frame or finalise
+* Instruction scheduling module
+* We can now choose to not have HP in %esi. However, this currently loses
+ performance due to (a) repeated moves to/from P_HP(P), and (b) spills of
+ the temp that contains a copy of P_HP(P). Both of these problems should be
+ fixed, and then, if we don't have any noticeable performance degradation, we
+ should permanently change to a non-reserved HP strategy.
+
+Loader:
+
+Assembler:
+
+Encode:
diff --git a/lib/hipe/x86/hipe_rtl_to_x86.erl b/lib/hipe/x86/hipe_rtl_to_x86.erl
new file mode 100644
index 0000000000..d77e4fed3b
--- /dev/null
+++ b/lib/hipe/x86/hipe_rtl_to_x86.erl
@@ -0,0 +1,865 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%%
+%%% Translate 3-address RTL code to 2-address pseudo-x86 code.
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_RTL_TO_X86, hipe_rtl_to_amd64).
+-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
+-define(ECX, rcx).
+-define(EAX, rax).
+-else.
+-define(HIPE_RTL_TO_X86, hipe_rtl_to_x86).
+-define(HIPE_X86_REGISTERS, hipe_x86_registers).
+-define(ECX, ecx).
+-define(EAX, eax).
+-endif.
+
+-module(?HIPE_RTL_TO_X86).
+-export([translate/1]).
+
+-include("../rtl/hipe_rtl.hrl").
+
+translate(RTL) -> % RTL function -> x86 defun
+ hipe_gensym:init(x86),
+ hipe_gensym:set_var(x86, ?HIPE_X86_REGISTERS:first_virtual()),
+ hipe_gensym:set_label(x86, hipe_gensym:get_label(rtl)),
+ Map0 = vmap_empty(),
+ {Formals, Map1} = conv_formals(hipe_rtl:rtl_params(RTL), Map0),
+ OldData = hipe_rtl:rtl_data(RTL),
+ {Code0, NewData} = conv_insn_list(hipe_rtl:rtl_code(RTL), Map1, OldData),
+ {RegFormals,_} = split_args(Formals),
+ Code =
+ case RegFormals of
+ [] -> Code0;
+ _ -> [hipe_x86:mk_label(hipe_gensym:get_next_label(x86)) |
+ move_formals(RegFormals, Code0)]
+ end,
+ IsClosure = hipe_rtl:rtl_is_closure(RTL),
+ IsLeaf = hipe_rtl:rtl_is_leaf(RTL),
+ hipe_x86:mk_defun(hipe_rtl:rtl_fun(RTL),
+ Formals,
+ IsClosure,
+ IsLeaf,
+ Code,
+ NewData,
+ [],
+ []).
+
+conv_insn_list([H|T], Map, Data) ->
+ {NewH, NewMap, NewData1} = conv_insn(H, Map, Data),
+ %% io:format("~w \n ==>\n ~w\n- - - - - - - - -\n",[H,NewH]),
+ {NewT, NewData2} = conv_insn_list(T, NewMap, NewData1),
+ {NewH ++ NewT, NewData2};
+conv_insn_list([], _, Data) ->
+ {[], Data}.
+
+conv_insn(I, Map, Data) ->
+ case I of
+ #alu{} ->
+ %% dst = src1 binop src2
+ BinOp = conv_binop(hipe_rtl:alu_op(I)),
+ {Dst, Map0} = conv_dst(hipe_rtl:alu_dst(I), Map),
+ {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alu_src1(I), Map0),
+ {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alu_src2(I), Map1),
+ I2 =
+ case hipe_rtl:is_shift_op(hipe_rtl:alu_op(I)) of
+ true ->
+ conv_shift(Dst, Src1, BinOp, Src2);
+ false ->
+ conv_alu(Dst, Src1, BinOp, Src2, [])
+ end,
+ {FixSrc1++FixSrc2++I2, Map2, Data};
+ #alub{} ->
+ %% dst = src1 op src2; if COND goto label
+ BinOp = conv_binop(hipe_rtl:alub_op(I)),
+ {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map),
+ {FixSrc1, Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0),
+ {FixSrc2, Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1),
+ Cc = conv_cond(hipe_rtl:alub_cond(I)),
+ I1 = [hipe_x86:mk_pseudo_jcc(Cc,
+ hipe_rtl:alub_true_label(I),
+ hipe_rtl:alub_false_label(I),
+ hipe_rtl:alub_pred(I))],
+ I2 = conv_alu(Dst, Src1, BinOp, Src2, I1),
+ {FixSrc1++FixSrc2++I2, Map2, Data};
+ #branch{} ->
+ %% <unused> = src1 - src2; if COND goto label
+ {FixSrc1, Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map),
+ {FixSrc2, Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0),
+ Cc = conv_cond(hipe_rtl:branch_cond(I)),
+ I2 = conv_branch(Src1, Cc, Src2,
+ hipe_rtl:branch_true_label(I),
+ hipe_rtl:branch_false_label(I),
+ hipe_rtl:branch_pred(I)),
+ {FixSrc1++FixSrc2++I2, Map1, Data};
+ #call{} ->
+ %% push <arg1>
+ %% ...
+ %% push <argn>
+ %% eax := call <Fun>; if exn goto <Fail> else goto Next
+ %% Next:
+ %% <Dst> := eax
+ %% goto <Cont>
+ {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map),
+ {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0),
+ {Fun, Map2} = conv_fun(hipe_rtl:call_fun(I), Map1),
+ I2 = conv_call(Dsts, Fun, Args,
+ hipe_rtl:call_continuation(I),
+ hipe_rtl:call_fail(I),
+ hipe_rtl:call_type(I)),
+ %% XXX Fixme: this ++ is probably inefficient.
+ {FixArgs++I2, Map2, Data};
+ #comment{} ->
+ I2 = [hipe_x86:mk_comment(hipe_rtl:comment_text(I))],
+ {I2, Map, Data};
+ #enter{} ->
+ {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:enter_arglist(I), Map),
+ {Fun, Map1} = conv_fun(hipe_rtl:enter_fun(I), Map0),
+ I2 = conv_tailcall(Fun, Args, hipe_rtl:enter_type(I)),
+ {FixArgs++I2, Map1, Data};
+ #goto{} ->
+ I2 = [hipe_x86:mk_jmp_label(hipe_rtl:goto_label(I))],
+ {I2, Map, Data};
+ #label{} ->
+ I2 = [hipe_x86:mk_label(hipe_rtl:label_name(I))],
+ {I2, Map, Data};
+ #load{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map),
+ {FixSrc, Src, Map1} = conv_src(hipe_rtl:load_src(I), Map0),
+ {FixOff, Off, Map2} = conv_src(hipe_rtl:load_offset(I), Map1),
+ I2 = case {hipe_rtl:load_size(I), hipe_rtl:load_sign(I)} of
+ {byte, signed} ->
+ [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'byte'), Dst)];
+ {byte, unsigned} ->
+ [hipe_x86:mk_movzx(hipe_x86:mk_mem(Src, Off, 'byte'), Dst)];
+ {int16, signed} ->
+ [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'int16'), Dst)];
+ {int16, unsigned} ->
+ [hipe_x86:mk_movzx(hipe_x86:mk_mem(Src, Off, 'int16'), Dst)];
+ {LoadSize, LoadSign} ->
+ mk_load(LoadSize, LoadSign, Src, Off, Dst)
+ end,
+ {FixSrc++FixOff++I2, Map2, Data};
+ #load_address{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:load_address_dst(I), Map),
+ Addr = hipe_rtl:load_address_addr(I),
+ Type = hipe_rtl:load_address_type(I),
+ Src = hipe_x86:mk_imm_from_addr(Addr, Type),
+ I2 = mk_load_address(Type, Src, Dst),
+ {I2, Map0, Data};
+ #load_atom{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:load_atom_dst(I), Map),
+ Src = hipe_x86:mk_imm_from_atom(hipe_rtl:load_atom_atom(I)),
+ I2 = [hipe_x86:mk_move(Src, Dst)],
+ {I2, Map0, Data};
+ #move{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map),
+ {FixSrc, Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0),
+ I2 = [hipe_x86:mk_move(Src, Dst)],
+ {FixSrc++I2, Map1, Data};
+ #return{} ->
+ {FixArgs, Args, Map0} = conv_src_list(hipe_rtl:return_varlist(I), Map),
+ %% frame will fill in npop later, hence the "mk_ret(-1)"
+ I2 = move_retvals(Args, [hipe_x86:mk_ret(-1)]),
+ {FixArgs++I2, Map0, Data};
+ #store{} ->
+ {Ptr, Map0} = conv_dst(hipe_rtl:store_base(I), Map),
+ {FixSrc, Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0),
+ {FixOff, Off, Map2} = conv_src(hipe_rtl:store_offset(I), Map1),
+ I2 = mk_store(hipe_rtl:store_size(I), Src, Ptr, Off),
+ {FixSrc++FixOff++I2, Map2, Data};
+ #switch{} -> % this one also updates Data :-(
+ %% from hipe_rtl2sparc, but we use a hairy addressing mode
+ %% instead of doing the arithmetic manually
+ Labels = hipe_rtl:switch_labels(I),
+ LMap = [{label,L} || L <- Labels],
+ {NewData, JTabLab} =
+ case hipe_rtl:switch_sort_order(I) of
+ [] ->
+ hipe_consttab:insert_block(Data, word, LMap);
+ SortOrder ->
+ hipe_consttab:insert_sorted_block(
+ Data, word, LMap, SortOrder)
+ end,
+ %% no immediates allowed here
+ {Index, Map1} = conv_dst(hipe_rtl:switch_src(I), Map),
+ I2 = mk_jmp_switch(Index, JTabLab, Labels),
+ {I2, Map1, NewData};
+ #fload{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:fload_dst(I), Map),
+ {[], Src, Map1} = conv_src(hipe_rtl:fload_src(I), Map0),
+ {[], Off, Map2} = conv_src(hipe_rtl:fload_offset(I), Map1),
+ I2 = [hipe_x86:mk_fmove(hipe_x86:mk_mem(Src, Off, 'double'),Dst)],
+ {I2, Map2, Data};
+ #fstore{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:fstore_base(I), Map),
+ {[], Src, Map1} = conv_src(hipe_rtl:fstore_src(I), Map0),
+ {[], Off, Map2} = conv_src(hipe_rtl:fstore_offset(I), Map1),
+ I2 = [hipe_x86:mk_fmove(Src, hipe_x86:mk_mem(Dst, Off, 'double'))],
+ {I2, Map2, Data};
+ #fp{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:fp_dst(I), Map),
+ {[], Src1, Map1} = conv_src(hipe_rtl:fp_src1(I), Map0),
+ {[], Src2, Map2} = conv_src(hipe_rtl:fp_src2(I), Map1),
+ FpBinOp = conv_fp_binop(hipe_rtl:fp_op(I)),
+ I2 = conv_fp_binary(Dst, Src1, FpBinOp, Src2),
+ {I2, Map2, Data};
+ #fp_unop{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:fp_unop_dst(I), Map),
+ {[], Src, Map1} = conv_src(hipe_rtl:fp_unop_src(I), Map0),
+ FpUnOp = conv_fp_unop(hipe_rtl:fp_unop_op(I)),
+ I2 = conv_fp_unary(Dst, Src, FpUnOp),
+ {I2, Map1, Data};
+ #fmove{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:fmove_dst(I), Map),
+ {[], Src, Map1} = conv_src(hipe_rtl:fmove_src(I), Map0),
+ I2 = [hipe_x86:mk_fmove(Src, Dst)],
+ {I2, Map1, Data};
+ #fconv{} ->
+ {Dst, Map0} = conv_dst(hipe_rtl:fconv_dst(I), Map),
+ {[], Src, Map1} = conv_src(hipe_rtl:fconv_src(I), Map0),
+ I2 = [hipe_x86:mk_fmove(Src, Dst)],
+ {I2, Map1, Data};
+ X ->
+ %% gctest??
+ %% jmp, jmp_link, jsr, esr, multimove,
+ %% stackneed, pop_frame, restore_frame, save_frame
+ throw({?MODULE, {"unknown RTL instruction", X}})
+ end.
+
+%%% Finalise the conversion of a 3-address ALU operation, taking
+%%% care to not introduce more temps and moves than necessary.
+
+conv_alu(Dst, Src1, 'imul', Src2, Tail) ->
+ mk_imul(Src1, Src2, Dst, Tail);
+conv_alu(Dst, Src1, BinOp, Src2, Tail) ->
+ case same_opnd(Dst, Src1) of
+ true -> % x = x op y
+ [hipe_x86:mk_alu(BinOp, Src2, Dst) | Tail]; % x op= y
+ false -> % z = x op y, where z != x
+ case same_opnd(Dst, Src2) of
+ false -> % z = x op y, where z != x && z != y
+ [hipe_x86:mk_move(Src1, Dst), % z = x
+ hipe_x86:mk_alu(BinOp, Src2, Dst) | Tail]; % z op= y
+ true -> % y = x op y, where y != x
+ case binop_commutes(BinOp) of
+ true -> % y = y op x
+ [hipe_x86:mk_alu(BinOp, Src1, Dst) | Tail]; % y op= x
+ false -> % y = x op y, where op doesn't commute
+ Tmp = clone_dst(Dst),
+ [hipe_x86:mk_move(Src1, Tmp), % t = x
+ hipe_x86:mk_alu(BinOp, Src2, Tmp), % t op= y
+ hipe_x86:mk_move(Tmp, Dst) | Tail] % y = t
+ end
+ end
+ end.
+
+mk_imul(Src1, Src2, Dst, Tail) ->
+ case hipe_x86:is_imm(Src1) of
+ true ->
+ case hipe_x86:is_imm(Src2) of
+ true ->
+ mk_imul_iit(Src1, Src2, Dst, Tail);
+ _ ->
+ mk_imul_itt(Src1, Src2, Dst, Tail)
+ end;
+ _ ->
+ case hipe_x86:is_imm(Src2) of
+ true ->
+ mk_imul_itt(Src2, Src1, Dst, Tail);
+ _ ->
+ mk_imul_ttt(Src1, Src2, Dst, Tail)
+ end
+ end.
+
+mk_imul_iit(Src1, Src2, Dst, Tail) ->
+ io:format("~w: RTL mul with two immediates\n", [?MODULE]),
+ Tmp2 = new_untagged_temp(),
+ [hipe_x86:mk_move(Src2, Tmp2) |
+ mk_imul_itt(Src1, Tmp2, Dst, Tail)].
+
+mk_imul_itt(Src1, Src2, Dst, Tail) ->
+ [hipe_x86:mk_imul(Src1, Src2, Dst) | Tail].
+
+mk_imul_ttt(Src1, Src2, Dst, Tail) ->
+ case same_opnd(Dst, Src1) of
+ true ->
+ [hipe_x86:mk_imul([], Src2, Dst) | Tail];
+ false ->
+ case same_opnd(Dst, Src2) of
+ true ->
+ [hipe_x86:mk_imul([], Src1, Dst) | Tail];
+ false ->
+ [hipe_x86:mk_move(Src1, Dst),
+ hipe_x86:mk_imul([], Src2, Dst) | Tail]
+ end
+ end.
+
+conv_shift(Dst, Src1, BinOp, Src2) ->
+ {NewSrc2,I1} =
+ case hipe_x86:is_imm(Src2) of
+ true ->
+ {Src2, []};
+ false ->
+ NewSrc = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?ECX(), 'untagged'),
+ {NewSrc, [hipe_x86:mk_move(Src2, NewSrc)]}
+ end,
+ I2 = case same_opnd(Dst, Src1) of
+ true -> % x = x op y
+ [hipe_x86:mk_shift(BinOp, NewSrc2, Dst)]; % x op= y
+ false -> % z = x op y, where z != x
+ case same_opnd(Dst, Src2) of
+ false -> % z = x op y, where z != x && z != y
+ [hipe_x86:mk_move(Src1, Dst), % z = x
+ hipe_x86:mk_shift(BinOp, NewSrc2, Dst)];% z op= y
+ true -> % y = x op y, no shift op commutes
+ Tmp = clone_dst(Dst),
+ [hipe_x86:mk_move(Src1, Tmp), % t = x
+ hipe_x86:mk_shift(BinOp, NewSrc2, Tmp), % t op= y
+ hipe_x86:mk_move(Tmp, Dst)] % y = t
+ end
+ end,
+ I1 ++ I2.
+
+%%% Finalise the conversion of a conditional branch operation, taking
+%%% care to not introduce more temps and moves than necessary.
+
+conv_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) ->
+ case hipe_x86:is_imm(Src1) of
+ false ->
+ mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred);
+ true ->
+ case hipe_x86:is_imm(Src2) of
+ false ->
+ NewCc = commute_cc(Cc),
+ mk_branch(Src2, NewCc, Src1, TrueLab, FalseLab, Pred);
+ true ->
+ %% two immediates, let the optimiser clean it up
+ Tmp = new_untagged_temp(),
+ [hipe_x86:mk_move(Src1, Tmp) |
+ mk_branch(Tmp, Cc, Src2, TrueLab, FalseLab, Pred)]
+ end
+ end.
+
+mk_branch(Src1, Cc, Src2, TrueLab, FalseLab, Pred) ->
+ %% PRE: not(is_imm(Src1))
+ [hipe_x86:mk_cmp(Src2, Src1),
+ hipe_x86:mk_pseudo_jcc(Cc, TrueLab, FalseLab, Pred)].
+
+%%% Convert an RTL ALU or ALUB binary operator.
+
+conv_binop(BinOp) ->
+ case BinOp of
+ 'add' -> 'add';
+ 'sub' -> 'sub';
+ 'or' -> 'or';
+ 'and' -> 'and';
+ 'xor' -> 'xor';
+ 'sll' -> 'shl';
+ 'srl' -> 'shr';
+ 'sra' -> 'sar';
+ 'mul' -> 'imul';
+ %% andnot ???
+ _ -> exit({?MODULE, {"unknown binop", BinOp}})
+ end.
+
+binop_commutes(BinOp) ->
+ case BinOp of
+ 'add' -> true;
+ 'or' -> true;
+ 'and' -> true;
+ 'xor' -> true;
+ _ -> false
+ end.
+
+%%% Convert an RTL conditional operator.
+
+conv_cond(Cond) ->
+ case Cond of
+ eq -> 'e';
+ ne -> 'ne';
+ gt -> 'g';
+ gtu -> 'a';
+ ge -> 'ge';
+ geu -> 'ae';
+ lt -> 'l';
+ ltu -> 'b';
+ le -> 'le';
+ leu -> 'be';
+ overflow -> 'o';
+ not_overflow -> 'no';
+ _ -> exit({?MODULE, {"unknown rtl cond", Cond}})
+ end.
+
+commute_cc(Cc) -> % if x Cc y, then y commute_cc(Cc) x
+ case Cc of
+ 'e' -> 'e'; % ==, ==
+ 'ne' -> 'ne'; % !=, !=
+ 'g' -> 'l'; % >, <
+ 'a' -> 'b'; % >u, <u
+ 'ge' -> 'le'; % >=, <=
+ 'ae' -> 'be'; % >=u, <=u
+ 'l' -> 'g'; % <, >
+ 'b' -> 'a'; % <u, >u
+ 'le' -> 'ge'; % <=, >=
+ 'be' -> 'ae'; % <=u, >=u
+ %% overflow/not_overflow: n/a
+ _ -> exit({?MODULE, {"unknown cc", Cc}})
+ end.
+
+%%% Test if Dst and Src are the same operand.
+
+same_opnd(Dst, Src) -> Dst =:= Src.
+
+%%% Finalise the conversion of a tailcall instruction.
+
+conv_tailcall(Fun, Args, Linkage) ->
+ Arity = length(Args),
+ {RegArgs,StkArgs} = split_args(Args),
+ move_actuals(RegArgs,
+ [hipe_x86:mk_pseudo_tailcall_prepare(),
+ hipe_x86:mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage)]).
+
+split_args(Args) ->
+ split_args(0, ?HIPE_X86_REGISTERS:nr_args(), Args, []).
+split_args(I, N, [Arg|Args], RegArgs) when I < N ->
+ Reg = ?HIPE_X86_REGISTERS:arg(I),
+ Temp = hipe_x86:mk_temp(Reg, 'tagged'),
+ split_args(I+1, N, Args, [{Arg,Temp}|RegArgs]);
+split_args(_, _, StkArgs, RegArgs) ->
+ {RegArgs, StkArgs}.
+
+move_actuals([], Rest) -> Rest;
+move_actuals([{Src,Dst}|Actuals], Rest) ->
+ move_actuals(Actuals, [hipe_x86:mk_move(Src, Dst) | Rest]).
+
+move_formals([], Rest) -> Rest;
+move_formals([{Dst,Src}|Formals], Rest) ->
+ move_formals(Formals, [hipe_x86:mk_move(Src, Dst) | Rest]).
+
+%%% Finalise the conversion of a call instruction.
+
+conv_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) ->
+ case hipe_x86:is_prim(Fun) of
+ true ->
+ conv_primop_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage);
+ false ->
+ conv_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage)
+ end.
+
+conv_primop_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) ->
+ case hipe_x86:prim_prim(Prim) of
+ 'fwait' ->
+ conv_fwait_call(Dsts, Args, ContLab, ExnLab, Linkage);
+ _ ->
+ conv_general_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage)
+ end.
+
+conv_fwait_call([], [], [], [], not_remote) ->
+ [hipe_x86:mk_fp_unop('fwait', [])].
+
+conv_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) ->
+ %% The backend does not support pseudo_calls without a
+ %% continuation label, so we make sure each call has one.
+ {RealContLab, Tail} =
+ case do_call_results(Dsts) of
+ [] ->
+ %% Avoid consing up a dummy basic block if the moves list
+ %% is empty, as is typical for calls to suspend/0.
+ %% This should be subsumed by a general "optimise the CFG"
+ %% module, and could probably be removed.
+ case ContLab of
+ [] ->
+ NewContLab = hipe_gensym:get_next_label(x86),
+ {NewContLab, [hipe_x86:mk_label(NewContLab)]};
+ _ ->
+ {ContLab, []}
+ end;
+ Moves ->
+ %% Change the call to continue at a new basic block.
+ %% In this block move the result registers to the Dsts,
+ %% then continue at the call's original continuation.
+ %%
+ %% This should be fixed to propagate "fallthrough calls"
+ %% When the rest of the backend supports them.
+ NewContLab = hipe_gensym:get_next_label(x86),
+ case ContLab of
+ [] ->
+ %% This is just a fallthrough
+ %% No jump back after the moves.
+ {NewContLab,
+ [hipe_x86:mk_label(NewContLab) |
+ Moves]};
+ _ ->
+ %% The call has a continuation
+ %% jump to it.
+ {NewContLab,
+ [hipe_x86:mk_label(NewContLab) |
+ Moves ++
+ [hipe_x86:mk_jmp_label(ContLab)]]}
+ end
+ end,
+ SDesc = hipe_x86:mk_sdesc(ExnLab, 0, length(Args), {}),
+ CallInsn = hipe_x86:mk_pseudo_call(Fun, SDesc, RealContLab, Linkage),
+ {RegArgs,StkArgs} = split_args(Args),
+ do_push_args(StkArgs, move_actuals(RegArgs, [CallInsn | Tail])).
+
+do_push_args([Arg|Args], Tail) ->
+ [hipe_x86:mk_push(Arg) | do_push_args(Args, Tail)];
+do_push_args([], Tail) ->
+ Tail.
+
+%%% Move return values from the return value registers.
+
+do_call_results(DstList) ->
+ do_call_results(DstList, 0, []).
+
+do_call_results([Dst|DstList], I, Rest) ->
+ Src = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:ret(I), 'tagged'),
+ Move = hipe_x86:mk_move(Src, Dst),
+ do_call_results(DstList, I+1, [Move|Rest]);
+do_call_results([], _, Insns) -> Insns.
+
+%%% Move return values to the return value registers.
+
+move_retvals(SrcLst, Rest) ->
+ move_retvals(SrcLst, 0, Rest).
+
+move_retvals([Src|SrcLst], I, Rest) ->
+ Dst = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:ret(I), 'tagged'),
+ Move = hipe_x86:mk_move(Src, Dst),
+ move_retvals(SrcLst, I+1, [Move|Rest]);
+move_retvals([], _, Insns) -> Insns.
+
+%%% Convert a 'fun' operand (MFA, prim, or temp)
+
+conv_fun(Fun, Map) ->
+ case hipe_rtl:is_var(Fun) of
+ true ->
+ conv_dst(Fun, Map);
+ false ->
+ case hipe_rtl:is_reg(Fun) of
+ true ->
+ conv_dst(Fun, Map);
+ false ->
+ case Fun of
+ Prim when is_atom(Prim) ->
+ {hipe_x86:mk_prim(Prim), Map};
+ {M,F,A} when is_atom(M), is_atom(F), is_integer(A) ->
+ {hipe_x86:mk_mfa(M,F,A), Map};
+ _ ->
+ exit({?MODULE,conv_fun,Fun})
+ end
+ end
+ end.
+
+%%% Convert an RTL source operand (imm/var/reg).
+
+conv_src(Opnd, Map) ->
+ case hipe_rtl:is_imm(Opnd) of
+ true ->
+ conv_imm(Opnd, Map);
+ false ->
+ {NewOpnd,NewMap} = conv_dst(Opnd, Map),
+ {[], NewOpnd, NewMap}
+ end.
+
+-ifdef(HIPE_AMD64).
+conv_imm(Opnd, Map) ->
+ ImmVal = hipe_rtl:imm_value(Opnd),
+ case is_imm64(ImmVal) of
+ true ->
+ Temp = hipe_x86:mk_new_temp('untagged'),
+ {[hipe_x86:mk_move64(hipe_x86:mk_imm(ImmVal), Temp)], Temp, Map};
+ false ->
+ {[], hipe_x86:mk_imm(ImmVal), Map}
+ end.
+
+is_imm64(Value) when is_integer(Value) ->
+ (Value < -(1 bsl (32 - 1))) or (Value > (1 bsl (32 - 1)) - 1);
+is_imm64({_,atom}) -> false; % Atoms are 32 bits.
+is_imm64({_,c_const}) -> false; % c_consts are 32 bits.
+is_imm64({_,_}) -> true . % Other relocs are 64 bits.
+-else.
+conv_imm(Opnd, Map) ->
+ {[], hipe_x86:mk_imm(hipe_rtl:imm_value(Opnd)), Map}.
+-endif.
+
+conv_src_list([O|Os], Map) ->
+ {NewInstr, V, Map1} = conv_src(O, Map),
+ {Instrs, Vs, Map2} = conv_src_list(Os, Map1),
+ {Instrs++NewInstr, [V|Vs], Map2};
+conv_src_list([], Map) ->
+ {[], [], Map}.
+
+%%% Convert an RTL destination operand (var/reg).
+
+conv_dst(Opnd, Map) ->
+ {Name, Type} =
+ case hipe_rtl:is_var(Opnd) of
+ true ->
+ {hipe_rtl:var_index(Opnd), 'tagged'};
+ false ->
+ case hipe_rtl:is_fpreg(Opnd) of
+ true ->
+ {hipe_rtl:fpreg_index(Opnd), 'double'};
+ false ->
+ {hipe_rtl:reg_index(Opnd), 'untagged'}
+ end
+ end,
+ case ?HIPE_X86_REGISTERS:is_precoloured(Name) of
+ true ->
+ case ?HIPE_X86_REGISTERS:proc_offset(Name) of
+ false ->
+ {hipe_x86:mk_temp(Name, Type), Map};
+ Offset ->
+ Preg = ?HIPE_X86_REGISTERS:proc_pointer(),
+ Pbase = hipe_x86:mk_temp(Preg, 'untagged'),
+ Poff = hipe_x86:mk_imm(Offset),
+ {hipe_x86:mk_mem(Pbase, Poff, Type), Map}
+ end;
+ false ->
+ case vmap_lookup(Map, Opnd) of
+ {value, NewTemp} ->
+ {NewTemp, Map};
+ _ ->
+ NewTemp = hipe_x86:mk_new_temp(Type),
+ {NewTemp, vmap_bind(Map, Opnd, NewTemp)}
+ end
+ end.
+
+conv_dst_list([O|Os], Map) ->
+ {Dst, Map1} = conv_dst(O, Map),
+ {Dsts, Map2} = conv_dst_list(Os, Map1),
+ {[Dst|Dsts], Map2};
+conv_dst_list([], Map) ->
+ {[], Map}.
+
+conv_formals(Os, Map) ->
+ conv_formals(?HIPE_X86_REGISTERS:nr_args(), Os, Map, []).
+
+conv_formals(N, [O|Os], Map, Res) ->
+ Type =
+ case hipe_rtl:is_var(O) of
+ true -> 'tagged';
+ false ->'untagged'
+ end,
+ Dst =
+ if N > 0 -> hipe_x86:mk_new_temp(Type); % allocatable
+ true -> hipe_x86:mk_new_nonallocatable_temp(Type)
+ end,
+ Map1 = vmap_bind(Map, O, Dst),
+ conv_formals(N-1, Os, Map1, [Dst|Res]);
+conv_formals(_, [], Map, Res) ->
+ {lists:reverse(Res), Map}.
+
+%%% typeof_src -- what's src's type?
+
+typeof_src(Src) ->
+ case hipe_x86:is_imm(Src) of
+ true ->
+ 'untagged';
+ _ ->
+ typeof_dst(Src)
+ end.
+
+%%% typeof_dst -- what's dst's type?
+
+typeof_dst(Dst) ->
+ case hipe_x86:is_temp(Dst) of
+ true ->
+ hipe_x86:temp_type(Dst);
+ _ ->
+ hipe_x86:mem_type(Dst)
+ end.
+
+%%% clone_dst -- conjure up a scratch reg with same type as dst
+
+clone_dst(Dst) ->
+ hipe_x86:mk_new_temp(typeof_dst(Dst)).
+
+%%% new_untagged_temp -- conjure up an untagged scratch reg
+
+new_untagged_temp() ->
+ hipe_x86:mk_new_temp('untagged').
+
+%%% Map from RTL var/reg operands to x86 temps.
+
+vmap_empty() ->
+ gb_trees:empty().
+
+vmap_lookup(Map, Key) ->
+ gb_trees:lookup(Key, Map).
+
+vmap_bind(Map, Key, Val) ->
+ gb_trees:insert(Key, Val, Map).
+
+%%% Finalise the conversion of a 2-address FP operation.
+
+conv_fp_unary(Dst, Src, FpUnOp) ->
+ case same_opnd(Dst, Src) of
+ true ->
+ [hipe_x86:mk_fp_unop(FpUnOp, Dst)];
+ _ ->
+ [hipe_x86:mk_fmove(Src, Dst),
+ hipe_x86:mk_fp_unop(FpUnOp, Dst)]
+ end.
+
+conv_fp_unop(RtlFpUnOp) ->
+ case RtlFpUnOp of
+ 'fchs' -> 'fchs'
+ end.
+
+%%% Finalise the conversion of a 3-address FP operation.
+
+conv_fp_binary(Dst, Src1, FpBinOp, Src2) ->
+ case same_opnd(Dst, Src1) of
+ true -> % x = x op y
+ [hipe_x86:mk_fp_binop(FpBinOp, Src2, Dst)]; % x op= y
+ false -> % z = x op y, where z != x
+ case same_opnd(Dst, Src2) of
+ false -> % z = x op y, where z != x && z != y
+ [hipe_x86:mk_fmove(Src1, Dst), % z = x
+ hipe_x86:mk_fp_binop(FpBinOp, Src2, Dst)]; % z op= y
+ true -> % y = x op y, where y != x
+ case fp_binop_commutes(FpBinOp) of
+ true -> % y = y op x
+ [hipe_x86:mk_fp_binop(FpBinOp, Src1, Dst)]; % y op= x
+ false -> % y = x op y, where op doesn't commute
+ RevFpBinOp = reverse_fp_binop(FpBinOp),
+ [hipe_x86:mk_fp_binop(RevFpBinOp, Src1, Dst)]
+ end
+ end
+ end.
+
+%%% Convert an RTL FP binary operator.
+
+conv_fp_binop(RtlFpBinOp) ->
+ case RtlFpBinOp of
+ 'fadd' -> 'fadd';
+ 'fdiv' -> 'fdiv';
+ 'fmul' -> 'fmul';
+ 'fsub' -> 'fsub'
+ end.
+
+fp_binop_commutes(FpBinOp) ->
+ case FpBinOp of
+ 'fadd' -> true;
+ 'fmul' -> true;
+ _ -> false
+ end.
+
+reverse_fp_binop(FpBinOp) ->
+ case FpBinOp of
+ 'fsub' -> 'fsubr';
+ 'fdiv' -> 'fdivr'
+ end.
+
+%%% Create a jmp_switch instruction.
+
+-ifdef(HIPE_AMD64).
+mk_jmp_switch(Index, JTabLab, Labels) ->
+ JTabReg = hipe_x86:mk_new_temp('untagged'),
+ JTabImm = hipe_x86:mk_imm_from_addr(JTabLab, constant),
+ [hipe_x86:mk_move64(JTabImm, JTabReg),
+ hipe_x86:mk_jmp_switch(Index, JTabReg, Labels)].
+-else.
+mk_jmp_switch(Index, JTabLab, Labels) ->
+ %% this is equivalent to "jmp *JTabLab(,Index,4)"
+ %% ("r = Index; r *= 4; r += &JTab; jmp *r" isn't as nice)
+ [hipe_x86:mk_jmp_switch(Index, JTabLab, Labels)].
+-endif.
+
+%%% Finalise the translation of a load_address instruction.
+
+-ifdef(HIPE_AMD64).
+mk_load_address(Type, Src, Dst) ->
+ case Type of
+ c_const -> % 32 bits
+ [hipe_x86:mk_move(Src, Dst)];
+ _ ->
+ [hipe_x86:mk_move64(Src, Dst)]
+ end.
+-else.
+mk_load_address(_Type, Src, Dst) ->
+ [hipe_x86:mk_move(Src, Dst)].
+-endif.
+
+%%% Translate 32-bit and larger loads.
+
+-ifdef(HIPE_AMD64).
+mk_load(LoadSize, LoadSign, Src, Off, Dst) ->
+ case {LoadSize, LoadSign} of
+ {int32, signed} ->
+ [hipe_x86:mk_movsx(hipe_x86:mk_mem(Src, Off, 'int32'), Dst)];
+ {int32, unsigned} ->
+ %% The processor zero-extends for us. No need for 'movzx'.
+ [hipe_x86:mk_move(hipe_x86:mk_mem(Src, Off, 'int32'), Dst)];
+ {_, _} ->
+ mk_load_word(Src, Off, Dst)
+ end.
+-else.
+mk_load(_LoadSize, _LoadSign, Src, Off, Dst) ->
+ mk_load_word(Src, Off, Dst).
+-endif.
+
+mk_load_word(Src, Off, Dst) ->
+ Type = typeof_dst(Dst),
+ [hipe_x86:mk_move(hipe_x86:mk_mem(Src, Off, Type), Dst)].
+
+%%% Finalise the translation of a store instruction.
+
+-ifdef(HIPE_AMD64).
+mk_store(RtlStoreSize, Src, Ptr, Off) ->
+ Type = case RtlStoreSize of
+ word ->
+ typeof_src(Src);
+ OtherType ->
+ OtherType
+ end,
+ [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))].
+-else.
+mk_store(RtlStoreSize, Src, Ptr, Off) ->
+ case RtlStoreSize of
+ word ->
+ Type = typeof_src(Src),
+ [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))];
+ int32 ->
+ Type = typeof_src(Src),
+ [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))];
+ int16 ->
+ Type = 'int16',
+ [hipe_x86:mk_move(Src, hipe_x86:mk_mem(Ptr, Off, Type))];
+ byte ->
+ Type = 'byte',
+ {NewSrc, I1} = conv_small_store(Src),
+ I1 ++ [hipe_x86:mk_move(NewSrc, hipe_x86:mk_mem(Ptr, Off, Type))]
+ end.
+
+conv_small_store(Src) ->
+ case hipe_x86:is_imm(Src) of
+ true ->
+ {Src, []};
+ false ->
+ NewSrc = hipe_x86:mk_temp(hipe_x86_registers:eax(), 'untagged'),
+ {NewSrc, [hipe_x86:mk_move(Src, NewSrc)]}
+ end.
+-endif.
diff --git a/lib/hipe/x86/hipe_x86.erl b/lib/hipe/x86/hipe_x86.erl
new file mode 100644
index 0000000000..3298151366
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86.erl
@@ -0,0 +1,496 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% representation of 2-address pseudo-amd64 code
+
+-module(hipe_x86).
+
+-include("hipe_x86.hrl").
+
+%% Commented out are interface functions which are currently not used.
+-export([mk_temp/2,
+ %% mk_nonallocatable_temp/2,
+ mk_new_temp/1,
+ mk_new_nonallocatable_temp/1,
+ is_temp/1,
+ temp_reg/1,
+ temp_type/1,
+ temp_is_allocatable/1,
+
+ mk_imm/1,
+ mk_imm_from_addr/2,
+ mk_imm_from_atom/1,
+ is_imm/1,
+ %% imm_value/1,
+
+ mk_mem/3,
+ %% is_mem/1,
+ %% mem_base/1,
+ %% mem_off/1,
+ mem_type/1,
+
+ mk_fpreg/1,
+ mk_fpreg/2,
+ %% is_fpreg/1,
+ %% fpreg_is_pseudo/1,
+ %% fpreg_reg/1,
+
+ mk_mfa/3,
+ %% is_mfa/1,
+
+ mk_prim/1,
+ is_prim/1,
+ prim_prim/1,
+
+ mk_sdesc/4,
+
+ %% insn_type/1,
+
+ mk_alu/3,
+ %% is_alu/1,
+ alu_op/1,
+ alu_src/1,
+ alu_dst/1,
+
+ mk_call/3,
+ %% is_call/1,
+ call_fun/1,
+ call_sdesc/1,
+ call_linkage/1,
+
+ %% mk_cmovcc/3,
+ %% is_cmovcc/1,
+ cmovcc_cc/1,
+ cmovcc_src/1,
+ cmovcc_dst/1,
+
+ mk_cmp/2,
+ %% is_cmp/1,
+ cmp_src/1,
+ cmp_dst/1,
+
+ mk_comment/1,
+ %% is_comment/1,
+ %% comment_term/1,
+
+ mk_fmove/2,
+ is_fmove/1,
+ fmove_src/1,
+ fmove_dst/1,
+
+ mk_fp_unop/2,
+ %% is_fp_unop/1,
+ fp_unop_arg/1,
+ fp_unop_op/1,
+
+ mk_fp_binop/3,
+ %% is_fp_binop/1,
+ fp_binop_src/1,
+ fp_binop_dst/1,
+ fp_binop_op/1,
+
+ mk_imul/3,
+ imul_imm_opt/1,
+ imul_src/1,
+ imul_temp/1,
+
+ mk_jcc/2,
+ %% is_jcc/1,
+ jcc_cc/1,
+ jcc_label/1,
+
+ mk_jmp_fun/2,
+ %% is_jmp_fun/1,
+ jmp_fun_fun/1,
+ jmp_fun_linkage/1,
+
+ mk_jmp_label/1,
+ %% is_jmp_label/1,
+ jmp_label_label/1,
+
+ mk_jmp_switch/3,
+ %% is_jmp_switch/1,
+ jmp_switch_temp/1,
+ jmp_switch_jtab/1,
+ %% jmp_switch_labels/1,
+
+ mk_label/1,
+ is_label/1,
+ label_label/1,
+
+ mk_lea/2,
+ %% is_lea/1,
+ lea_mem/1,
+ lea_temp/1,
+
+ mk_move/2,
+ is_move/1,
+ move_src/1,
+ move_dst/1,
+ mk_move64/2,
+ %% is_move64/1,
+ move64_src/1,
+ move64_dst/1,
+
+ mk_movsx/2,
+ %% is_movsx/1,
+ movsx_src/1,
+ movsx_dst/1,
+
+ mk_movzx/2,
+ %% is_movzx/1,
+ movzx_src/1,
+ movzx_dst/1,
+
+ mk_pseudo_call/4,
+ %% is_pseudo_call/1,
+ pseudo_call_fun/1,
+ pseudo_call_sdesc/1,
+ pseudo_call_contlab/1,
+ pseudo_call_linkage/1,
+
+ mk_pseudo_jcc/4,
+ %% is_pseudo_jcc/1,
+ %% pseudo_jcc_cc/1,
+ %% pseudo_jcc_true_label/1,
+ %% pseudo_jcc_false_label/1,
+ %% pseudo_jcc_pred/1,
+
+ mk_pseudo_spill/1,
+
+ mk_pseudo_tailcall/4,
+ %% is_pseudo_tailcall/1,
+ pseudo_tailcall_fun/1,
+ %% pseudo_tailcall_arity/1,
+ pseudo_tailcall_stkargs/1,
+ pseudo_tailcall_linkage/1,
+
+ mk_pseudo_tailcall_prepare/0,
+ %% is_pseudo_tailcall_prepare/1,
+
+ mk_push/1,
+ %% is_push/1,
+ push_src/1,
+
+ %% mk_pop/1,
+ pop_dst/1,
+
+ mk_ret/1,
+ %% is_ret/1,
+ ret_npop/1,
+
+ mk_shift/3,
+ %% is_shift/1,
+ shift_op/1,
+ shift_src/1,
+ shift_dst/1,
+
+ %% mk_test/2,
+ test_src/1,
+ test_dst/1,
+
+ mk_defun/8,
+ defun_mfa/1,
+ defun_formals/1,
+ defun_is_closure/1,
+ defun_is_leaf/1,
+ defun_code/1,
+ defun_data/1,
+ defun_var_range/1
+ %% defun_label_range/1,
+
+ %% highest_temp/1
+ ]).
+
+%%%
+%%% Low-level accessors.
+%%%
+
+mk_temp(Reg, Type) when is_integer(Reg) ->
+ #x86_temp{reg=Reg, type=Type, allocatable=true}.
+mk_nonallocatable_temp(Reg, Type) when is_integer(Reg) ->
+ #x86_temp{reg=Reg, type=Type, allocatable=false}.
+mk_new_temp(Type) ->
+ mk_temp(hipe_gensym:get_next_var(x86), Type).
+mk_new_nonallocatable_temp(Type) ->
+ mk_nonallocatable_temp(hipe_gensym:get_next_var(x86), Type).
+is_temp(X) -> case X of #x86_temp{} -> true; _ -> false end.
+temp_reg(#x86_temp{reg=Reg}) when is_integer(Reg) -> Reg.
+temp_type(#x86_temp{type=Type}) -> Type.
+temp_is_allocatable(#x86_temp{allocatable=A}) -> A.
+
+mk_imm(Value) -> #x86_imm{value=Value}.
+mk_imm_from_addr(Addr, Type) ->
+ mk_imm({Addr, Type}).
+mk_imm_from_atom(Atom) ->
+ mk_imm(Atom).
+is_imm(X) -> case X of #x86_imm{} -> true; _ -> false end.
+%% imm_value(#x86_imm{value=Value}) -> Value.
+
+mk_mem(Base, Off, Type) -> #x86_mem{base=Base, off=Off, type=Type}.
+%% is_mem(X) -> case X of #x86_mem{} -> true; _ -> false end.
+%% mem_base(#x86_mem{base=Base}) -> Base.
+%% mem_off(#x86_mem{off=Off}) -> Off.
+mem_type(#x86_mem{type=Type}) -> Type.
+
+mk_fpreg(Reg) -> #x86_fpreg{reg=Reg, pseudo=true}.
+mk_fpreg(Reg, Pseudo) -> #x86_fpreg{reg=Reg, pseudo=Pseudo}.
+%% is_fpreg(F) -> case F of #x86_fpreg{} -> true;_ -> false end.
+%% fpreg_is_pseudo(#x86_fpreg{pseudo=Pseudo}) -> Pseudo.
+%% fpreg_reg(#x86_fpreg{reg=Reg}) -> Reg.
+
+mk_mfa(M, F, A) -> #x86_mfa{m=M, f=F, a=A}.
+%% is_mfa(X) -> case X of #x86_mfa{} -> true; _ -> false end.
+
+mk_prim(Prim) -> #x86_prim{prim=Prim}.
+is_prim(X) -> case X of #x86_prim{} -> true; _ -> false end.
+prim_prim(#x86_prim{prim=Prim}) -> Prim.
+
+mk_sdesc(ExnLab, FSize, Arity, Live) ->
+ #x86_sdesc{exnlab=ExnLab, fsize=FSize, arity=Arity, live=Live}.
+
+insn_type(Insn) ->
+ element(1, Insn).
+
+is_insn_type(Insn, Type) ->
+ case insn_type(Insn) of
+ Type -> true;
+ _ -> false
+ end.
+
+mk_alu(Op, Src, Dst) -> #alu{aluop=Op, src=Src, dst=Dst}.
+%% is_alu(Insn) -> is_insn_type(Insn, alu).
+alu_op(#alu{aluop=Op}) -> Op.
+alu_src(#alu{src=Src}) -> Src.
+alu_dst(#alu{dst=Dst}) -> Dst.
+
+mk_call(Fun, SDesc, Linkage) ->
+ check_linkage(Linkage),
+ #call{'fun'=Fun, sdesc=SDesc, linkage=Linkage}.
+%% is_call(Insn) -> is_insn_type(Insn, call).
+call_fun(#call{'fun'=Fun}) -> Fun.
+call_sdesc(#call{sdesc=SDesc}) -> SDesc.
+call_linkage(#call{linkage=Linkage}) -> Linkage.
+
+check_linkage(Linkage) ->
+ case Linkage of
+ remote -> [];
+ not_remote -> []
+ end.
+
+%% mk_cmovcc(Cc, Src, Dst) -> #cmovcc{cc=Cc, src=Src, dst=Dst}.
+%% is_cmovcc(Insn) -> is_insn_type(Insn, cmovcc).
+cmovcc_cc(#cmovcc{cc=Cc}) -> Cc.
+cmovcc_src(#cmovcc{src=Src}) -> Src.
+cmovcc_dst(#cmovcc{dst=Dst}) -> Dst.
+
+mk_cmp(Src, Dst) -> #cmp{src=Src, dst=Dst}.
+%% is_cmp(Insn) -> is_insn_type(Insn, cmp).
+cmp_src(#cmp{src=Src}) -> Src.
+cmp_dst(#cmp{dst=Dst}) -> Dst.
+
+%% mk_test(Src, Dst) -> #test{src=Src, dst=Dst}.
+test_src(#test{src=Src}) -> Src.
+test_dst(#test{dst=Dst}) -> Dst.
+
+mk_comment(Term) -> #comment{term=Term}.
+%% is_comment(Insn) -> is_insn_type(Insn, comment).
+%% comment_term(#comment{term=Term}) -> Term.
+
+mk_fmove(Src, Dst) -> #fmove{src=Src, dst=Dst}.
+is_fmove(F) -> is_insn_type(F, fmove).
+fmove_src(#fmove{src=Src}) -> Src.
+fmove_dst(#fmove{dst=Dst}) -> Dst.
+
+mk_fp_unop(Op, Arg) -> #fp_unop{op=Op, arg=Arg}.
+%% is_fp_unop(F) -> is_insn_type(F, fp_unop).
+fp_unop_arg(#fp_unop{arg=Arg}) -> Arg.
+fp_unop_op(#fp_unop{op=Op}) -> Op.
+
+mk_fp_binop(Op, Src, Dst) -> #fp_binop{op=Op, src=Src, dst=Dst}.
+%% is_fp_binop(F) -> is_insn_type(F, fp_binop).
+fp_binop_src(#fp_binop{src=Src}) -> Src.
+fp_binop_dst(#fp_binop{dst=Dst}) -> Dst.
+fp_binop_op(#fp_binop{op=Op}) -> Op.
+
+mk_imul(ImmOpt, Src, Temp) -> #imul{imm_opt=ImmOpt, src=Src, temp=Temp}.
+imul_imm_opt(#imul{imm_opt=ImmOpt}) -> ImmOpt.
+imul_src(#imul{src=Src}) -> Src.
+imul_temp(#imul{temp=Temp}) -> Temp.
+
+mk_jcc(Cc, Label) -> #jcc{cc=Cc, label=Label}.
+%% is_jcc(Insn) -> is_insn_type(Insn, jcc).
+jcc_cc(#jcc{cc=Cc}) -> Cc.
+jcc_label(#jcc{label=Label}) -> Label.
+
+mk_jmp_fun(Fun, Linkage) ->
+ check_linkage(Linkage),
+ #jmp_fun{'fun'=Fun, linkage=Linkage}.
+%% is_jmp_fun(Insn) -> is_insn_type(Insn, jmp_fun).
+jmp_fun_fun(#jmp_fun{'fun'=Fun}) -> Fun.
+jmp_fun_linkage(#jmp_fun{linkage=Linkage}) -> Linkage.
+
+mk_jmp_label(Label) -> #jmp_label{label=Label}.
+%% is_jmp_label(Insn) -> is_insn_type(Insn, jmp_label).
+jmp_label_label(#jmp_label{label=Label}) -> Label.
+
+mk_jmp_switch(Temp, JTab, Labels) ->
+ #jmp_switch{temp=Temp, jtab=JTab, labels=Labels}.
+%% is_jmp_switch(Insn) -> is_insn_type(Insn, jmp_switch).
+jmp_switch_temp(#jmp_switch{temp=Temp}) -> Temp.
+jmp_switch_jtab(#jmp_switch{jtab=JTab}) -> JTab.
+%% jmp_switch_labels(#jmp_switch{labels=Labels}) -> Labels.
+
+mk_label(Label) -> #label{label=Label}.
+is_label(Insn) -> is_insn_type(Insn, label).
+label_label(#label{label=Label}) -> Label.
+
+mk_lea(Mem, Temp) -> #lea{mem=Mem, temp=Temp}.
+%% is_lea(Insn) -> is_insn_type(Insn, lea).
+lea_mem(#lea{mem=Mem}) -> Mem.
+lea_temp(#lea{temp=Temp}) -> Temp.
+
+mk_move(Src, Dst) -> #move{src=Src, dst=Dst}.
+is_move(Insn) -> is_insn_type(Insn, move).
+move_src(#move{src=Src}) -> Src.
+move_dst(#move{dst=Dst}) -> Dst.
+
+mk_move64(Imm, Dst) -> #move64{imm=Imm, dst=Dst}.
+%% is_move64(Insn) -> is_insn_type(Insn, move64).
+move64_src(#move64{imm=Imm}) -> Imm.
+move64_dst(#move64{dst=Dst}) -> Dst.
+
+mk_movsx(Src, Dst) -> #movsx{src=Src, dst=Dst}.
+%% is_movsx(Insn) -> is_insn_type(Insn, movsx).
+movsx_src(#movsx{src=Src}) -> Src.
+movsx_dst(#movsx{dst=Dst}) -> Dst.
+
+mk_movzx(Src, Dst) -> #movzx{src=Src, dst=Dst}.
+%% is_movzx(Insn) -> is_insn_type(Insn, movzx).
+movzx_src(#movzx{src=Src}) -> Src.
+movzx_dst(#movzx{dst=Dst}) -> Dst.
+
+mk_pseudo_call(Fun, SDesc, ContLab, Linkage) ->
+ check_linkage(Linkage),
+ #pseudo_call{'fun'=Fun, sdesc=SDesc, contlab=ContLab, linkage=Linkage}.
+%% is_pseudo_call(Insn) -> is_insn_type(Insn, pseudo_call).
+pseudo_call_fun(#pseudo_call{'fun'=Fun}) -> Fun.
+pseudo_call_sdesc(#pseudo_call{sdesc=SDesc}) -> SDesc.
+pseudo_call_contlab(#pseudo_call{contlab=ContLab}) -> ContLab.
+pseudo_call_linkage(#pseudo_call{linkage=Linkage}) -> Linkage.
+
+mk_pseudo_jcc(Cc, TrueLabel, FalseLabel, Pred) -> % 'smart' constructor
+ if Pred >= 0.5 ->
+ mk_pseudo_jcc_simple(neg_cc(Cc), FalseLabel, TrueLabel, 1.0-Pred);
+ true ->
+ mk_pseudo_jcc_simple(Cc, TrueLabel, FalseLabel, Pred)
+ end.
+neg_cc(Cc) ->
+ case Cc of
+ 'e' -> 'ne'; % ==, !=
+ 'ne' -> 'e'; % !=, ==
+ 'g' -> 'le'; % >, <=
+ 'a' -> 'be'; % >u, <=u
+ 'ge' -> 'l'; % >=, <
+ 'ae' -> 'b'; % >=u, <u
+ 'l' -> 'ge'; % <, >=
+ 'b' -> 'ae'; % <u, >=u
+ 'le' -> 'g'; % <=, >
+ 'be' -> 'a'; % <=u, >u
+ 'o' -> 'no'; % overflow, not_overflow
+ 'no' -> 'o'; % not_overflow, overflow
+ _ -> exit({?MODULE, {"unknown cc", Cc}})
+ end.
+mk_pseudo_jcc_simple(Cc, TrueLabel, FalseLabel, Pred) ->
+ #pseudo_jcc{cc=Cc, true_label=TrueLabel, false_label=FalseLabel, pred=Pred}.
+%% is_pseudo_jcc(Insn) -> is_insn_type(Insn, pseudo_jcc).
+%% pseudo_jcc_cc(#pseudo_jcc{cc=Cc}) -> Cc.
+%% pseudo_jcc_true_label(#pseudo_jcc{true_label=TrueLabel}) -> TrueLabel.
+%% pseudo_jcc_false_label(#pseudo_jcc{false_label=FalseLabel}) -> FalseLabel.
+%% pseudo_jcc_pred(#pseudo_jcc{pred=Pred}) -> Pred.
+
+mk_pseudo_spill(List) ->
+ #pseudo_spill{args=List}.
+
+mk_pseudo_tailcall(Fun, Arity, StkArgs, Linkage) ->
+ check_linkage(Linkage),
+ #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage}.
+%% is_pseudo_tailcall(Insn) -> is_insn_type(Insn, pseudo_tailcall).
+pseudo_tailcall_fun(#pseudo_tailcall{'fun'=Fun}) -> Fun.
+%% pseudo_tailcall_arity(#pseudo_tailcall{arity=Arity}) -> Arity.
+pseudo_tailcall_stkargs(#pseudo_tailcall{stkargs=StkArgs}) -> StkArgs.
+pseudo_tailcall_linkage(#pseudo_tailcall{linkage=Linkage}) -> Linkage.
+
+mk_pseudo_tailcall_prepare() -> #pseudo_tailcall_prepare{}.
+%% is_pseudo_tailcall_prepare(Insn) -> is_insn_type(Insn, pseudo_tailcall_prepare).
+
+mk_push(Src) -> #push{src=Src}.
+%% is_push(Insn) -> is_insn_type(Insn, push).
+push_src(#push{src=Src}) -> Src.
+
+%% mk_pop(Dst) -> #pop{dst=Dst}.
+%% is_push(Insn) -> is_insn_type(Insn, push).
+pop_dst(#pop{dst=Dst}) -> Dst.
+
+mk_ret(NPop) -> #ret{npop=NPop}.
+%% is_ret(Insn) -> is_insn_type(Insn, ret).
+ret_npop(#ret{npop=NPop}) -> NPop.
+
+mk_shift(ShiftOp, Src, Dst) ->
+ #shift{shiftop=ShiftOp, src=Src, dst=Dst}.
+%% is_shift(Insn) -> is_insn_type(Insn, shift).
+shift_op(#shift{shiftop=ShiftOp}) -> ShiftOp.
+shift_src(#shift{src=Src}) -> Src.
+shift_dst(#shift{dst=Dst}) -> Dst.
+
+mk_defun(MFA, Formals, IsClosure, IsLeaf, Code, Data, VarRange, LabelRange) ->
+ #defun{mfa=MFA, formals=Formals, code=Code, data=Data,
+ isclosure=IsClosure, isleaf=IsLeaf,
+ var_range=VarRange, label_range=LabelRange}.
+defun_mfa(#defun{mfa=MFA}) -> MFA.
+defun_formals(#defun{formals=Formals}) -> Formals.
+defun_is_closure(#defun{isclosure=IsClosure}) -> IsClosure.
+defun_is_leaf(#defun{isleaf=IsLeaf}) -> IsLeaf.
+defun_code(#defun{code=Code}) -> Code.
+defun_data(#defun{data=Data}) -> Data.
+defun_var_range(#defun{var_range=VarRange}) -> VarRange.
+%% defun_label_range(#defun{label_range=LabelRange}) -> LabelRange.
+
+%% highest_temp(Code) ->
+%% highest_temp(Code,0).
+%%
+%% highest_temp([I|Is],Max) ->
+%% Defs = hipe_x86_defuse:insn_def(I),
+%% Uses = hipe_x86_defuse:insn_use(I),
+%% highest_temp(Is,new_max(Defs++Uses,Max));
+%% highest_temp([],Max) ->
+%% Max.
+%%
+%% new_max([V|Vs],Max) ->
+%% case is_temp(V) of
+%% true ->
+%% TReg = temp_reg(V),
+%% if TReg > Max ->
+%% new_max(Vs, TReg);
+%% true ->
+%% new_max(Vs, Max)
+%% end;
+%% false ->
+%% new_max(Vs, Max)
+%% end;
+%% new_max([],Max) -> Max.
diff --git a/lib/hipe/x86/hipe_x86.hrl b/lib/hipe/x86/hipe_x86.hrl
new file mode 100644
index 0000000000..3d22fb381f
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86.hrl
@@ -0,0 +1,116 @@
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%% concrete representation of 2-address pseudo-x86 code
+
+%%%--------------------------------------------------------------------
+%%% x86 operands:
+%%%
+%%% int32 ::= <a 32-bit integer>
+%%% reg ::= <token from hipe_x86_registers module>
+%%% type ::= 'tagged' | 'untagged'
+%%% label ::= <an integer>
+%%% label_type ::= 'label' | 'constant'
+%%% aluop ::= <an atom denoting a binary alu op>
+%%% term ::= <any Erlang term>
+%%% cc ::= <an atom denoting a condition code>
+%%% pred ::= <a real number between 0.0 and 1.0 inclusive>
+%%% npop ::= <a 32-bit natural number which is a multiple of 4>
+%%%
+%%% temp ::= {x86_temp, reg, type, allocatable}
+%%% allocatable ::= 'true' | 'false'
+%%%
+%%% imm ::= {x86_imm, value}
+%%% value ::= int32 | atom | {label, label_type}
+%%%
+%%% mem ::= {x86_mem, base, off, mem_type}
+%%% base ::= temp | [] (XXX BUG: not quite true before RA)
+%%% off ::= imm | temp
+%%% mem_type ::= 'byte' | 'int16' (only valid with mov{s,z}x)
+%%% | type
+%%%
+%%% src ::= temp | mem | imm
+%%% dst ::= temp | mem
+%%% arg ::= src
+%%% args ::= <list of arg>
+%%%
+%%% mfa ::= {x86_mfa, atom, atom, byte}
+%%% prim ::= {x86_prim, atom}
+%%% fun ::= mfa | prim | temp | mem
+%%%
+%%% jtab ::= label (equiv. to {x86_imm,{label,'constant'}})
+%%%
+%%% sdesc ::= {x86_sdesc, exnlab, fsize, arity, live}
+%%% exnlab ::= [] | label
+%%% fsize ::= <int32> (frame size in words)
+%%% live ::= <tuple of int32> (word offsets)
+%%% arity ::= int32
+
+-record(x86_temp, {reg, type, allocatable}).
+-record(x86_imm, {value}).
+-record(x86_mem, {base, off, type}).
+-record(x86_fpreg, {reg, pseudo}).
+-record(x86_mfa, {m::atom(), f::atom(), a::arity()}).
+-record(x86_prim, {prim}).
+-record(x86_sdesc, {exnlab, fsize, arity::arity(), live::tuple()}).
+
+%%% Basic instructions.
+%%% These follow the AT&T convention, i.e. op src,dst (dst := dst op src)
+%%% After register allocation, at most one operand in a binary
+%%% instruction (alu, cmp, move) may denote a memory cell.
+%%% After frame allocation, every temp must denote a physical register.
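+%%% For example, #alu{aluop='sub', src=#x86_imm{value=8}, dst=Temp}
+%%% denotes "sub $8, Temp", i.e. Temp := Temp - 8.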
+
+-record(alu, {aluop, src, dst}).
+-record(call, {'fun', sdesc, linkage}).
+-record(cmovcc, {cc, src, dst}).
+-record(cmp, {src, dst}). % a 'sub' alu which doesn't update dst
+-record(comment, {term}).
+-record(fmove, {src, dst}).
+-record(fp_binop, {op, src, dst}).
+-record(fp_unop, {op, arg}). % arg may be [] :-(
+-record(imul, {imm_opt, src, temp}). % imm_opt:[]|imm, src:temp|mem
+-record(jcc, {cc, label}).
+-record(jmp_fun, {'fun', linkage}). % tailcall, direct or indirect
+-record(jmp_label, {label}). % local jmp, direct
+-record(jmp_switch, {temp, jtab, labels}). % local jmp, indirect
+-record(label, {label}).
+-record(lea, {mem, temp}).
+-record(move, {src, dst}).
+-record(move64, {imm, dst}).
+-record(movsx, {src, dst}).
+-record(movzx, {src, dst}).
+-record(pseudo_call, {'fun', sdesc, contlab, linkage}).
+-record(pseudo_jcc, {cc, true_label, false_label, pred}).
+-record(pseudo_spill, {args=[]}).
+-record(pseudo_tailcall, {'fun', arity, stkargs, linkage}).
+-record(pseudo_tailcall_prepare, {}).
+-record(push, {src}).
+-record(pop, {dst}).
+-record(ret, {npop}). % EAX is live-in
+-record(shift, {shiftop, src, dst}).
+-record(test, {src, dst}).
+
+%%% Function definitions.
+
+-include("../misc/hipe_consttab.hrl").
+
+-record(defun, {mfa :: mfa(), formals, code,
+ data :: hipe_consttab(),
+ isclosure :: boolean(),
+ isleaf :: boolean(),
+ var_range, label_range}).
diff --git a/lib/hipe/x86/hipe_x86_assemble.erl b/lib/hipe/x86/hipe_x86_assemble.erl
new file mode 100644
index 0000000000..4e65736db3
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_assemble.erl
@@ -0,0 +1,1014 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%% HiPE/x86 assembler
+%%%
+%%% TODO:
+%%% - Simplify combine_label_maps and mk_data_relocs.
+%%% - Move find_const to hipe_pack_constants?
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_ASSEMBLE, hipe_amd64_assemble).
+-define(HIPE_X86_ENCODE, hipe_amd64_encode).
+-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
+-define(HIPE_X86_PP, hipe_amd64_pp).
+-ifdef(AMD64_SIMULATE_NSP).
+-define(X86_SIMULATE_NSP, ?AMD64_SIMULATE_NSP).
+-endif.
+-define(EAX, rax).
+-define(REGArch, reg64).
+-define(RMArch, rm64).
+-define(EA_DISP32_ABSOLUTE, ea_disp32_sindex).
+-else.
+-define(HIPE_X86_ASSEMBLE, hipe_x86_assemble).
+-define(HIPE_X86_ENCODE, hipe_x86_encode).
+-define(HIPE_X86_REGISTERS, hipe_x86_registers).
+-define(HIPE_X86_PP, hipe_x86_pp).
+-define(EAX, eax).
+-define(REGArch, reg32).
+-define(RMArch, rm32).
+-define(EA_DISP32_ABSOLUTE, ea_disp32).
+-endif.
+
+-module(?HIPE_X86_ASSEMBLE).
+-export([assemble/4]).
+
+-define(DEBUG,true).
+
+-include("../main/hipe.hrl").
+-include("../x86/hipe_x86.hrl").
+-include("../../kernel/src/hipe_ext_format.hrl").
+-include("../rtl/hipe_literals.hrl").
+-include("../misc/hipe_sdi.hrl").
+-undef(ASSERT).
+-define(ASSERT(G), if G -> [] ; true -> exit({assertion_failed,?MODULE,?LINE,??G}) end).
+
+assemble(CompiledCode, Closures, Exports, Options) ->
+ ?when_option(time, Options, ?start_timer("x86 assembler")),
+ print("****************** Assembling *******************\n", [], Options),
+ %%
+ Code = [{MFA,
+ hipe_x86:defun_code(Defun),
+ hipe_x86:defun_data(Defun)}
+ || {MFA, Defun} <- CompiledCode],
+ %%
+ {ConstAlign,ConstSize,ConstMap,RefsFromConsts} =
+ hipe_pack_constants:pack_constants(Code, ?HIPE_X86_REGISTERS:alignment()),
+ %%
+ {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} =
+ encode(translate(Code, ConstMap, Options), Options),
+ print("Total num bytes=~w\n", [CodeSize], Options),
+ %% put(code_size, CodeSize),
+ %% put(const_size, ConstSize),
+ %% ?when_option(verbose, Options,
+ %% ?debug_msg("Constants are ~w bytes\n",[ConstSize])),
+ %%
+ SC = hipe_pack_constants:slim_constmap(ConstMap),
+ DataRelocs = mk_data_relocs(RefsFromConsts, LabelMap),
+ SSE = slim_sorted_exportmap(ExportMap,Closures,Exports),
+ SlimRefs = hipe_pack_constants:slim_refs(AccRefs),
+ Bin = term_to_binary([{?VERSION_STRING(),?HIPE_SYSTEM_CRC},
+ ConstAlign, ConstSize,
+ SC,
+ DataRelocs, % nee LM, LabelMap
+ SSE,
+ CodeSize,CodeBinary,SlimRefs,
+ 0,[] % ColdCodeSize, SlimColdRefs
+ ]),
+ %%
+ %% ?when_option(time, Options, ?stop_timer("x86 assembler")),
+ Bin.
+
+%%%
+%%% Assembly Pass 1.
+%%% Process initial {MFA,Code,Data} list.
+%%% Translate each MFA's body, choosing operand & instruction kinds.
+%%%
+%%% Assembly Pass 2.
+%%% Perform short/long form optimisation for jumps.
+%%% Build LabelMap for each MFA.
+%%%
+%%% Result is {MFA,NewCode,CodeSize,LabelMap} list.
+%%%
+
+translate(Code, ConstMap, Options) ->
+ translate_mfas(Code, ConstMap, [], Options).
+
+translate_mfas([{MFA,Insns,_Data}|Code], ConstMap, NewCode, Options) ->
+ {NewInsns,CodeSize,LabelMap} =
+ translate_insns(Insns, {MFA,ConstMap}, hipe_sdi:pass1_init(), 0, [], Options),
+ translate_mfas(Code, ConstMap, [{MFA,NewInsns,CodeSize,LabelMap}|NewCode], Options);
+translate_mfas([], _ConstMap, NewCode, _Options) ->
+ lists:reverse(NewCode).
+
+translate_insns([I|Insns], Context, SdiPass1, Address, NewInsns, Options) ->
+ NewIs = translate_insn(I, Context, Options),
+ add_insns(NewIs, Insns, Context, SdiPass1, Address, NewInsns, Options);
+translate_insns([], _Context, SdiPass1, Address, NewInsns, _Options) ->
+ {LabelMap,CodeSizeIncr} = hipe_sdi:pass2(SdiPass1),
+ {lists:reverse(NewInsns), Address+CodeSizeIncr, LabelMap}.
+
+add_insns([I|Is], Insns, Context, SdiPass1, Address, NewInsns, Options) ->
+ NewSdiPass1 =
+ case I of
+ {'.label',L,_} ->
+ hipe_sdi:pass1_add_label(SdiPass1, Address, L);
+ {jcc_sdi,{_,{label,L}},_} ->
+ SdiInfo = #sdi_info{incr=(6-2),lb=(-128)+2,ub=127+2},
+ hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo);
+ {jmp_sdi,{{label,L}},_} ->
+ SdiInfo = #sdi_info{incr=(5-2),lb=(-128)+2,ub=127+2},
+ hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo);
+ _ ->
+ SdiPass1
+ end,
+ Address1 = Address + insn_size(I),
+ add_insns(Is, Insns, Context, NewSdiPass1, Address1, [I|NewInsns], Options);
+add_insns([], Insns, Context, SdiPass1, Address, NewInsns, Options) ->
+ translate_insns(Insns, Context, SdiPass1, Address, NewInsns, Options).
+
+insn_size(I) ->
+ case I of
+ {'.label',_,_} -> 0;
+ {'.sdesc',_,_} -> 0;
+ {jcc_sdi,_,_} -> 2;
+ {jmp_sdi,_,_} -> 2;
+ {Op,Arg,_Orig} -> ?HIPE_X86_ENCODE:insn_sizeof(Op, Arg)
+ end.
+
+translate_insn(I, Context, Options) ->
+ case I of
+ #alu{} ->
+ Arg = resolve_alu_args(hipe_x86:alu_src(I), hipe_x86:alu_dst(I), Context),
+ [{hipe_x86:alu_op(I), Arg, I}];
+ #call{} ->
+ translate_call(I);
+ #cmovcc{} ->
+ {Dst,Src} = resolve_move_args(
+ hipe_x86:cmovcc_src(I), hipe_x86:cmovcc_dst(I),
+ Context),
+ CC = {cc,?HIPE_X86_ENCODE:cc(hipe_x86:cmovcc_cc(I))},
+ Arg = {CC,Dst,Src},
+ [{cmovcc, Arg, I}];
+ #cmp{} ->
+ Arg = resolve_alu_args(hipe_x86:cmp_src(I), hipe_x86:cmp_dst(I), Context),
+ [{cmp, Arg, I}];
+ #comment{} ->
+ [];
+ #fmove{} ->
+ {Op,Arg} = resolve_sse2_fmove_args(hipe_x86:fmove_src(I),
+ hipe_x86:fmove_dst(I)),
+ [{Op, Arg, I}];
+ #fp_binop{} ->
+ case proplists:get_bool(x87, Options) of
+ true -> % x87
+ Arg = resolve_x87_binop_args(hipe_x86:fp_binop_src(I),
+ hipe_x86:fp_binop_dst(I)),
+ [{hipe_x86:fp_binop_op(I), Arg, I}];
+ false -> % sse2
+ Arg = resolve_sse2_binop_args(hipe_x86:fp_binop_src(I),
+ hipe_x86:fp_binop_dst(I)),
+ [{resolve_sse2_op(hipe_x86:fp_binop_op(I)), Arg, I}]
+ end;
+ #fp_unop{} ->
+ case proplists:get_bool(x87, Options) of
+ true -> % x87
+ Arg = resolve_x87_unop_arg(hipe_x86:fp_unop_arg(I)),
+ [{hipe_x86:fp_unop_op(I), Arg, I}];
+ false -> % sse2
+ case hipe_x86:fp_unop_op(I) of
+ 'fchs' ->
+ Arg = resolve_sse2_fchs_arg(hipe_x86:fp_unop_arg(I)),
+ [{'xorpd', Arg, I}];
+ 'fwait' -> % no op on sse2, magic on x87
+ []
+ end
+ end;
+ #imul{} ->
+ translate_imul(I, Context);
+ #jcc{} ->
+ Cc = {cc,?HIPE_X86_ENCODE:cc(hipe_x86:jcc_cc(I))},
+ Label = translate_label(hipe_x86:jcc_label(I)),
+ [{jcc_sdi, {Cc,Label}, I}];
+ #jmp_fun{} ->
+ %% call and jmp are patched the same, so no need to distinguish
+ %% call from tailcall
+ PatchTypeExt =
+ case hipe_x86:jmp_fun_linkage(I) of
+ remote -> ?CALL_REMOTE;
+ not_remote -> ?CALL_LOCAL
+ end,
+ Arg = translate_fun(hipe_x86:jmp_fun_fun(I), PatchTypeExt),
+ [{jmp, {Arg}, I}];
+ #jmp_label{} ->
+ Arg = translate_label(hipe_x86:jmp_label_label(I)),
+ [{jmp_sdi, {Arg}, I}];
+ #jmp_switch{} ->
+ RM32 = resolve_jmp_switch_arg(I, Context),
+ [{jmp, {RM32}, I}];
+ #label{} ->
+ [{'.label', hipe_x86:label_label(I), I}];
+ #lea{} ->
+ Arg = resolve_lea_args(hipe_x86:lea_mem(I), hipe_x86:lea_temp(I)),
+ [{lea, Arg, I}];
+ #move{} ->
+ Arg = resolve_move_args(hipe_x86:move_src(I), hipe_x86:move_dst(I),
+ Context),
+ [{mov, Arg, I}];
+ #move64{} ->
+ translate_move64(I, Context);
+ #movsx{} ->
+ Arg = resolve_movx_args(hipe_x86:movsx_src(I), hipe_x86:movsx_dst(I)),
+ [{movsx, Arg, I}];
+ #movzx{} ->
+ Arg = resolve_movx_args(hipe_x86:movzx_src(I), hipe_x86:movzx_dst(I)),
+ [{movzx, Arg, I}];
+ %% pseudo_call: eliminated before assembly
+ %% pseudo_jcc: eliminated before assembly
+ %% pseudo_tailcall: eliminated before assembly
+ %% pseudo_tailcall_prepare: eliminated before assembly
+ #pop{} ->
+ Arg = translate_dst(hipe_x86:pop_dst(I)),
+ [{pop, {Arg}, I}];
+ #push{} ->
+ Arg = translate_src(hipe_x86:push_src(I), Context),
+ [{push, {Arg}, I}];
+ #ret{} ->
+ translate_ret(I);
+ #shift{} ->
+ Arg = resolve_shift_args(hipe_x86:shift_src(I), hipe_x86:shift_dst(I), Context),
+ [{hipe_x86:shift_op(I), Arg, I}];
+ #test{} ->
+ Arg = resolve_test_args(hipe_x86:test_src(I), hipe_x86:test_dst(I), Context),
+ [{test, Arg, I}]
+ end.
+
+-ifdef(X86_SIMULATE_NSP).
+-ifdef(HIPE_AMD64).
+translate_call(I) ->
+ WordSize = hipe_amd64_registers:wordsize(),
+ RegSP = 2#100, % esp/rsp
+ TempSP = hipe_x86:mk_temp(RegSP, untagged),
+ FunOrig = hipe_x86:call_fun(I),
+ Fun =
+ case FunOrig of
+ #x86_mem{base=#x86_temp{reg=4}, off=#x86_imm{value=Off}} ->
+ FunOrig#x86_mem{off=#x86_imm{value=Off+WordSize}};
+ _ -> FunOrig
+ end,
+ RegRA =
+ begin
+ RegTemp0 = hipe_amd64_registers:temp0(),
+ RegTemp1 = hipe_amd64_registers:temp1(),
+ case Fun of
+ #x86_temp{reg=RegTemp0} -> RegTemp1;
+ #x86_mem{base=#x86_temp{reg=RegTemp0}} -> RegTemp1;
+ _ -> RegTemp0
+ end
+ end,
+ TempRA = hipe_x86:mk_temp(RegRA, untagged),
+ PatchTypeExt =
+ case hipe_x86:call_linkage(I) of
+ remote -> ?CALL_REMOTE;
+ not_remote -> ?CALL_LOCAL
+ end,
+ JmpArg = translate_fun(Fun, PatchTypeExt),
+ I4 = {'.sdesc', hipe_x86:call_sdesc(I), #comment{term=sdesc}},
+ I3 = {jmp, {JmpArg}, #comment{term=call}},
+ Size3 = hipe_amd64_encode:insn_sizeof(jmp, {JmpArg}),
+ MovArgs = {mem_to_rmArch(hipe_x86:mk_mem(TempSP,
+ hipe_x86:mk_imm(0),
+ untagged)),
+ temp_to_regArch(TempRA)},
+ I2 = {mov, MovArgs, #comment{term=call}},
+ Size2 = hipe_amd64_encode:insn_sizeof(mov, MovArgs),
+ I1 = {lea, {temp_to_regArch(TempRA),
+ {ea, hipe_amd64_encode:ea_disp32_rip(Size2+Size3)}},
+ #comment{term=call}},
+ I0 = {sub, {temp_to_rmArch(TempSP), {imm8,WordSize}}, I},
+ [I0,I1,I2,I3,I4].
+-else.
+translate_call(I) ->
+ WordSize = ?HIPE_X86_REGISTERS:wordsize(),
+ RegSP = 2#100, % esp/rsp
+ TempSP = hipe_x86:mk_temp(RegSP, untagged),
+ FunOrig = hipe_x86:call_fun(I),
+ Fun =
+ case FunOrig of
+ #x86_mem{base=#x86_temp{reg=4}, off=#x86_imm{value=Off}} ->
+ FunOrig#x86_mem{off=#x86_imm{value=Off+WordSize}};
+ _ -> FunOrig
+ end,
+ PatchTypeExt =
+ case hipe_x86:call_linkage(I) of
+ remote -> ?CALL_REMOTE;
+ not_remote -> ?CALL_LOCAL
+ end,
+ JmpArg = translate_fun(Fun, PatchTypeExt),
+ I3 = {'.sdesc', hipe_x86:call_sdesc(I), #comment{term=sdesc}},
+ I2 = {jmp, {JmpArg}, #comment{term=call}},
+ Size2 = ?HIPE_X86_ENCODE:insn_sizeof(jmp, {JmpArg}),
+ I1 = {mov, {mem_to_rmArch(hipe_x86:mk_mem(TempSP,
+ hipe_x86:mk_imm(0),
+ untagged)),
+ {imm32,{?X86ABSPCREL,4+Size2}}},
+ #comment{term=call}},
+ I0 = {sub, {temp_to_rmArch(TempSP), {imm8,WordSize}}, I},
+ [I0,I1,I2,I3].
+-endif.
+
+translate_ret(I) ->
+ NPOP = hipe_x86:ret_npop(I) + ?HIPE_X86_REGISTERS:wordsize(),
+ RegSP = 2#100, % esp/rsp
+ TempSP = hipe_x86:mk_temp(RegSP, untagged),
+ RegRA = 2#011, % ebx/rbx
+ TempRA = hipe_x86:mk_temp(RegRA, untagged),
+ [{mov,
+ {temp_to_regArch(TempRA),
+ mem_to_rmArch(hipe_x86:mk_mem(TempSP,
+ hipe_x86:mk_imm(0),
+ untagged))},
+ I},
+ {add,
+ {temp_to_rmArch(TempSP),
+ case NPOP < 128 of
+ true -> {imm8,NPOP};
+ false -> {imm32,NPOP}
+ end},
+ #comment{term=ret}},
+ {jmp,
+ {temp_to_rmArch(TempRA)},
+ #comment{term=ret}}].
+
+-else. % not X86_SIMULATE_NSP
+
+translate_call(I) ->
+ %% call and jmp are patched the same, so no need to distinguish
+ %% call from tailcall
+ PatchTypeExt =
+ case hipe_x86:call_linkage(I) of
+ remote -> ?CALL_REMOTE;
+ not_remote -> ?CALL_LOCAL
+ end,
+ Arg = translate_fun(hipe_x86:call_fun(I), PatchTypeExt),
+ SDesc = hipe_x86:call_sdesc(I),
+ [{call, {Arg}, I}, {'.sdesc', SDesc, #comment{term=sdesc}}].
+
+translate_ret(I) ->
+ Arg =
+ case hipe_x86:ret_npop(I) of
+ 0 -> {};
+ N -> {{imm16,N}}
+ end,
+ [{ret, Arg, I}].
+
+-endif. % X86_SIMULATE_NSP
+
+translate_imul(I, Context) ->
+ Temp = temp_to_regArch(hipe_x86:imul_temp(I)),
+ Src = temp_or_mem_to_rmArch(hipe_x86:imul_src(I)),
+ Args =
+ case hipe_x86:imul_imm_opt(I) of
+ [] -> {Temp,Src};
+ Imm -> {Temp,Src,translate_imm(Imm, Context, true)}
+ end,
+ [{'imul', Args, I}].
+
+temp_or_mem_to_rmArch(Src) ->
+ case Src of
+ #x86_temp{} -> temp_to_rmArch(Src);
+ #x86_mem{} -> mem_to_rmArch(Src)
+ end.
+
+translate_label(Label) when is_integer(Label) ->
+ {label,Label}. % symbolic, since offset is not yet computable
+
+translate_fun(Arg, PatchTypeExt) ->
+ case Arg of
+ #x86_temp{} ->
+ temp_to_rmArch(Arg);
+ #x86_mem{} ->
+ mem_to_rmArch(Arg);
+ #x86_mfa{m=M,f=F,a=A} ->
+ {rel32,{PatchTypeExt,{M,F,A}}};
+ #x86_prim{prim=Prim} ->
+ {rel32,{PatchTypeExt,Prim}}
+ end.
+
+translate_src(Src, Context) ->
+ case Src of
+ #x86_imm{} ->
+ translate_imm(Src, Context, true);
+ _ ->
+ translate_dst(Src)
+ end.
+
+%%% MayTrunc8 controls whether negative Imm8s should be truncated
+%%% to 8 bits or not. Truncation should always be done, except when
+%%% the caller will widen the Imm8 to an Imm32 or Imm64.
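+%%% For example, translate_imm(#x86_imm{value=-1}, Context, true) yields
+%%% {imm8,16#FF}, while with MayTrunc8=false it yields {imm8,-1} so that
+%%% the caller can widen it to a correctly sign-extended imm32/imm64.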
+translate_imm(#x86_imm{value=Imm}, Context, MayTrunc8) ->
+ if is_atom(Imm) ->
+ {imm32,{?LOAD_ATOM,Imm}};
+ is_integer(Imm) ->
+ case (Imm =< 127) and (Imm >= -128) of
+ true ->
+ Imm8 =
+ case MayTrunc8 of
+ true -> Imm band 16#FF;
+ false -> Imm
+ end,
+ {imm8,Imm8};
+ false ->
+ {imm32,Imm}
+ end;
+ true ->
+ Val =
+ case Imm of
+ {Label,constant} ->
+ {MFA,ConstMap} = Context,
+ ConstNo = find_const({MFA,Label}, ConstMap),
+ {constant,ConstNo};
+ {Label,closure} ->
+ {closure,Label};
+ {Label,c_const} ->
+ {c_const,Label}
+ end,
+ {imm32,{?LOAD_ADDRESS,Val}}
+ end.
+
+translate_dst(Dst) ->
+ case Dst of
+ #x86_temp{} ->
+ temp_to_regArch(Dst);
+ #x86_mem{type='double'} ->
+ mem_to_rm64fp(Dst);
+ #x86_mem{} ->
+ mem_to_rmArch(Dst);
+ #x86_fpreg{} ->
+ fpreg_to_stack(Dst)
+ end.
+
+%%%
+%%% Assembly Pass 3.
+%%% Process final {MFA,Code,CodeSize,LabelMap} list from pass 2.
+%%% Translate to a single binary code segment.
+%%% Collect relocation patches.
+%%% Build ExportMap (MFA-to-address mapping).
+%%% Combine LabelMaps to a single one (for mk_data_relocs/2 compatibility).
+%%% Return {CombinedCodeSize,BinaryCode,Relocs,CombinedLabelMap,ExportMap}.
+%%%
+
+encode(Code, Options) ->
+ CodeSize = compute_code_size(Code, 0),
+ ExportMap = build_export_map(Code, 0, []),
+ {AccCode,Relocs} = encode_mfas(Code, 0, [], [], Options),
+ CodeBinary = list_to_binary(lists:reverse(AccCode)),
+ ?ASSERT(CodeSize =:= byte_size(CodeBinary)),
+ CombinedLabelMap = combine_label_maps(Code, 0, gb_trees:empty()),
+ {CodeSize,CodeBinary,Relocs,CombinedLabelMap,ExportMap}.
+
+nr_pad_bytes(Address) -> (4 - (Address rem 4)) rem 4. % XXX: 16 or 32 instead?
+
+align_entry(Address) -> Address + nr_pad_bytes(Address).
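+%% Example: nr_pad_bytes(13) =:= 3, so align_entry(13) =:= 16; addresses
+%% that are already 4-byte aligned are left unchanged.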
+
+compute_code_size([{_MFA,_Insns,CodeSize,_LabelMap}|Code], Size) ->
+ compute_code_size(Code, align_entry(Size+CodeSize));
+compute_code_size([], Size) -> Size.
+
+build_export_map([{{M,F,A},_Insns,CodeSize,_LabelMap}|Code], Address, ExportMap) ->
+ build_export_map(Code, align_entry(Address+CodeSize), [{Address,M,F,A}|ExportMap]);
+build_export_map([], _Address, ExportMap) -> ExportMap.
+
+combine_label_maps([{MFA,_Insns,CodeSize,LabelMap}|Code], Address, CLM) ->
+ NewCLM = merge_label_map(gb_trees:to_list(LabelMap), MFA, Address, CLM),
+ combine_label_maps(Code, align_entry(Address+CodeSize), NewCLM);
+combine_label_maps([], _Address, CLM) -> CLM.
+
+merge_label_map([{Label,Offset}|Rest], MFA, Address, CLM) ->
+ NewCLM = gb_trees:insert({MFA,Label}, Address+Offset, CLM),
+ merge_label_map(Rest, MFA, Address, NewCLM);
+merge_label_map([], _MFA, _Address, CLM) -> CLM.
+
+encode_mfas([{MFA,Insns,CodeSize,LabelMap}|Code], Address, AccCode, Relocs, Options) ->
+ print("Generating code for:~w\n", [MFA], Options),
+ print("Offset | Opcode | Instruction\n", [], Options),
+ {Address1,Relocs1,AccCode1} =
+ encode_insns(Insns, Address, Address, LabelMap, Relocs, AccCode, Options),
+ ExpectedAddress = align_entry(Address + CodeSize),
+ ?ASSERT(Address1 =:= ExpectedAddress),
+ print("Finished.\n\n", [], Options),
+ encode_mfas(Code, Address1, AccCode1, Relocs1, Options);
+encode_mfas([], _Address, AccCode, Relocs, _Options) ->
+ {AccCode, Relocs}.
+
+encode_insns([I|Insns], Address, FunAddress, LabelMap, Relocs, AccCode, Options) ->
+ case I of
+ {'.label',L,_} ->
+ LabelAddress = gb_trees:get(L, LabelMap) + FunAddress,
+ ?ASSERT(Address =:= LabelAddress), % sanity check
+ print_insn(Address, [], I, Options),
+ encode_insns(Insns, Address, FunAddress, LabelMap, Relocs, AccCode, Options);
+ {'.sdesc',SDesc,_} ->
+ #x86_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live} = SDesc,
+ ExnRA =
+ case ExnLab of
+ [] -> []; % don't cons up a new one
+ ExnLab -> gb_trees:get(ExnLab, LabelMap) + FunAddress
+ end,
+ Reloc = {?SDESC, Address,
+ ?STACK_DESC(ExnRA, FSize, Arity, Live)},
+ encode_insns(Insns, Address, FunAddress, LabelMap, [Reloc|Relocs], AccCode, Options);
+ _ ->
+ {Op,Arg,_} = fix_jumps(I, Address, FunAddress, LabelMap),
+ {Bytes, NewRelocs} = ?HIPE_X86_ENCODE:insn_encode(Op, Arg, Address),
+ print_insn(Address, Bytes, I, Options),
+ Segment = list_to_binary(Bytes),
+ Size = byte_size(Segment),
+ NewAccCode = [Segment|AccCode],
+ encode_insns(Insns, Address+Size, FunAddress, LabelMap, NewRelocs++Relocs, NewAccCode, Options)
+ end;
+encode_insns([], Address, FunAddress, LabelMap, Relocs, AccCode, Options) ->
+ case nr_pad_bytes(Address) of
+ 0 ->
+ {Address,Relocs,AccCode};
+ NrPadBytes -> % triggers at most once per function body
+ Padding = lists:duplicate(NrPadBytes, {nop,{},#comment{term=padding}}),
+ encode_insns(Padding, Address, FunAddress, LabelMap, Relocs, AccCode, Options)
+ end.
+
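+%%% Rewrite the symbolic jcc_sdi/jmp_sdi forms into concrete short- or
+%%% long-form jumps now that label addresses are known. The short forms
+%%% are 2 bytes, long jcc is 6 bytes and long jmp is 5, which is where
+%%% the +2, +6 and +5 offsets below come from.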
+fix_jumps(I, InsnAddress, FunAddress, LabelMap) ->
+ case I of
+ {jcc_sdi,{CC,{label,L}},OrigI} ->
+ LabelAddress = gb_trees:get(L, LabelMap) + FunAddress,
+ ShortOffset = LabelAddress - (InsnAddress + 2),
+ if is_integer(ShortOffset), ShortOffset >= -128, ShortOffset =< 127 ->
+ {jcc,{CC,{rel8,ShortOffset band 16#FF}},OrigI};
+ true ->
+ LongOffset = LabelAddress - (InsnAddress + 6),
+ {jcc,{CC,{rel32,LongOffset}},OrigI}
+ end;
+ {jmp_sdi,{{label,L}},OrigI} ->
+ LabelAddress = gb_trees:get(L, LabelMap) + FunAddress,
+ ShortOffset = LabelAddress - (InsnAddress + 2),
+ if is_integer(ShortOffset), ShortOffset >= -128, ShortOffset =< 127 ->
+ {jmp,{{rel8,ShortOffset band 16#FF}},OrigI};
+ true ->
+ LongOffset = LabelAddress - (InsnAddress + 5),
+ {jmp,{{rel32,LongOffset}},OrigI}
+ end;
+ _ -> I
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+fpreg_to_stack(#x86_fpreg{reg=Reg}) ->
+ {fpst, Reg}.
+
+temp_to_regArch(#x86_temp{reg=Reg}) ->
+ {?REGArch, Reg}.
+
+-ifdef(HIPE_AMD64).
+temp_to_reg64(#x86_temp{reg=Reg}) ->
+ {reg64, Reg}.
+-endif.
+
+temp_to_reg32(#x86_temp{reg=Reg}) ->
+ {reg32, Reg}.
+temp_to_reg16(#x86_temp{reg=Reg}) ->
+ {reg16, Reg}.
+temp_to_reg8(#x86_temp{reg=Reg}) ->
+ {reg8, Reg}.
+
+temp_to_xmm(#x86_temp{reg=Reg}) ->
+ {xmm, Reg}.
+
+-ifdef(HIPE_AMD64).
+temp_to_rm64(#x86_temp{reg=Reg}) ->
+ {rm64, hipe_amd64_encode:rm_reg(Reg)}.
+-endif.
+
+temp_to_rmArch(#x86_temp{reg=Reg}) ->
+ {?RMArch, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
+temp_to_rm64fp(#x86_temp{reg=Reg}) ->
+ {rm64fp, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
+
+mem_to_ea(Mem) ->
+ EA = mem_to_ea_common(Mem),
+ {ea, EA}.
+
+mem_to_rm32(Mem) ->
+ EA = mem_to_ea_common(Mem),
+ {rm32, ?HIPE_X86_ENCODE:rm_mem(EA)}.
+
+mem_to_rmArch(Mem) ->
+ EA = mem_to_ea_common(Mem),
+ {?RMArch, ?HIPE_X86_ENCODE:rm_mem(EA)}.
+
+mem_to_rm64fp(Mem) ->
+ EA = mem_to_ea_common(Mem),
+ {rm64fp, ?HIPE_X86_ENCODE:rm_mem(EA)}.
+
+%%%%%%%%%%%%%%%%%
+mem_to_rm8(Mem) ->
+ EA = mem_to_ea_common(Mem),
+ {rm8, ?HIPE_X86_ENCODE:rm_mem(EA)}.
+
+mem_to_rm16(Mem) ->
+ EA = mem_to_ea_common(Mem),
+ {rm16, ?HIPE_X86_ENCODE:rm_mem(EA)}.
+%%%%%%%%%%%%%%%%%
+
+mem_to_ea_common(#x86_mem{base=[], off=#x86_imm{value=Off}}) ->
+ ?HIPE_X86_ENCODE:?EA_DISP32_ABSOLUTE(Off);
+mem_to_ea_common(#x86_mem{base=#x86_temp{reg=Base}, off=#x86_temp{reg=Index}}) ->
+ case Base band 2#111 of
+ 5 -> % ebp/rbp or r13
+ case Index band 2#111 of
+ 5 -> % ebp/rbp or r13
+ SINDEX = ?HIPE_X86_ENCODE:sindex(0, Index),
+ SIB = ?HIPE_X86_ENCODE:sib(Base, SINDEX),
+ ?HIPE_X86_ENCODE:ea_disp8_sib(0, SIB);
+ _ ->
+ SINDEX = ?HIPE_X86_ENCODE:sindex(0, Base),
+ SIB = ?HIPE_X86_ENCODE:sib(Index, SINDEX),
+ ?HIPE_X86_ENCODE:ea_sib(SIB)
+ end;
+ _ ->
+ SINDEX = ?HIPE_X86_ENCODE:sindex(0, Index),
+ SIB = ?HIPE_X86_ENCODE:sib(Base, SINDEX),
+ ?HIPE_X86_ENCODE:ea_sib(SIB)
+ end;
+mem_to_ea_common(#x86_mem{base=#x86_temp{reg=Base}, off=#x86_imm{value=Off}}) ->
+ if
+ Off =:= 0 ->
+ case Base of
+ 4 -> %esp, use SIB w/o disp8
+ SIB = ?HIPE_X86_ENCODE:sib(Base),
+ ?HIPE_X86_ENCODE:ea_sib(SIB);
+ 5 -> %ebp, use disp8 w/o SIB
+ ?HIPE_X86_ENCODE:ea_disp8_base(Off, Base);
+ 12 -> %r12, use SIB w/o disp8
+ SIB = ?HIPE_X86_ENCODE:sib(Base),
+ ?HIPE_X86_ENCODE:ea_sib(SIB);
+ 13 -> %r13, use disp8 w/o SIB
+ ?HIPE_X86_ENCODE:ea_disp8_base(Off, Base);
+ _ -> %neither SIB nor disp8 needed
+ ?HIPE_X86_ENCODE:ea_base(Base)
+ end;
+ Off >= -128, Off =< 127 ->
+ Disp8 = Off band 16#FF,
+ case Base of
+ 4 -> %esp, must use SIB
+ SIB = ?HIPE_X86_ENCODE:sib(Base),
+ ?HIPE_X86_ENCODE:ea_disp8_sib(Disp8, SIB);
+ 12 -> %r12, must use SIB
+ SIB = ?HIPE_X86_ENCODE:sib(Base),
+ ?HIPE_X86_ENCODE:ea_disp8_sib(Disp8, SIB);
+ _ -> %use disp8 w/o SIB
+ ?HIPE_X86_ENCODE:ea_disp8_base(Disp8, Base)
+ end;
+ true ->
+ case Base of
+ 4 -> %esp, must use SIB
+ SIB = ?HIPE_X86_ENCODE:sib(Base),
+ ?HIPE_X86_ENCODE:ea_disp32_sib(Off, SIB);
+ 12 -> %r12, must use SIB
+ SIB = ?HIPE_X86_ENCODE:sib(Base),
+ ?HIPE_X86_ENCODE:ea_disp32_sib(Off, SIB);
+ _ ->
+ ?HIPE_X86_ENCODE:ea_disp32_base(Off, Base)
+ end
+ end.
+
+%% jmp_switch
+-ifdef(HIPE_AMD64).
+resolve_jmp_switch_arg(I, _Context) ->
+ Base = hipe_x86:temp_reg(hipe_x86:jmp_switch_jtab(I)),
+ Index = hipe_x86:temp_reg(hipe_x86:jmp_switch_temp(I)),
+ SINDEX = hipe_amd64_encode:sindex(3, Index),
+ SIB = hipe_amd64_encode:sib(Base, SINDEX),
+ EA =
+ if (Base =:= 5) or (Base =:= 13) ->
+ hipe_amd64_encode:ea_disp8_sib(0, SIB);
+ true ->
+ hipe_amd64_encode:ea_sib(SIB)
+ end,
+ {rm64,hipe_amd64_encode:rm_mem(EA)}.
+-else.
+resolve_jmp_switch_arg(I, {MFA,ConstMap}) ->
+ ConstNo = find_const({MFA,hipe_x86:jmp_switch_jtab(I)}, ConstMap),
+ Disp32 = {?LOAD_ADDRESS,{constant,ConstNo}},
+ SINDEX = ?HIPE_X86_ENCODE:sindex(2, hipe_x86:temp_reg(hipe_x86:jmp_switch_temp(I))),
+ EA = ?HIPE_X86_ENCODE:ea_disp32_sindex(Disp32, SINDEX), % this creates a SIB implicitly
+ {rm32,?HIPE_X86_ENCODE:rm_mem(EA)}.
+-endif.
+
+%% lea reg, mem
+resolve_lea_args(Src=#x86_mem{}, Dst=#x86_temp{}) ->
+ {temp_to_regArch(Dst),mem_to_ea(Src)}.
+
+resolve_sse2_op(Op) ->
+ case Op of
+ fadd -> addsd;
+ fdiv -> divsd;
+ fmul -> mulsd;
+ fsub -> subsd;
+ _ -> exit({?MODULE, unknown_sse2_operator, Op})
+ end.
+
+%% OP xmm, mem
+resolve_sse2_binop_args(Src=#x86_mem{type=double},
+ Dst=#x86_temp{type=double}) ->
+ {temp_to_xmm(Dst),mem_to_rm64fp(Src)};
+%% movsd mem, xmm
+resolve_sse2_binop_args(Src=#x86_temp{type=double},
+ Dst=#x86_mem{type=double}) ->
+ {mem_to_rm64fp(Dst),temp_to_xmm(Src)};
+%% OP xmm, xmm
+resolve_sse2_binop_args(Src=#x86_temp{type=double},
+ Dst=#x86_temp{type=double}) ->
+ {temp_to_xmm(Dst),temp_to_rm64fp(Src)}.
+
+%%% fmove -> cvtsi2sd or movsd
+resolve_sse2_fmove_args(Src, Dst) ->
+ case {Src,Dst} of
+ {#x86_temp{type=untagged}, #x86_temp{type=double}} -> % cvtsi2sd xmm, reg
+ {cvtsi2sd, {temp_to_xmm(Dst),temp_to_rmArch(Src)}};
+ {#x86_mem{type=untagged}, #x86_temp{type=double}} -> % cvtsi2sd xmm, mem
+ {cvtsi2sd, {temp_to_xmm(Dst),mem_to_rmArch(Src)}};
+ _ -> % movsd
+ {movsd, resolve_sse2_binop_args(Src, Dst)}
+ end.
+
+%%% xorpd xmm, mem
+resolve_sse2_fchs_arg(Dst=#x86_temp{type=double}) ->
+ {temp_to_xmm(Dst),
+ {rm64fp, {rm_mem, ?HIPE_X86_ENCODE:?EA_DISP32_ABSOLUTE(
+ {?LOAD_ADDRESS,
+ {c_const, sse2_fnegate_mask}})}}}.
+
+%% mov mem, imm
+resolve_move_args(#x86_imm{value=ImmSrc}, Dst=#x86_mem{type=Type}, Context) ->
+ case Type of % to support byte, int16 and int32 stores
+ byte ->
+      ByteImm = ImmSrc band 255, % to ensure that it is a byte-sized imm
+ {mem_to_rm8(Dst),{imm8,ByteImm}};
+ int16 ->
+ {mem_to_rm16(Dst),{imm16,ImmSrc band 16#FFFF}};
+ int32 ->
+ {_,Imm} = translate_imm(#x86_imm{value=ImmSrc}, Context, false),
+ {mem_to_rm32(Dst),{imm32,Imm}};
+ _ ->
+ RMArch = mem_to_rmArch(Dst),
+ {_,Imm} = translate_imm(#x86_imm{value=ImmSrc}, Context, false),
+ {RMArch,{imm32,Imm}}
+ end;
+
+%% mov reg,mem
+resolve_move_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}, _Context) ->
+ case Type of
+ int32 -> % must be unsigned
+ {temp_to_reg32(Dst),mem_to_rm32(Src)};
+ _ ->
+ {temp_to_regArch(Dst),mem_to_rmArch(Src)}
+ end;
+
+%% mov mem,reg
+resolve_move_args(Src=#x86_temp{}, Dst=#x86_mem{type=Type}, _Context) ->
+ case Type of % to support byte, int16 and int32 stores
+ byte ->
+ {mem_to_rm8(Dst),temp_to_reg8(Src)};
+ int16 ->
+ {mem_to_rm16(Dst),temp_to_reg16(Src)};
+ int32 ->
+ {mem_to_rm32(Dst),temp_to_reg32(Src)};
+ tagged -> % tagged, untagged
+ {mem_to_rmArch(Dst),temp_to_regArch(Src)};
+ untagged -> % tagged, untagged
+ {mem_to_rmArch(Dst),temp_to_regArch(Src)}
+ end;
+
+%% mov reg,reg
+resolve_move_args(Src=#x86_temp{}, Dst=#x86_temp{}, _Context) ->
+ {temp_to_regArch(Dst),temp_to_rmArch(Src)};
+
+%% mov reg,imm
+resolve_move_args(Src=#x86_imm{value=_ImmSrc}, Dst=#x86_temp{}, Context) ->
+ {_,Imm} = translate_imm(Src, Context, false),
+ imm_move_args(Dst, Imm).
+
+-ifdef(HIPE_AMD64).
+imm_move_args(Dst, Imm) ->
+ if is_number(Imm), Imm >= 0 ->
+ {temp_to_reg32(Dst),{imm32,Imm}};
+ true ->
+ {temp_to_rm64(Dst),{imm32,Imm}}
+ end.
+-else.
+imm_move_args(Dst, Imm) ->
+ {temp_to_reg32(Dst),{imm32,Imm}}.
+-endif.
+
+-ifdef(HIPE_AMD64).
+translate_move64(I, Context) ->
+ Arg = resolve_move64_args(hipe_x86:move64_src(I),
+ hipe_x86:move64_dst(I),
+ Context),
+ [{mov, Arg, I}].
+
+%% mov reg,imm64
+resolve_move64_args(Src=#x86_imm{}, Dst=#x86_temp{}, Context) ->
+ {_,Imm} = translate_imm(Src, Context, false),
+ {temp_to_reg64(Dst),{imm64,Imm}}.
+-else.
+translate_move64(I, _Context) -> exit({?MODULE, I}).
+-endif.
+
+%%% mov{s,z}x
+resolve_movx_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}) ->
+ {temp_to_regArch(Dst),
+ case Type of
+ byte ->
+ mem_to_rm8(Src);
+ int16 ->
+ mem_to_rm16(Src);
+ int32 ->
+ mem_to_rm32(Src)
+ end}.
+
+%%% alu/cmp (_not_ test)
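+%%% For the eax,imm case below: the generic rm form is used for an imm8
+%%% operand (3 bytes) and the short EAX-accumulator form only for a full
+%%% imm32 (5 bytes rather than 6), cf. arith_binop_sizeof/1 in the encoder.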
+resolve_alu_args(Src, Dst, Context) ->
+ case {Src,Dst} of
+ {#x86_imm{}, #x86_mem{}} ->
+ {mem_to_rmArch(Dst), translate_imm(Src, Context, true)};
+ {#x86_mem{}, #x86_temp{}} ->
+ {temp_to_regArch(Dst), mem_to_rmArch(Src)};
+ {#x86_temp{}, #x86_mem{}} ->
+ {mem_to_rmArch(Dst), temp_to_regArch(Src)};
+ {#x86_temp{}, #x86_temp{}} ->
+ {temp_to_regArch(Dst), temp_to_rmArch(Src)};
+ {#x86_imm{}, #x86_temp{reg=0}} -> % eax,imm
+ NewSrc = translate_imm(Src, Context, true),
+ NewDst =
+ case NewSrc of
+ {imm8,_} -> temp_to_rmArch(Dst);
+ {imm32,_} -> ?EAX
+ end,
+ {NewDst, NewSrc};
+ {#x86_imm{}, #x86_temp{}} ->
+ {temp_to_rmArch(Dst), translate_imm(Src, Context, true)}
+ end.
+
+%%% test
+resolve_test_args(Src, Dst, Context) ->
+ case Src of
+ #x86_imm{} -> % imm8 not allowed
+ {_ImmSize,ImmValue} = translate_imm(Src, Context, false),
+ NewDst =
+ case Dst of
+ #x86_temp{reg=0} -> ?EAX;
+ #x86_temp{} -> temp_to_rmArch(Dst);
+ #x86_mem{} -> mem_to_rmArch(Dst)
+ end,
+ {NewDst, {imm32,ImmValue}};
+ #x86_temp{} ->
+ NewDst =
+ case Dst of
+ #x86_temp{} -> temp_to_rmArch(Dst);
+ #x86_mem{} -> mem_to_rmArch(Dst)
+ end,
+ {NewDst, temp_to_regArch(Src)}
+ end.
+
+%%% shifts
+resolve_shift_args(Src, Dst, Context) ->
+ RM32 =
+ case Dst of
+ #x86_temp{} -> temp_to_rmArch(Dst);
+ #x86_mem{} -> mem_to_rmArch(Dst)
+ end,
+ Count =
+ case Src of
+ #x86_imm{value=1} -> 1;
+ #x86_imm{} -> translate_imm(Src, Context, true); % must be imm8
+ #x86_temp{reg=1} -> cl % temp must be ecx
+ end,
+ {RM32, Count}.
+
+%% x87_unop mem
+resolve_x87_unop_arg(Arg=#x86_mem{type=Type})->
+ case Type of
+ 'double' -> {mem_to_rm64fp(Arg)};
+ 'untagged' -> {mem_to_rmArch(Arg)};
+ _ -> ?EXIT({fmovArgNotSupported,{Arg}})
+ end;
+resolve_x87_unop_arg(Arg=#x86_fpreg{}) ->
+ {fpreg_to_stack(Arg)};
+resolve_x87_unop_arg([]) ->
+ [].
+
+%% x87_binop mem, st(i)
+resolve_x87_binop_args(Src=#x86_fpreg{}, Dst=#x86_mem{})->
+ {mem_to_rm64fp(Dst),fpreg_to_stack(Src)};
+%% x87_binop st(0), st(i)
+resolve_x87_binop_args(Src=#x86_fpreg{}, Dst=#x86_fpreg{})->
+ {fpreg_to_stack(Dst),fpreg_to_stack(Src)}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+mk_data_relocs(RefsFromConsts, LabelMap) ->
+ lists:flatten(mk_data_relocs(RefsFromConsts, LabelMap, [])).
+
+mk_data_relocs([{MFA,Labels} | Rest], LabelMap, Acc) ->
+ Map = [case Label of
+ {L,Pos} ->
+ Offset = find({MFA,L}, LabelMap),
+ {Pos,Offset};
+ {sorted,Base,OrderedLabels} ->
+ {sorted, Base, [begin
+ Offset = find({MFA,L}, LabelMap),
+ {Order, Offset}
+ end
+ || {L,Order} <- OrderedLabels]}
+ end
+ || Label <- Labels],
+ %% msg("Map: ~w Map\n",[Map]),
+ mk_data_relocs(Rest, LabelMap, [Map,Acc]);
+mk_data_relocs([],_,Acc) -> Acc.
+
+find({MFA,L},LabelMap) ->
+ gb_trees:get({MFA,L}, LabelMap).
+
+slim_sorted_exportmap([{Addr,M,F,A}|Rest], Closures, Exports) ->
+ IsClosure = lists:member({M,F,A}, Closures),
+ IsExported = is_exported(F, A, Exports),
+ [Addr,M,F,A,IsClosure,IsExported | slim_sorted_exportmap(Rest, Closures, Exports)];
+slim_sorted_exportmap([],_,_) -> [].
+
+is_exported(F, A, Exports) -> lists:member({F,A}, Exports).
+
+%%%
+%%% Assembly listing support (pp_asm option).
+%%%
+
+print(String, Arglist, Options) ->
+ ?when_option(pp_asm, Options, io:format(String, Arglist)).
+
+print_insn(Address, Bytes, I, Options) ->
+ ?when_option(pp_asm, Options, print_insn_2(Address, Bytes, I)),
+ ?when_option(pp_cxmon, Options, print_code_list_2(Bytes)).
+
+print_code_list_2([H | Tail]) ->
+ print_byte(H),
+ io:format(","),
+ print_code_list_2(Tail);
+print_code_list_2([]) ->
+ io:format("").
+
+print_insn_2(Address, Bytes, {_,_,OrigI}) ->
+ io:format("~8.16b | ", [Address]),
+ print_code_list(Bytes, 0),
+ ?HIPE_X86_PP:pp_insn(OrigI).
+
+print_code_list([Byte|Rest], Len) ->
+ print_byte(Byte),
+ print_code_list(Rest, Len+1);
+print_code_list([], Len) ->
+ fill_spaces(24-(Len*2)),
+ io:format(" | ").
+
+print_byte(Byte) ->
+ io:format("~2.16.0b", [Byte band 16#FF]).
+
+fill_spaces(N) when N > 0 ->
+ io:format(" "),
+ fill_spaces(N-1);
+fill_spaces(0) ->
+ [].
+
+%%%
+%%% Lookup a constant in a ConstMap.
+%%%
+
+find_const({MFA,Label},[{pcm_entry,MFA,Label,ConstNo,_,_,_}|_]) ->
+ ConstNo;
+find_const(N,[_|R]) ->
+ find_const(N,R);
+find_const(C,[]) ->
+ ?EXIT({constant_not_found,C}).
diff --git a/lib/hipe/x86/hipe_x86_cfg.erl b/lib/hipe/x86/hipe_x86_cfg.erl
new file mode 100644
index 0000000000..d15dcc061a
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_cfg.erl
@@ -0,0 +1,147 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(hipe_x86_cfg).
+
+-export([init/1,
+ labels/1, start_label/1,
+ succ/2, pred/2,
+ bb/2, bb_add/3]).
+-export([postorder/1, reverse_postorder/1]).
+-export([linearise/1, params/1, arity/1, redirect_jmp/3]).
+
+%%% these tell cfg.inc what to define (ugly as hell)
+-define(PRED_NEEDED,true).
+-define(BREADTH_ORDER,true).
+-define(PARAMS_NEEDED,true).
+-define(START_LABEL_UPDATE_NEEDED,true).
+
+-include("hipe_x86.hrl").
+-include("../flow/cfg.hrl").
+-include("../flow/cfg.inc").
+
+init(Defun) ->
+ %% XXX: this assumes that the code starts with a label insn.
+ %% Is that guaranteed?
+ Code = hipe_x86:defun_code(Defun),
+ StartLab = hipe_x86:label_label(hd(Code)),
+ Data = hipe_x86:defun_data(Defun),
+ IsClosure = hipe_x86:defun_is_closure(Defun),
+ MFA = hipe_x86:defun_mfa(Defun),
+ IsLeaf = hipe_x86:defun_is_leaf(Defun),
+ Formals = hipe_x86:defun_formals(Defun),
+ CFG0 = mk_empty_cfg(MFA, StartLab, Data, IsClosure, IsLeaf, Formals),
+ take_bbs(Code, CFG0).
+
+is_branch(I) ->
+ case I of
+ #jmp_fun{} -> true;
+ #jmp_label{} -> true;
+ #jmp_switch{} -> true;
+ #pseudo_call{} -> true;
+ #pseudo_jcc{} -> true;
+ #pseudo_tailcall{} -> true;
+ #ret{} -> true;
+ _ -> false
+ end.
+
+branch_successors(Branch) ->
+ case Branch of
+ #jmp_fun{} -> [];
+ #jmp_label{label=Label} -> [Label];
+ #jmp_switch{labels=Labels} -> Labels;
+ #pseudo_call{contlab=ContLab, sdesc=#x86_sdesc{exnlab=ExnLab}} ->
+ case ExnLab of
+ [] -> [ContLab];
+ _ -> [ContLab,ExnLab]
+ end;
+ #pseudo_jcc{true_label=TrueLab,false_label=FalseLab} -> [FalseLab,TrueLab];
+ #pseudo_tailcall{} -> [];
+ #ret{} -> []
+ end.
+
+-ifdef(REMOVE_TRIVIAL_BBS_NEEDED).
+fails_to(_Instr) -> [].
+-endif.
+
+redirect_jmp(I, Old, New) ->
+ case I of
+ #jmp_label{label=Label} ->
+ if Old =:= Label -> I#jmp_label{label=New};
+ true -> I
+ end;
+ #pseudo_jcc{true_label=TrueLab, false_label=FalseLab} ->
+ J0 = if Old =:= TrueLab -> I#pseudo_jcc{true_label=New};
+ true -> I
+ end,
+ if Old =:= FalseLab -> J0#pseudo_jcc{false_label=New};
+ true -> J0
+ end;
+ %% handle pseudo_call too?
+ _ -> I
+ end.
+
+%%% XXX: fix if labels can occur in operands
+%% redirect_ops(_Labels, CFG, _Map) ->
+%% CFG.
+
+mk_goto(Label) ->
+ hipe_x86:mk_jmp_label(Label).
+
+is_label(I) ->
+ hipe_x86:is_label(I).
+
+label_name(Label) ->
+ hipe_x86:label_label(Label).
+
+mk_label(Name) ->
+ hipe_x86:mk_label(Name).
+
+%% is_comment(I) ->
+%% hipe_x86:is_comment(I).
+%%
+%% is_goto(I) ->
+%% hipe_x86:is_jmp_label(I).
+
+linearise(CFG) -> % -> defun, not insn list
+ MFA = function(CFG),
+ Formals = params(CFG),
+ Code = linearize_cfg(CFG),
+ Data = data(CFG),
+ VarRange = hipe_gensym:var_range(x86),
+ LabelRange = hipe_gensym:label_range(x86),
+ IsClosure = is_closure(CFG),
+ IsLeaf = is_leaf(CFG),
+ hipe_x86:mk_defun(MFA, Formals, IsClosure, IsLeaf,
+ Code, Data, VarRange, LabelRange).
+
+arity(CFG) ->
+ {_M,_F,A} = function(CFG),
+ A.
+
+%% init_gensym(CFG) ->
+%% HighestVar = find_highest_var(CFG),
+%% HighestLabel = find_highest_label(CFG),
+%% hipe_gensym:init(),
+%% hipe_gensym:set_var(x86, HighestVar),
+%% hipe_gensym:set_label(x86, HighestLabel).
+%%
+%% highest_var(Code) ->
+%% hipe_x86:highest_temp(Code).
diff --git a/lib/hipe/x86/hipe_x86_defuse.erl b/lib/hipe/x86/hipe_x86_defuse.erl
new file mode 100644
index 0000000000..3387f77595
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_defuse.erl
@@ -0,0 +1,160 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%% compute def/use sets for x86 insns
+%%%
+%%% TODO:
+%%% - represent EFLAGS (condition codes) use/def by a virtual reg?
+%%% - should push use/def %esp?
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_DEFUSE, hipe_amd64_defuse).
+-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
+-define(RV, rax).
+-else.
+-define(HIPE_X86_DEFUSE, hipe_x86_defuse).
+-define(HIPE_X86_REGISTERS, hipe_x86_registers).
+-define(RV, eax).
+-endif.
+
+-module(?HIPE_X86_DEFUSE).
+-export([insn_def/1, insn_use/1]). %% src_use/1]).
+-include("../x86/hipe_x86.hrl").
+
+%%%
+%%% insn_def(Insn) -- Return set of temps defined by an instruction.
+%%%
+
+insn_def(I) ->
+ case I of
+ #alu{dst=Dst} -> dst_def(Dst);
+ #cmovcc{dst=Dst} -> dst_def(Dst);
+ #fmove{dst=Dst} -> dst_def(Dst);
+ #fp_binop{dst=Dst} -> dst_def(Dst);
+ #fp_unop{arg=Arg} -> dst_def(Arg);
+ #imul{temp=Temp} -> [Temp];
+ #lea{temp=Temp} -> [Temp];
+ #move{dst=Dst} -> dst_def(Dst);
+ #move64{dst=Dst} -> dst_def(Dst);
+ #movsx{dst=Dst} -> dst_def(Dst);
+ #movzx{dst=Dst} -> dst_def(Dst);
+ #pseudo_call{} -> call_clobbered();
+ #pseudo_spill{} -> [];
+ #pseudo_tailcall_prepare{} -> tailcall_clobbered();
+ #shift{dst=Dst} -> dst_def(Dst);
+ %% call, cmp, comment, jcc, jmp_fun, jmp_label, jmp_switch, label
+ %% pseudo_jcc, pseudo_tailcall, push, ret
+ _ -> []
+ end.
+
+dst_def(Dst) ->
+ case Dst of
+ #x86_temp{} -> [Dst];
+ #x86_fpreg{} -> [Dst];
+ _ -> []
+ end.
+
+call_clobbered() ->
+ [hipe_x86:mk_temp(R, T)
+ || {R,T} <- ?HIPE_X86_REGISTERS:call_clobbered()].
+
+tailcall_clobbered() ->
+ [hipe_x86:mk_temp(R, T)
+ || {R,T} <- ?HIPE_X86_REGISTERS:tailcall_clobbered()].
+
+%%%
+%%% insn_use(Insn) -- Return set of temps used by an instruction.
+%%%
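+%%% Note that a two-address instruction such as #alu{} uses both Src and
+%%% Dst (Dst is read before it is written), and that temps appearing
+%%% inside a memory operand count as uses as well.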
+
+insn_use(I) ->
+ case I of
+ #alu{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, []));
+ #call{'fun'=Fun} -> addtemp(Fun, []);
+ #cmovcc{src=Src, dst=Dst} -> addtemp(Src, dst_use(Dst));
+ #cmp{src=Src, dst=Dst} -> addtemp(Src, addtemp(Dst, []));
+ #fmove{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst));
+ #fp_unop{arg=Arg} -> addtemp(Arg, []);
+ #fp_binop{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, []));
+ #imul{imm_opt=ImmOpt,src=Src,temp=Temp} ->
+ addtemp(Src, case ImmOpt of [] -> addtemp(Temp, []); _ -> [] end);
+ #jmp_fun{'fun'=Fun} -> addtemp(Fun, []);
+ #jmp_switch{temp=Temp, jtab=JTab} -> addtemp(Temp, addtemp(JTab, []));
+ #lea{mem=Mem} -> addtemp(Mem, []);
+ #move{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst));
+ #move64{} -> [];
+ #movsx{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst));
+ #movzx{src=Src,dst=Dst} -> addtemp(Src, dst_use(Dst));
+ #pseudo_call{'fun'=Fun,sdesc=#x86_sdesc{arity=Arity}} ->
+ addtemp(Fun, arity_use(Arity));
+ #pseudo_spill{args=Args} -> Args;
+ #pseudo_tailcall{'fun'=Fun,arity=Arity,stkargs=StkArgs} ->
+ addtemp(Fun, addtemps(StkArgs, addtemps(tailcall_clobbered(),
+ arity_use(Arity))));
+ #push{src=Src} -> addtemp(Src, []);
+ #ret{} -> [hipe_x86:mk_temp(?HIPE_X86_REGISTERS:?RV(), 'tagged')];
+ #shift{src=Src,dst=Dst} -> addtemp(Src, addtemp(Dst, []));
+ %% comment, jcc, jmp_label, label, pseudo_jcc, pseudo_tailcall_prepare
+ _ -> []
+ end.
+
+arity_use(Arity) ->
+ [hipe_x86:mk_temp(R, 'tagged')
+ || R <- ?HIPE_X86_REGISTERS:args(Arity)].
+
+dst_use(Dst) ->
+ case Dst of
+ #x86_mem{base=Base,off=Off} -> addbase(Base, addtemp(Off, []));
+ _ -> []
+ end.
+
+%%%
+%%% src_use(Src) -- Return set of temps used by a source operand.
+%%%
+
+%% src_use(Src) ->
+%% addtemp(Src, []).
+
+%%%
+%%% Auxiliary operations on sets of temps
+%%%
+
+addtemps([Arg|Args], Set) ->
+ addtemps(Args, addtemp(Arg, Set));
+addtemps([], Set) ->
+ Set.
+
+addtemp(Arg, Set) ->
+ case Arg of
+ #x86_temp{} -> add(Arg, Set);
+ #x86_mem{base=Base,off=Off} -> addtemp(Off, addbase(Base, Set));
+ #x86_fpreg{} -> add(Arg, Set);
+ _ -> Set
+ end.
+
+addbase(Base, Set) ->
+ case Base of
+ [] -> Set;
+ _ -> addtemp(Base, Set)
+ end.
+
+add(Arg, Set) ->
+ case lists:member(Arg, Set) of
+ false -> [Arg|Set];
+ _ -> Set
+ end.
diff --git a/lib/hipe/x86/hipe_x86_encode.erl b/lib/hipe/x86/hipe_x86_encode.erl
new file mode 100644
index 0000000000..db7f53ad26
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_encode.erl
@@ -0,0 +1,1302 @@
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%% Copyright (C) 2000-2005 Mikael Pettersson
+%%%
+%%% This is the syntax of x86 r/m operands:
+%%%
+%%% opnd ::= reg mod == 11
+%%% | MEM[ea] mod != 11
+%%%
+%%% ea ::= disp32(reg) mod == 10, r/m != ESP
+%%% | disp32 sib12 mod == 10, r/m == 100
+%%% | disp8(reg) mod == 01, r/m != ESP
+%%% | disp8 sib12 mod == 01, r/m == 100
+%%% | (reg) mod == 00, r/m != ESP and EBP
+%%% | sib0 mod == 00, r/m == 100
+%%% | disp32 mod == 00, r/m == 101 [on x86-32]
+%%% | disp32(%rip) mod == 00, r/m == 101 [on x86-64]
+%%%
+%%% // sib0: mod == 00
+%%% sib0 ::= disp32(,index,scale) base == EBP, index != ESP
+%%% | disp32 base == EBP, index == 100
+%%% | (base,index,scale) base != EBP, index != ESP
+%%% | (base) base != EBP, index == 100
+%%%
+%%% // sib12: mod == 01 or 10
+%%% sib12 ::= (base,index,scale) index != ESP
+%%% | (base) index == 100
+%%%
+%%% scale ::= 00 | 01 | 10 | 11 index << scale
+%%%
+%%% Notes:
+%%%
+%%% 1. ESP cannot be used as index register.
+%%% 2. Use of ESP as base register requires a SIB byte.
+%%% 3. disp(reg), when reg != ESP, can be represented without
+%%% [r/m == reg] or with [r/m == 100, base == reg] a SIB byte.
+%%% 4. disp32 can be represented without [mod == 00, r/m == 101]
+%%% or with [mod == 00, r/m == 100, base == 101, index == 100]
+%%% a SIB byte.
+%%% 5. x86-32 and x86-64 interpret mod==00b r/m==101b EAs differently:
+%%% on x86-32 the disp32 is an absolute address, but on x86-64 the
+%%% disp32 is relative to the %rip of the next instruction.
+%%% Absolute disp32s need a SIB on x86-64.
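+%%%
+%%% For example (a sketch in terms of this module's constructors), the
+%%% operand 8(%esp) needs a SIB byte (note 2) and can be built as
+%%% rm_mem(ea_disp8_sib(8, sib(2#100))), encoding as mod=01, r/m=100,
+%%% SIB=16#24, disp8=8.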
+
+-module(hipe_x86_encode).
+
+-export([% condition codes
+ cc/1,
+ % 8-bit registers
+ %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0,
+ % 32-bit registers
+ %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0,
+ % operands
+ sindex/2, sib/1, sib/2,
+ ea_disp32_base/2, ea_disp32_sib/2,
+ ea_disp8_base/2, ea_disp8_sib/2,
+ ea_base/1,
+ %% ea_disp32_sindex/1, % XXX: do not use on x86-32, only on x86-64
+ ea_disp32_sindex/2,
+ ea_sib/1, ea_disp32/1,
+ rm_reg/1, rm_mem/1,
+ % instructions
+ insn_encode/3, insn_sizeof/2]).
+
+%%-define(DO_HIPE_X86_ENCODE_TEST,true).
+-ifdef(DO_HIPE_X86_ENCODE_TEST).
+-export([dotest/0, dotest/1]). % for testing, don't use
+-endif.
+
+-define(ASSERT(F,G), if G -> [] ; true -> exit({?MODULE,F}) end).
+%-define(ASSERT(F,G), []).
+
+%%% condition codes
+
+-define(CC_O, 2#0000). % overflow
+-define(CC_NO, 2#0001). % no overflow
+-define(CC_B, 2#0010). % below, <u
+-define(CC_AE, 2#0011). % above or equal, >=u
+-define(CC_E, 2#0100). % equal
+-define(CC_NE, 2#0101). % not equal
+-define(CC_BE, 2#0110). % below or equal, <=u
+-define(CC_A, 2#0111). % above, >u
+-define(CC_S, 2#1000). % sign, +
+-define(CC_NS, 2#1001). % not sign, -
+-define(CC_PE, 2#1010). % parity even
+-define(CC_PO, 2#1011). % parity odd
+-define(CC_L, 2#1100). % less than, <s
+-define(CC_GE, 2#1101). % greater or equal, >=s
+-define(CC_LE, 2#1110). % less or equal, <=s
+-define(CC_G, 2#1111). % greater than, >s
+
+cc(o) -> ?CC_O;
+cc(no) -> ?CC_NO;
+cc(b) -> ?CC_B;
+cc(ae) -> ?CC_AE;
+cc(e) -> ?CC_E;
+cc(ne) -> ?CC_NE;
+cc(be) -> ?CC_BE;
+cc(a) -> ?CC_A;
+cc(s) -> ?CC_S;
+cc(ns) -> ?CC_NS;
+cc(pe) -> ?CC_PE;
+cc(po) -> ?CC_PO;
+cc(l) -> ?CC_L;
+cc(ge) -> ?CC_GE;
+cc(le) -> ?CC_LE;
+cc(g) -> ?CC_G.
+
+%%% 8-bit registers
+
+-define(AL, 2#000).
+-define(CL, 2#001).
+-define(DL, 2#010).
+-define(BL, 2#011).
+-define(AH, 2#100).
+-define(CH, 2#101).
+-define(DH, 2#110).
+-define(BH, 2#111).
+
+%% al() -> ?AL.
+%% cl() -> ?CL.
+%% dl() -> ?DL.
+%% bl() -> ?BL.
+%% ah() -> ?AH.
+%% ch() -> ?CH.
+%% dh() -> ?DH.
+%% bh() -> ?BH.
+
+%%% 32-bit registers
+
+-define(EAX, 2#000).
+-define(ECX, 2#001).
+-define(EDX, 2#010).
+-define(EBX, 2#011).
+-define(ESP, 2#100).
+-define(EBP, 2#101).
+-define(ESI, 2#110).
+-define(EDI, 2#111).
+
+%% eax() -> ?EAX.
+%% ecx() -> ?ECX.
+%% edx() -> ?EDX.
+%% ebx() -> ?EBX.
+%% esp() -> ?ESP.
+%% ebp() -> ?EBP.
+%% esi() -> ?ESI.
+%% edi() -> ?EDI.
+
+%%% r/m operands
+
+sindex(Scale, Index) when is_integer(Scale), is_integer(Index) ->
+ ?ASSERT(sindex, Scale >= 0),
+ ?ASSERT(sindex, Scale =< 3),
+ ?ASSERT(sindex, Index =/= ?ESP),
+ {sindex, Scale, Index}.
+
+-record(sib, {sindex_opt, base :: integer()}).
+sib(Base) when is_integer(Base) -> #sib{sindex_opt=none, base=Base}.
+sib(Base, Sindex) when is_integer(Base) -> #sib{sindex_opt=Sindex, base=Base}.
+
+ea_disp32_base(Disp32, Base) when is_integer(Base) ->
+ ?ASSERT(ea_disp32_base, Base =/= ?ESP),
+ {ea_disp32_base, Disp32, Base}.
+ea_disp32_sib(Disp32, SIB) -> {ea_disp32_sib, Disp32, SIB}.
+ea_disp8_base(Disp8, Base) when is_integer(Base) ->
+ ?ASSERT(ea_disp8_base, Base =/= ?ESP),
+ {ea_disp8_base, Disp8, Base}.
+ea_disp8_sib(Disp8, SIB) -> {ea_disp8_sib, Disp8, SIB}.
+ea_base(Base) when is_integer(Base) ->
+ ?ASSERT(ea_base, Base =/= ?ESP),
+ ?ASSERT(ea_base, Base =/= ?EBP),
+ {ea_base, Base}.
+%% ea_disp32_sindex(Disp32) -> {ea_disp32_sindex, Disp32, none}.
+ea_disp32_sindex(Disp32, Sindex) -> {ea_disp32_sindex, Disp32, Sindex}.
+ea_sib(SIB) ->
+ ?ASSERT(ea_sib, SIB#sib.base =/= ?EBP),
+ {ea_sib, SIB}.
+ea_disp32(Disp32) -> {ea_disp32, Disp32}.
+
+rm_reg(Reg) -> {rm_reg, Reg}.
+rm_mem(EA) -> {rm_mem, EA}.
+
+mk_modrm(Mod, RO, RM) ->
+ (Mod bsl 6) bor (RO bsl 3) bor RM.
+
+mk_sib(Scale, Index, Base) ->
+ (Scale bsl 6) bor (Index bsl 3) bor Base.
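+%% For example, mk_modrm(2#11, 2#010, 2#001) = 2#11010001 = 16#D1
+%% (register-direct ECX with /2 in the reg/opcode field), and
+%% mk_sib(2#00, 2#100, 2#100) = 16#24 (scale 1, no index, base = ESP).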
+
+le16(Word, Tail) ->
+ [Word band 16#FF, (Word bsr 8) band 16#FF | Tail].
+
+le32(Word, Tail) when is_integer(Word) ->
+ [Word band 16#FF, (Word bsr 8) band 16#FF,
+ (Word bsr 16) band 16#FF, (Word bsr 24) band 16#FF | Tail];
+le32({Tag,Val}, Tail) -> % a relocatable datum
+ [{le32,Tag,Val} | Tail].
+
+enc_sindex_opt({sindex,Scale,Index}) -> {Scale, Index};
+enc_sindex_opt(none) -> {2#00, 2#100}.
+
+enc_sib(#sib{sindex_opt=SindexOpt, base=Base}) ->
+ {Scale, Index} = enc_sindex_opt(SindexOpt),
+ mk_sib(Scale, Index, Base).
+
+enc_ea(EA, RO, Tail) ->
+ case EA of
+ {ea_disp32_base, Disp32, Base} ->
+ [mk_modrm(2#10, RO, Base) | le32(Disp32, Tail)];
+ {ea_disp32_sib, Disp32, SIB} ->
+ [mk_modrm(2#10, RO, 2#100), enc_sib(SIB) | le32(Disp32, Tail)];
+ {ea_disp8_base, Disp8, Base} ->
+ [mk_modrm(2#01, RO, Base), Disp8 | Tail];
+ {ea_disp8_sib, Disp8, SIB} ->
+ [mk_modrm(2#01, RO, 2#100), enc_sib(SIB), Disp8 | Tail];
+ {ea_base, Base} ->
+ [mk_modrm(2#00, RO, Base) | Tail];
+ {ea_disp32_sindex, Disp32, SindexOpt} ->
+ {Scale, Index} = enc_sindex_opt(SindexOpt),
+ SIB = mk_sib(Scale, Index, 2#101),
+ MODRM = mk_modrm(2#00, RO, 2#100),
+ [MODRM, SIB | le32(Disp32, Tail)];
+ {ea_sib, SIB} ->
+ [mk_modrm(2#00, RO, 2#100), enc_sib(SIB) | Tail];
+ {ea_disp32, Disp32} ->
+ [mk_modrm(2#00, RO, 2#101) | le32(Disp32, Tail)]
+ end.
+
+encode_rm(RM, RO, Tail) ->
+ case RM of
+ {rm_reg, Reg} -> [mk_modrm(2#11, RO, Reg) | Tail];
+ {rm_mem, EA} -> enc_ea(EA, RO, Tail)
+ end.
+
+sizeof_ea(EA) ->
+ case element(1, EA) of
+ ea_disp32_base -> 5;
+ ea_disp32_sib -> 6;
+ ea_disp8_base -> 2;
+ ea_disp8_sib -> 3;
+ ea_base -> 1;
+ ea_disp32_sindex -> 6;
+ ea_sib -> 2;
+ ea_disp32 -> 5
+ end.
+
+sizeof_rm(RM) ->
+ case RM of
+ {rm_reg, _} -> 1;
+ {rm_mem, EA} -> sizeof_ea(EA)
+ end.
+
+%%% Floating point stack positions
+
+-define(ST0, 2#000).
+-define(ST1, 2#001).
+-define(ST2, 2#010).
+-define(ST3, 2#011).
+-define(ST4, 2#100).
+-define(ST5, 2#101).
+-define(ST6, 2#110).
+-define(ST7, 2#111).
+
+st(0) -> ?ST0;
+st(1) -> ?ST1;
+st(2) -> ?ST2;
+st(3) -> ?ST3;
+st(4) -> ?ST4;
+st(5) -> ?ST5;
+st(6) -> ?ST6;
+st(7) -> ?ST7.
+
+
+%%% Instructions
+%%%
+%%% Insn ::= {Op,Opnds}
+%%% Opnds ::= {Opnd1,...,Opndn} (n >= 0)
+%%% Opnd ::= eax | ax | al | 1 | cl
+%%% | {imm32,Imm32} | {imm16,Imm16} | {imm8,Imm8}
+%%% | {rm32,RM32} | {rm16,RM16} | {rm8,RM8}
+%%% | {rel32,Rel32} | {rel8,Rel8}
+%%% | {moffs32,Moffs32} | {moffs16,Moffs16} | {moffs8,Moffs8}
+%%% | {cc,CC}
+%%% | {reg32,Reg32} | {reg16,Reg16} | {reg8,Reg8}
+%%% | {ea,EA}
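+%%%
+%%% For example (a sketch; the Op-atom dispatch appears further down in
+%%% this module), arith_binop_encode(2#000, {{rm32,rm_reg(?ECX)}, {imm8,1}})
+%%% returns [16#83,16#C1,16#01], the encoding of "addl $1,%ecx"
+%%% (the 2#000 opcode extension selects 'add').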
+
+-define(PFX_OPND, 16#66).
+
+arith_binop_encode(SubOpcode, Opnds) ->
+ %% add, or, adc, sbb, and, sub, xor, cmp
+ case Opnds of
+ {eax, {imm32,Imm32}} ->
+ [16#05 bor (SubOpcode bsl 3) | le32(Imm32, [])];
+ {{rm32,RM32}, {imm32,Imm32}} ->
+ [16#81 | encode_rm(RM32, SubOpcode, le32(Imm32, []))];
+ {{rm32,RM32}, {imm8,Imm8}} ->
+ [16#83 | encode_rm(RM32, SubOpcode, [Imm8])];
+ {{rm32,RM32}, {reg32,Reg32}} ->
+ [16#01 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])];
+ {{reg32,Reg32}, {rm32,RM32}} ->
+ [16#03 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])]
+ end.
+
+arith_binop_sizeof(Opnds) ->
+ %% add, or, adc, sbb, and, sub, xor, cmp
+ case Opnds of
+ {eax, {imm32,_}} ->
+ 1 + 4;
+ {{rm32,RM32}, {imm32,_}} ->
+ 1 + sizeof_rm(RM32) + 4;
+ {{rm32,RM32}, {imm8,_}} ->
+ 1 + sizeof_rm(RM32) + 1;
+ {{rm32,RM32}, {reg32,_}} ->
+ 1 + sizeof_rm(RM32);
+ {{reg32,_}, {rm32,RM32}} ->
+ 1 + sizeof_rm(RM32)
+ end.
+
+bs_op_encode(Opcode, {{reg32,Reg32}, {rm32,RM32}}) -> % bsf, bsr
+ [16#0F, Opcode | encode_rm(RM32, Reg32, [])].
+
+bs_op_sizeof({{reg32,_}, {rm32,RM32}}) -> % bsf, bsr
+ 2 + sizeof_rm(RM32).
+
+bswap_encode({{reg32,Reg32}}) ->
+ [16#0F, 16#C8 bor Reg32].
+
+bswap_sizeof({{reg32,_}}) ->
+ 2.
+
+bt_op_encode(SubOpcode, Opnds) -> % bt, btc, btr, bts
+ case Opnds of
+ {{rm32,RM32}, {reg32,Reg32}} ->
+ [16#0F, 16#A3 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])];
+ {{rm32,RM32}, {imm8,Imm8}} ->
+ [16#0F, 16#BA | encode_rm(RM32, SubOpcode, [Imm8])]
+ end.
+
+bt_op_sizeof(Opnds) -> % bt, btc, btr, bts
+ case Opnds of
+ {{rm32,RM32}, {reg32,_}} ->
+ 2 + sizeof_rm(RM32);
+ {{rm32,RM32}, {imm8,_}} ->
+ 2 + sizeof_rm(RM32) + 1
+ end.
+
+call_encode(Opnds) ->
+ case Opnds of
+ {{rel32,Rel32}} ->
+ [16#E8 | le32(Rel32, [])];
+ {{rm32,RM32}} ->
+ [16#FF | encode_rm(RM32, 2#010, [])]
+ end.
+
+call_sizeof(Opnds) ->
+ case Opnds of
+ {{rel32,_}} ->
+ 1 + 4;
+ {{rm32,RM32}} ->
+ 1 + sizeof_rm(RM32)
+ end.
+
+cbw_encode({}) ->
+ [?PFX_OPND, 16#98].
+
+cbw_sizeof({}) ->
+ 2.
+
+nullary_op_encode(Opcode, {}) ->
+ %% cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std
+ [Opcode].
+
+nullary_op_sizeof({}) ->
+ %% cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std
+ 1.
+
+cmovcc_encode({{cc,CC}, {reg32,Reg32}, {rm32,RM32}}) ->
+ [16#0F, 16#40 bor CC | encode_rm(RM32, Reg32, [])].
+
+cmovcc_sizeof({{cc,_}, {reg32,_}, {rm32,RM32}}) ->
+ 2 + sizeof_rm(RM32).
+
+incdec_encode(SubOpcode, Opnds) -> % SubOpcode is either 0 or 1
+ case Opnds of
+ {{rm32,RM32}} ->
+ [16#FF | encode_rm(RM32, SubOpcode, [])];
+ {{reg32,Reg32}} ->
+ [16#40 bor (SubOpcode bsl 3) bor Reg32]
+ end.
+
+incdec_sizeof(Opnds) ->
+ case Opnds of
+ {{rm32,RM32}} ->
+ 1 + sizeof_rm(RM32);
+ {{reg32,_}} ->
+ 1
+ end.
+
+arith_unop_encode(Opcode, {{rm32,RM32}}) -> % div, idiv, mul, neg, not
+ [16#F7 | encode_rm(RM32, Opcode, [])].
+
+arith_unop_sizeof({{rm32,RM32}}) -> % div, idiv, mul, neg, not
+ 1 + sizeof_rm(RM32).
+
+enter_encode({{imm16,Imm16}, {imm8,Imm8}}) ->
+ [16#C8 | le16(Imm16, [Imm8])].
+
+enter_sizeof({{imm16,_}, {imm8,_}}) ->
+ 1 + 2 + 1.
+
+imul_encode(Opnds) ->
+ case Opnds of
+ {{rm32,RM32}} -> % <edx,eax> *= rm32
+ [16#F7 | encode_rm(RM32, 2#101, [])];
+ {{reg32,Reg32}, {rm32,RM32}} -> % reg *= rm32
+ [16#0F, 16#AF | encode_rm(RM32, Reg32, [])];
+ {{reg32,Reg32}, {rm32,RM32}, {imm8,Imm8}} -> % reg := rm32 * sext(imm8)
+ [16#6B | encode_rm(RM32, Reg32, [Imm8])];
+ {{reg32,Reg32}, {rm32,RM32}, {imm32,Imm32}} -> % reg := rm32 * imm32
+ [16#69 | encode_rm(RM32, Reg32, le32(Imm32, []))]
+ end.
+
+imul_sizeof(Opnds) ->
+ case Opnds of
+ {{rm32,RM32}} ->
+ 1 + sizeof_rm(RM32);
+ {{reg32,_}, {rm32,RM32}} ->
+ 2 + sizeof_rm(RM32);
+ {{reg32,_}, {rm32,RM32}, {imm8,_}} ->
+ 1 + sizeof_rm(RM32) + 1;
+ {{reg32,_}, {rm32,RM32}, {imm32,_}} ->
+ 1 + sizeof_rm(RM32) + 4
+ end.
+
+jcc_encode(Opnds) ->
+ case Opnds of
+ {{cc,CC}, {rel8,Rel8}} ->
+ [16#70 bor CC, Rel8];
+ {{cc,CC}, {rel32,Rel32}} ->
+ [16#0F, 16#80 bor CC | le32(Rel32, [])]
+ end.
+
+jcc_sizeof(Opnds) ->
+ case Opnds of
+ {{cc,_}, {rel8,_}} ->
+ 2;
+ {{cc,_}, {rel32,_}} ->
+ 2 + 4
+ end.
+
+jmp8_op_encode(Opcode, {{rel8,Rel8}}) -> % jecxz, loop, loope, loopne
+ [Opcode, Rel8].
+
+jmp8_op_sizeof({{rel8,_}}) -> % jecxz, loop, loope, loopne
+ 2.
+
+jmp_encode(Opnds) ->
+ case Opnds of
+ {{rel8,Rel8}} ->
+ [16#EB, Rel8];
+ {{rel32,Rel32}} ->
+ [16#E9 | le32(Rel32, [])];
+ {{rm32,RM32}} ->
+ [16#FF | encode_rm(RM32, 2#100, [])]
+ end.
+
+jmp_sizeof(Opnds) ->
+ case Opnds of
+ {{rel8,_}} ->
+ 2;
+ {{rel32,_}} ->
+ 1 + 4;
+ {{rm32,RM32}} ->
+ 1 + sizeof_rm(RM32)
+ end.
+
+lea_encode({{reg32,Reg32}, {ea,EA}}) ->
+ [16#8D | enc_ea(EA, Reg32, [])].
+
+lea_sizeof({{reg32,_}, {ea,EA}}) ->
+ 1 + sizeof_ea(EA).
+
+mov_encode(Opnds) ->
+ case Opnds of
+ {{rm8,RM8}, {reg8,Reg8}} ->
+ [16#88 | encode_rm(RM8, Reg8, [])];
+ {{rm16,RM16}, {reg16,Reg16}} ->
+ [?PFX_OPND, 16#89 | encode_rm(RM16, Reg16, [])];
+ {{rm32,RM32}, {reg32,Reg32}} ->
+ [16#89 | encode_rm(RM32, Reg32, [])];
+ {{reg8,Reg8}, {rm8,RM8}} ->
+ [16#8A | encode_rm(RM8, Reg8, [])];
+ {{reg16,Reg16}, {rm16,RM16}} ->
+ [?PFX_OPND, 16#8B | encode_rm(RM16, Reg16, [])];
+ {{reg32,Reg32}, {rm32,RM32}} ->
+ [16#8B | encode_rm(RM32, Reg32, [])];
+ {al, {moffs8,Moffs8}} ->
+ [16#A0 | le32(Moffs8, [])];
+ {ax, {moffs16,Moffs16}} ->
+ [?PFX_OPND, 16#A1 | le32(Moffs16, [])];
+ {eax, {moffs32,Moffs32}} ->
+ [16#A1 | le32(Moffs32, [])];
+ {{moffs8,Moffs8}, al} ->
+ [16#A2 | le32(Moffs8, [])];
+ {{moffs16,Moffs16}, ax} ->
+ [?PFX_OPND, 16#A3 | le32(Moffs16, [])];
+ {{moffs32,Moffs32}, eax} ->
+ [16#A3 | le32(Moffs32, [])];
+ {{reg8,Reg8}, {imm8,Imm8}} ->
+ [16#B0 bor Reg8, Imm8];
+ {{reg16,Reg16}, {imm16,Imm16}} ->
+ [?PFX_OPND, 16#B8 bor Reg16 | le16(Imm16, [])];
+ {{reg32,Reg32}, {imm32,Imm32}} ->
+ [16#B8 bor Reg32 | le32(Imm32, [])];
+ {{rm8,RM8}, {imm8,Imm8}} ->
+ [16#C6 | encode_rm(RM8, 2#000, [Imm8])];
+ {{rm16,RM16}, {imm16,Imm16}} ->
+ [?PFX_OPND, 16#C7 | encode_rm(RM16, 2#000, le16(Imm16, []))];
+ {{rm32,RM32}, {imm32,Imm32}} ->
+ [16#C7 | encode_rm(RM32, 2#000, le32(Imm32, []))]
+ end.
+
+mov_sizeof(Opnds) ->
+ case Opnds of
+ {{rm8,RM8}, {reg8,_}} ->
+ 1 + sizeof_rm(RM8);
+ {{rm16,RM16}, {reg16,_}} ->
+ 2 + sizeof_rm(RM16);
+ {{rm32,RM32}, {reg32,_}} ->
+ 1 + sizeof_rm(RM32);
+ {{reg8,_}, {rm8,RM8}} ->
+ 1 + sizeof_rm(RM8);
+ {{reg16,_}, {rm16,RM16}} ->
+ 2 + sizeof_rm(RM16);
+ {{reg32,_}, {rm32,RM32}} ->
+ 1 + sizeof_rm(RM32);
+ {al, {moffs8,_}} ->
+ 1 + 4;
+ {ax, {moffs16,_}} ->
+ 2 + 4;
+ {eax, {moffs32,_}} ->
+ 1 + 4;
+ {{moffs8,_}, al} ->
+ 1 + 4;
+ {{moffs16,_}, ax} ->
+ 2 + 4;
+ {{moffs32,_}, eax} ->
+ 1 + 4;
+ {{reg8,_}, {imm8,_}} ->
+ 2;
+ {{reg16,_}, {imm16,_}} ->
+ 2 + 2;
+ {{reg32,_}, {imm32,_}} ->
+ 1 + 4;
+ {{rm8,RM8}, {imm8,_}} ->
+ 1 + sizeof_rm(RM8) + 1;
+ {{rm16,RM16}, {imm16,_}} ->
+ 2 + sizeof_rm(RM16) + 2;
+ {{rm32,RM32}, {imm32,_}} ->
+ 1 + sizeof_rm(RM32) + 4
+ end.
+
+movx_op_encode(Opcode, Opnds) -> % movsx, movzx
+ case Opnds of
+ {{reg16,Reg16}, {rm8,RM8}} ->
+ [?PFX_OPND, 16#0F, Opcode | encode_rm(RM8, Reg16, [])];
+ {{reg32,Reg32}, {rm8,RM8}} ->
+ [16#0F, Opcode | encode_rm(RM8, Reg32, [])];
+ {{reg32,Reg32}, {rm16,RM16}} ->
+ [16#0F, Opcode bor 1 | encode_rm(RM16, Reg32, [])]
+ end.
+
+movx_op_sizeof(Opnds) ->
+ case Opnds of
+ {{reg16,_}, {rm8,RM8}} ->
+ 3 + sizeof_rm(RM8);
+ {{reg32,_}, {rm8,RM8}} ->
+ 2 + sizeof_rm(RM8);
+ {{reg32,_}, {rm16,RM16}} ->
+ 2 + sizeof_rm(RM16)
+ end.
+
+pop_encode(Opnds) ->
+ case Opnds of
+ {{rm32,RM32}} ->
+ [16#8F | encode_rm(RM32, 2#000, [])];
+ {{reg32,Reg32}} ->
+ [16#58 bor Reg32]
+ end.
+
+pop_sizeof(Opnds) ->
+ case Opnds of
+ {{rm32,RM32}} ->
+ 1 + sizeof_rm(RM32);
+ {{reg32,_}} ->
+ 1
+ end.
+
+push_encode(Opnds) ->
+ case Opnds of
+ {{rm32,RM32}} ->
+ [16#FF | encode_rm(RM32, 2#110, [])];
+ {{reg32,Reg32}} ->
+ [16#50 bor Reg32];
+ {{imm8,Imm8}} -> % sign-extended
+ [16#6A, Imm8];
+ {{imm32,Imm32}} ->
+ [16#68 | le32(Imm32, [])]
+ end.
+
+push_sizeof(Opnds) ->
+ case Opnds of
+ {{rm32,RM32}} ->
+ 1 + sizeof_rm(RM32);
+ {{reg32,_}} ->
+ 1;
+ {{imm8,_}} ->
+ 2;
+ {{imm32,_}} ->
+ 1 + 4
+ end.
+
+shift_op_encode(SubOpcode, Opnds) -> % rcl, rcr, rol, ror, sar, shl, shr
+ case Opnds of
+ {{rm32,RM32}, 1} ->
+ [16#D1 | encode_rm(RM32, SubOpcode, [])];
+ {{rm32,RM32}, cl} ->
+ [16#D3 | encode_rm(RM32, SubOpcode, [])];
+ {{rm32,RM32}, {imm8,Imm8}} ->
+ [16#C1 | encode_rm(RM32, SubOpcode, [Imm8])];
+ {{rm16,RM16}, {imm8,Imm8}} ->
+ [?PFX_OPND, 16#C1 | encode_rm(RM16, SubOpcode, [Imm8])]
+ end.
+
+shift_op_sizeof(Opnds) -> % rcl, rcr, rol, ror, sar, shl, shr
+ case Opnds of
+ {{rm32,RM32}, 1} ->
+ 1 + sizeof_rm(RM32);
+ {{rm32,RM32}, cl} ->
+ 1 + sizeof_rm(RM32);
+ {{rm32,RM32}, {imm8,_Imm8}} ->
+ 1 + sizeof_rm(RM32) + 1;
+ {{rm16,RM16}, {imm8,_Imm8}} ->
+ 1 + 1 + sizeof_rm(RM16) + 1
+ end.
+
+ret_encode(Opnds) ->
+ case Opnds of
+ {} ->
+ [16#C3];
+ {{imm16,Imm16}} ->
+ [16#C2 | le16(Imm16, [])]
+ end.
+
+ret_sizeof(Opnds) ->
+ case Opnds of
+ {} ->
+ 1;
+ {{imm16,_}} ->
+ 1 + 2
+ end.
+
+setcc_encode({{cc,CC}, {rm8,RM8}}) ->
+ [16#0F, 16#90 bor CC | encode_rm(RM8, 2#000, [])].
+
+setcc_sizeof({{cc,_}, {rm8,RM8}}) ->
+ 2 + sizeof_rm(RM8).
+
+shd_op_encode(Opcode, Opnds) ->
+ case Opnds of
+ {{rm32,RM32}, {reg32,Reg32}, {imm8,Imm8}} ->
+ [16#0F, Opcode | encode_rm(RM32, Reg32, [Imm8])];
+ {{rm32,RM32}, {reg32,Reg32}, cl} ->
+ [16#0F, Opcode bor 1 | encode_rm(RM32, Reg32, [])]
+ end.
+
+shd_op_sizeof(Opnds) ->
+ case Opnds of
+ {{rm32,RM32}, {reg32,_}, {imm8,_}} ->
+ 2 + sizeof_rm(RM32) + 1;
+ {{rm32,RM32}, {reg32,_}, cl} ->
+ 2 + sizeof_rm(RM32)
+ end.
+
+test_encode(Opnds) ->
+ case Opnds of
+ {eax, {imm32,Imm32}} ->
+ [16#A9 | le32(Imm32, [])];
+ {{rm32,RM32}, {imm32,Imm32}} ->
+ [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))];
+ {{rm32,RM32}, {reg32,Reg32}} ->
+ [16#85 | encode_rm(RM32, Reg32, [])]
+ end.
+
+test_sizeof(Opnds) ->
+ case Opnds of
+ {eax, {imm32,_}} ->
+ 1 + 4;
+ {{rm32,RM32}, {imm32,_}} ->
+ 1 + sizeof_rm(RM32) + 4;
+ {{rm32,RM32}, {reg32,_}} ->
+ 1 + sizeof_rm(RM32)
+ end.
+
+fild_encode(Opnds) ->
+ %% The operand cannot be a register!
+ {{rm32, RM32}} = Opnds,
+ [16#DB | encode_rm(RM32, 2#000, [])].
+
+fild_sizeof(Opnds) ->
+ {{rm32, RM32}} = Opnds,
+ 1 + sizeof_rm(RM32).
+
+fld_encode(Opnds) ->
+ case Opnds of
+ {{rm64fp, RM64fp}} ->
+ [16#DD | encode_rm(RM64fp, 2#000, [])];
+ {{fpst, St}} ->
+ [16#D9, 16#C0 bor st(St)]
+ end.
+
+fld_sizeof(Opnds) ->
+ case Opnds of
+ {{rm64fp, RM64fp}} ->
+ 1 + sizeof_rm(RM64fp);
+ {{fpst, _}} ->
+ 2
+ end.
+
+fp_comm_arith_encode(OpCode, Opnds) ->
+ %% fadd, fmul
+ case Opnds of
+ {{rm64fp, RM64fp}} ->
+ [16#DC | encode_rm(RM64fp, OpCode, [])];
+ {{fpst,0}, {fpst,St}} ->
+ [16#D8, (16#C0 bor (OpCode bsl 3)) bor st(St)];
+ {{fpst,St}, {fpst,0}} ->
+ [16#DC, (16#C0 bor (OpCode bsl 3)) bor st(St)]
+ end.
+
+fp_comm_arith_pop_encode(OpCode, Opnds) ->
+ %% faddp, fmulp
+ case Opnds of
+ [] ->
+ [16#DE, 16#C0 bor (OpCode bsl 3) bor st(1)];
+ {{fpst,St},{fpst,0}} ->
+ [16#DE, 16#C0 bor (OpCode bsl 3) bor st(St)]
+ end.
+
+fp_arith_encode(OpCode, Opnds) ->
+ %% fdiv, fsub
+ case Opnds of
+ {{rm64fp, RM64fp}} ->
+ [16#DC | encode_rm(RM64fp, OpCode, [])];
+ {{fpst,0}, {fpst,St}} ->
+ OpCode0 = OpCode band 2#110,
+ [16#D8, 16#C0 bor (OpCode0 bsl 3) bor st(St)];
+ {{fpst,St}, {fpst,0}} ->
+ OpCode0 = OpCode bor 1,
+ [16#DC, 16#C0 bor (OpCode0 bsl 3) bor st(St)]
+ end.
+
+fp_arith_pop_encode(OpCode, Opnds) ->
+ %% fdivp, fsubp
+ OpCode0 = OpCode bor 1,
+ case Opnds of
+ [] ->
+ [16#DE, 16#C8 bor (OpCode0 bsl 3) bor st(1)];
+ {{fpst,St}, {fpst,0}} ->
+ [16#DE, 16#C8 bor (OpCode0 bsl 3) bor st(St)]
+ end.
+
+fp_arith_rev_encode(OpCode, Opnds) ->
+ %% fdivr, fsubr
+ case Opnds of
+ {{rm64fp, RM64fp}} ->
+ [16#DC | encode_rm(RM64fp, OpCode, [])];
+ {{fpst,0}, {fpst,St}} ->
+ OpCode0 = OpCode bor 1,
+ [16#D8, 16#C0 bor (OpCode0 bsl 3) bor st(St)];
+ {{fpst,St}, {fpst,0}} ->
+ OpCode0 = OpCode band 2#110,
+ [16#DC, 16#C0 bor (OpCode0 bsl 3) bor st(St)]
+ end.
+
+fp_arith_rev_pop_encode(OpCode, Opnds) ->
+ %% fdivrp, fsubrp
+ OpCode0 = OpCode band 2#110,
+ case Opnds of
+ [] ->
+ [16#DE, 16#C0 bor (OpCode0 bsl 3) bor st(1)];
+ {{fpst,St}, {fpst, 0}} ->
+ [16#DE, 16#C0 bor (OpCode0 bsl 3) bor st(St)]
+ end.
+
+fp_arith_sizeof(Opnds) ->
+ case Opnds of
+ {{rm64fp, RM64fp}} ->
+ 1 + sizeof_rm(RM64fp);
+ {{fpst,0}, {fpst,_}} ->
+ 2;
+ {{fpst,_}, {fpst,0}} ->
+ 2
+ end.
+
+fst_encode(OpCode, Opnds) ->
+ case Opnds of
+ {{rm64fp, RM64fp}} ->
+ [16#DD | encode_rm(RM64fp, OpCode, [])];
+ {{fpst, St}} ->
+ [16#DD, 16#C0 bor (OpCode bsl 3) bor st(St)]
+ end.
+
+fst_sizeof(Opnds) ->
+ case Opnds of
+ {{rm64fp, RM64fp}} ->
+ 1 + sizeof_rm(RM64fp);
+ {{fpst, _}} ->
+ 2
+ end.
+
+fchs_encode() ->
+ [16#D9, 16#E0].
+fchs_sizeof() ->
+ 2.
+
+ffree_encode({{fpst, St}})->
+ [16#DD, 16#C0 bor st(St)].
+ffree_sizeof() ->
+ 2.
+
+fwait_encode() ->
+ [16#9B].
+fwait_sizeof() ->
+ 1.
+
+fxch_encode(Opnds) ->
+ case Opnds of
+ [] ->
+ [16#D9, 16#C8 bor st(1)];
+ {{fpst, St}} ->
+ [16#D9, 16#C8 bor st(St)]
+ end.
+fxch_sizeof() ->
+ 2.
+
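+%% insn_encode(Op, Opnds, Offset) -> {ByteList, Relocs}.
+%% The encoders may leave {le32,Tag,Val} markers in the byte list for
+%% 32-bit values that are not yet known (they are resolved when the code
+%% is loaded).  fix_relocs/4 below replaces each marker with four zero
+%% bytes and records a {Tag,Off,Val} entry, where Off is the Offset
+%% argument plus the marker's byte position within the instruction.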
+insn_encode(Op, Opnds, Offset) ->
+ Bytes = insn_encode_internal(Op, Opnds),
+ case has_relocs(Bytes) of
+ false -> % the common case
+ {Bytes, []};
+ _ ->
+ fix_relocs(Bytes, Offset, [], [])
+ end.
+
+has_relocs([{le32,_,_}|_]) -> true;
+has_relocs([_|Bytes]) -> has_relocs(Bytes);
+has_relocs([]) -> false.
+
+fix_relocs([{le32,Tag,Val}|Bytes], Offset, Code, Relocs) ->
+ fix_relocs(Bytes, Offset+4,
+ [16#00, 16#00, 16#00, 16#00 | Code],
+ [{Tag,Offset,Val}|Relocs]);
+fix_relocs([Byte|Bytes], Offset, Code, Relocs) ->
+ fix_relocs(Bytes, Offset+1, [Byte|Code], Relocs);
+fix_relocs([], _Offset, Code, Relocs) ->
+ {lists:reverse(Code), lists:reverse(Relocs)}.
+
+insn_encode_internal(Op, Opnds) ->
+ case Op of
+ 'adc' -> arith_binop_encode(2#010, Opnds);
+ 'add' -> arith_binop_encode(2#000, Opnds);
+ 'and' -> arith_binop_encode(2#100, Opnds);
+ 'bsf' -> bs_op_encode(16#BC, Opnds);
+ 'bsr' -> bs_op_encode(16#BD, Opnds);
+ 'bswap' -> bswap_encode(Opnds);
+ 'bt' -> bt_op_encode(2#100, Opnds);
+ 'btc' -> bt_op_encode(2#111, Opnds);
+ 'btr' -> bt_op_encode(2#110, Opnds);
+ 'bts' -> bt_op_encode(2#101, Opnds);
+ 'call' -> call_encode(Opnds);
+ 'cbw' -> cbw_encode(Opnds);
+ 'cdq' -> nullary_op_encode(16#99, Opnds);
+ 'clc' -> nullary_op_encode(16#F8, Opnds);
+ 'cld' -> nullary_op_encode(16#FC, Opnds);
+ 'cmc' -> nullary_op_encode(16#F5, Opnds);
+ 'cmovcc' -> cmovcc_encode(Opnds);
+ 'cmp' -> arith_binop_encode(2#111, Opnds);
+ 'cwde' -> nullary_op_encode(16#98, Opnds);
+ 'dec' -> incdec_encode(2#001, Opnds);
+ 'div' -> arith_unop_encode(2#110, Opnds);
+ 'enter' -> enter_encode(Opnds);
+ 'fadd' -> fp_comm_arith_encode(2#000, Opnds);
+ 'faddp' -> fp_comm_arith_pop_encode(2#000, Opnds);
+ 'fchs' -> fchs_encode();
+ 'fdiv' -> fp_arith_encode(2#110, Opnds);
+ 'fdivp' -> fp_arith_pop_encode(2#110, Opnds);
+ 'fdivr' -> fp_arith_rev_encode(2#111, Opnds);
+ 'fdivrp' -> fp_arith_rev_pop_encode(2#111, Opnds);
+ 'ffree' -> ffree_encode(Opnds);
+ 'fild' -> fild_encode(Opnds);
+ 'fld' -> fld_encode(Opnds);
+ 'fmul' -> fp_comm_arith_encode(2#001, Opnds);
+ 'fmulp' -> fp_comm_arith_pop_encode(2#001, Opnds);
+ 'fst' -> fst_encode(2#010, Opnds);
+ 'fstp' -> fst_encode(2#011, Opnds);
+ 'fsub' -> fp_arith_encode(2#100, Opnds);
+ 'fsubp' -> fp_arith_pop_encode(2#100, Opnds);
+ 'fsubr' -> fp_arith_rev_encode(2#101, Opnds);
+ 'fsubrp' -> fp_arith_rev_pop_encode(2#101, Opnds);
+ 'fwait' -> fwait_encode();
+ 'fxch' -> fxch_encode(Opnds);
+ 'idiv' -> arith_unop_encode(2#111, Opnds);
+ 'imul' -> imul_encode(Opnds);
+ 'inc' -> incdec_encode(2#000, Opnds);
+ 'into' -> nullary_op_encode(16#CE, Opnds);
+ 'jcc' -> jcc_encode(Opnds);
+ 'jecxz' -> jmp8_op_encode(16#E3, Opnds);
+ 'jmp' -> jmp_encode(Opnds);
+ 'lea' -> lea_encode(Opnds);
+ 'leave' -> nullary_op_encode(16#C9, Opnds);
+ 'loop' -> jmp8_op_encode(16#E2, Opnds);
+ 'loope' -> jmp8_op_encode(16#E1, Opnds);
+ 'loopne' -> jmp8_op_encode(16#E0, Opnds);
+ 'mov' -> mov_encode(Opnds);
+ 'movsx' -> movx_op_encode(16#BE, Opnds);
+ 'movzx' -> movx_op_encode(16#B6, Opnds);
+ 'mul' -> arith_unop_encode(2#100, Opnds);
+ 'neg' -> arith_unop_encode(2#011, Opnds);
+ 'nop' -> nullary_op_encode(16#90, Opnds);
+ 'not' -> arith_unop_encode(2#010, Opnds);
+ 'or' -> arith_binop_encode(2#001, Opnds);
+ 'pop' -> pop_encode(Opnds);
+ 'prefix_fs' -> nullary_op_encode(16#64, Opnds);
+ 'push' -> push_encode(Opnds);
+ 'rcl' -> shift_op_encode(2#010, Opnds);
+ 'rcr' -> shift_op_encode(2#011, Opnds);
+ 'ret' -> ret_encode(Opnds);
+ 'rol' -> shift_op_encode(2#000, Opnds);
+ 'ror' -> shift_op_encode(2#001, Opnds);
+ 'sar' -> shift_op_encode(2#111, Opnds);
+ 'sbb' -> arith_binop_encode(2#011, Opnds);
+ 'setcc' -> setcc_encode(Opnds);
+ 'shl' -> shift_op_encode(2#100, Opnds);
+ 'shld' -> shd_op_encode(16#A4, Opnds);
+ 'shr' -> shift_op_encode(2#101, Opnds);
+ 'shrd' -> shd_op_encode(16#AC, Opnds);
+ 'stc' -> nullary_op_encode(16#F9, Opnds);
+ 'std' -> nullary_op_encode(16#FD, Opnds);
+ 'sub' -> arith_binop_encode(2#101, Opnds);
+ 'test' -> test_encode(Opnds);
+ 'xor' -> arith_binop_encode(2#110, Opnds);
+ _ -> exit({?MODULE,insn_encode,Op})
+ end.
+
+insn_sizeof(Op, Opnds) ->
+ case Op of
+ 'adc' -> arith_binop_sizeof(Opnds);
+ 'add' -> arith_binop_sizeof(Opnds);
+ 'and' -> arith_binop_sizeof(Opnds);
+ 'bsf' -> bs_op_sizeof(Opnds);
+ 'bsr' -> bs_op_sizeof(Opnds);
+ 'bswap' -> bswap_sizeof(Opnds);
+ 'bt' -> bt_op_sizeof(Opnds);
+ 'btc' -> bt_op_sizeof(Opnds);
+ 'btr' -> bt_op_sizeof(Opnds);
+ 'bts' -> bt_op_sizeof(Opnds);
+ 'call' -> call_sizeof(Opnds);
+ 'cbw' -> cbw_sizeof(Opnds);
+ 'cdq' -> nullary_op_sizeof(Opnds);
+ 'clc' -> nullary_op_sizeof(Opnds);
+ 'cld' -> nullary_op_sizeof(Opnds);
+ 'cmc' -> nullary_op_sizeof(Opnds);
+ 'cmovcc' -> cmovcc_sizeof(Opnds);
+ 'cmp' -> arith_binop_sizeof(Opnds);
+ 'cwde' -> nullary_op_sizeof(Opnds);
+ 'dec' -> incdec_sizeof(Opnds);
+ 'div' -> arith_unop_sizeof(Opnds);
+ 'enter' -> enter_sizeof(Opnds);
+ 'fadd' -> fp_arith_sizeof(Opnds);
+ 'faddp' -> fp_arith_sizeof(Opnds);
+ 'fchs' -> fchs_sizeof();
+ 'fdiv' -> fp_arith_sizeof(Opnds);
+ 'fdivp' -> fp_arith_sizeof(Opnds);
+ 'fdivr' -> fp_arith_sizeof(Opnds);
+ 'fdivrp' -> fp_arith_sizeof(Opnds);
+ 'ffree' -> ffree_sizeof();
+ 'fild' -> fild_sizeof(Opnds);
+ 'fld' -> fld_sizeof(Opnds);
+ 'fmul' -> fp_arith_sizeof(Opnds);
+ 'fmulp' -> fp_arith_sizeof(Opnds);
+ 'fst' -> fst_sizeof(Opnds);
+ 'fstp' -> fst_sizeof(Opnds);
+ 'fsub' -> fp_arith_sizeof(Opnds);
+ 'fsubp' -> fp_arith_sizeof(Opnds);
+ 'fsubr' -> fp_arith_sizeof(Opnds);
+ 'fsubrp' -> fp_arith_sizeof(Opnds);
+ 'fwait' -> fwait_sizeof();
+ 'fxch' -> fxch_sizeof();
+ 'idiv' -> arith_unop_sizeof(Opnds);
+ 'imul' -> imul_sizeof(Opnds);
+ 'inc' -> incdec_sizeof(Opnds);
+ 'into' -> nullary_op_sizeof(Opnds);
+ 'jcc' -> jcc_sizeof(Opnds);
+ 'jecxz' -> jmp8_op_sizeof(Opnds);
+ 'jmp' -> jmp_sizeof(Opnds);
+ 'lea' -> lea_sizeof(Opnds);
+ 'leave' -> nullary_op_sizeof(Opnds);
+ 'loop' -> jmp8_op_sizeof(Opnds);
+ 'loope' -> jmp8_op_sizeof(Opnds);
+ 'loopne' -> jmp8_op_sizeof(Opnds);
+ 'mov' -> mov_sizeof(Opnds);
+ 'movsx' -> movx_op_sizeof(Opnds);
+ 'movzx' -> movx_op_sizeof(Opnds);
+ 'mul' -> arith_unop_sizeof(Opnds);
+ 'neg' -> arith_unop_sizeof(Opnds);
+ 'nop' -> nullary_op_sizeof(Opnds);
+ 'not' -> arith_unop_sizeof(Opnds);
+ 'or' -> arith_binop_sizeof(Opnds);
+ 'pop' -> pop_sizeof(Opnds);
+ 'prefix_fs' -> nullary_op_sizeof(Opnds);
+ 'push' -> push_sizeof(Opnds);
+ 'rcl' -> shift_op_sizeof(Opnds);
+ 'rcr' -> shift_op_sizeof(Opnds);
+ 'ret' -> ret_sizeof(Opnds);
+ 'rol' -> shift_op_sizeof(Opnds);
+ 'ror' -> shift_op_sizeof(Opnds);
+ 'sar' -> shift_op_sizeof(Opnds);
+ 'sbb' -> arith_binop_sizeof(Opnds);
+ 'setcc' -> setcc_sizeof(Opnds);
+ 'shl' -> shift_op_sizeof(Opnds);
+ 'shld' -> shd_op_sizeof(Opnds);
+ 'shr' -> shift_op_sizeof(Opnds);
+ 'shrd' -> shd_op_sizeof(Opnds);
+ 'stc' -> nullary_op_sizeof(Opnds);
+ 'std' -> nullary_op_sizeof(Opnds);
+ 'sub' -> arith_binop_sizeof(Opnds);
+ 'test' -> test_sizeof(Opnds);
+ 'xor' -> arith_binop_sizeof(Opnds);
+ _ -> exit({?MODULE,insn_sizeof,Op})
+ end.
+
+%%=====================================================================
+%% testing interface
+%%=====================================================================
+
+-ifdef(DO_HIPE_X86_ENCODE_TEST).
+
+say(OS, Str) ->
+ file:write(OS, Str).
+
+digit16(Dig0) ->
+ Dig = Dig0 band 16#F,
+ if Dig >= 16#A -> $A + (Dig - 16#A);
+ true -> $0 + Dig
+ end.
+
+say_byte(OS, Byte) ->
+ say(OS, "0x"),
+ say(OS, [digit16(Byte bsr 4)]),
+ say(OS, [digit16(Byte)]).
+
+init(OS) ->
+ say(OS, "\t.text\n").
+
+say_bytes(OS, Byte0, Bytes0) ->
+ say_byte(OS, Byte0),
+ case Bytes0 of
+ [] ->
+ say(OS, "\n");
+ [Byte1|Bytes1] ->
+ say(OS, ","),
+ say_bytes(OS, Byte1, Bytes1)
+ end.
+
+t(OS, Op, Opnds) ->
+ insn_sizeof(Op, Opnds),
+ {[Byte|Bytes],[]} = insn_encode(Op, Opnds, 0),
+ say(OS, "\t.byte "),
+ say_bytes(OS, Byte, Bytes).
+
+dotest1(OS) ->
+ init(OS),
+ % exercise all rm32 types
+ t(OS,lea,{{reg32,?EAX},{ea,ea_disp32(16#87654321)}}),
+ t(OS,lea,{{reg32,?EAX},{ea,ea_sib(sib(?ECX))}}),
+ t(OS,lea,{{reg32,?EAX},{ea,ea_sib(sib(?ECX,sindex(2#10,?EDI)))}}),
+ t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sindex(16#87654321)}}),
+ t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sindex(16#87654321,sindex(2#10,?EDI))}}),
+ t(OS,lea,{{reg32,?EAX},{ea,ea_base(?ECX)}}),
+ t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_sib(16#03,sib(?ECX))}}),
+ t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_sib(16#03,sib(?ECX,sindex(2#10,?EDI)))}}),
+ t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_base(16#3,?ECX)}}),
+ t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sib(16#87654321,sib(?ECX))}}),
+ t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sib(16#87654321,sib(?ECX,sindex(2#10,?EDI)))}}),
+ t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_base(16#87654321,?EBP)}}),
+ t(OS,call,{{rm32,rm_reg(?EAX)}}),
+ t(OS,call,{{rm32,rm_mem(ea_disp32_sindex(16#87654321,sindex(2#10,?EDI)))}}),
+ t(OS,call,{{rel32,-5}}),
+ % default parameters for the tests below
+ Word32 = 16#87654321,
+ Word16 = 16#F00F,
+ Word8 = 16#80,
+ Imm32 = {imm32,Word32},
+ Imm16 = {imm16,Word16},
+ Imm8 = {imm8,Word8},
+ RM32 = {rm32,rm_reg(?EDX)},
+ RM16 = {rm16,rm_reg(?EDX)},
+ RM8 = {rm8,rm_reg(?EDX)},
+ Rel32 = {rel32,Word32},
+ Rel8 = {rel8,Word8},
+ Moffs32 = {moffs32,Word32},
+ Moffs16 = {moffs16,Word32},
+ Moffs8 = {moffs8,Word32},
+ CC = {cc,?CC_G},
+ Reg32 = {reg32,?EAX},
+ Reg16 = {reg16,?EAX},
+ Reg8 = {reg8,?AH},
+ EA = {ea,ea_base(?ECX)},
+ % exercise each instruction definition
+ t(OS,'adc',{eax,Imm32}),
+ t(OS,'adc',{RM32,Imm32}),
+ t(OS,'adc',{RM32,Imm8}),
+ t(OS,'adc',{RM32,Reg32}),
+ t(OS,'adc',{Reg32,RM32}),
+ t(OS,'add',{eax,Imm32}),
+ t(OS,'add',{RM32,Imm32}),
+ t(OS,'add',{RM32,Imm8}),
+ t(OS,'add',{RM32,Reg32}),
+ t(OS,'add',{Reg32,RM32}),
+ t(OS,'and',{eax,Imm32}),
+ t(OS,'and',{RM32,Imm32}),
+ t(OS,'and',{RM32,Imm8}),
+ t(OS,'and',{RM32,Reg32}),
+ t(OS,'and',{Reg32,RM32}),
+ t(OS,'bsf',{Reg32,RM32}),
+ t(OS,'bsr',{Reg32,RM32}),
+ t(OS,'bswap',{Reg32}),
+ t(OS,'bt',{RM32,Reg32}),
+ t(OS,'bt',{RM32,Imm8}),
+ t(OS,'btc',{RM32,Reg32}),
+ t(OS,'btc',{RM32,Imm8}),
+ t(OS,'btr',{RM32,Reg32}),
+ t(OS,'btr',{RM32,Imm8}),
+ t(OS,'bts',{RM32,Reg32}),
+ t(OS,'bts',{RM32,Imm8}),
+ t(OS,'call',{Rel32}),
+ t(OS,'call',{RM32}),
+ t(OS,'cbw',{}),
+ t(OS,'cdq',{}),
+ t(OS,'clc',{}),
+ t(OS,'cld',{}),
+ t(OS,'cmc',{}),
+ t(OS,'cmovcc',{CC,Reg32,RM32}),
+ t(OS,'cmp',{eax,Imm32}),
+ t(OS,'cmp',{RM32,Imm32}),
+ t(OS,'cmp',{RM32,Imm8}),
+ t(OS,'cmp',{RM32,Reg32}),
+ t(OS,'cmp',{Reg32,RM32}),
+ t(OS,'cwde',{}),
+ t(OS,'dec',{RM32}),
+ t(OS,'dec',{Reg32}),
+ t(OS,'div',{RM32}),
+ t(OS,'enter',{Imm16,{imm8,3}}),
+ t(OS,'idiv',{RM32}),
+ t(OS,'imul',{RM32}),
+ t(OS,'imul',{Reg32,RM32}),
+ t(OS,'imul',{Reg32,RM32,Imm8}),
+ t(OS,'imul',{Reg32,RM32,Imm32}),
+ t(OS,'inc',{RM32}),
+ t(OS,'inc',{Reg32}),
+ t(OS,'into',{}),
+ t(OS,'jcc',{CC,Rel8}),
+ t(OS,'jcc',{CC,Rel32}),
+ t(OS,'jecxz',{Rel8}),
+ t(OS,'jmp',{Rel8}),
+ t(OS,'jmp',{Rel32}),
+ t(OS,'jmp',{RM32}),
+ t(OS,'lea',{Reg32,EA}),
+ t(OS,'leave',{}),
+ t(OS,'loop',{Rel8}),
+ t(OS,'loope',{Rel8}),
+ t(OS,'loopne',{Rel8}),
+ t(OS,'mov',{RM8,Reg8}),
+ t(OS,'mov',{RM16,Reg16}),
+ t(OS,'mov',{RM32,Reg32}),
+ t(OS,'mov',{Reg8,RM8}),
+ t(OS,'mov',{Reg16,RM16}),
+ t(OS,'mov',{Reg32,RM32}),
+ t(OS,'mov',{al,Moffs8}),
+ t(OS,'mov',{ax,Moffs16}),
+ t(OS,'mov',{eax,Moffs32}),
+ t(OS,'mov',{Moffs8,al}),
+ t(OS,'mov',{Moffs16,ax}),
+ t(OS,'mov',{Moffs32,eax}),
+ t(OS,'mov',{Reg8,Imm8}),
+ t(OS,'mov',{Reg16,Imm16}),
+ t(OS,'mov',{Reg32,Imm32}),
+ t(OS,'mov',{RM8,Imm8}),
+ t(OS,'mov',{RM16,Imm16}),
+ t(OS,'mov',{RM32,Imm32}),
+ t(OS,'movsx',{Reg16,RM8}),
+ t(OS,'movsx',{Reg32,RM8}),
+ t(OS,'movsx',{Reg32,RM16}),
+ t(OS,'movzx',{Reg16,RM8}),
+ t(OS,'movzx',{Reg32,RM8}),
+ t(OS,'movzx',{Reg32,RM16}),
+ t(OS,'mul',{RM32}),
+ t(OS,'neg',{RM32}),
+ t(OS,'nop',{}),
+ t(OS,'not',{RM32}),
+ t(OS,'or',{eax,Imm32}),
+ t(OS,'or',{RM32,Imm32}),
+ t(OS,'or',{RM32,Imm8}),
+ t(OS,'or',{RM32,Reg32}),
+ t(OS,'or',{Reg32,RM32}),
+ t(OS,'pop',{RM32}),
+ t(OS,'pop',{Reg32}),
+ t(OS,'push',{RM32}),
+ t(OS,'push',{Reg32}),
+ t(OS,'push',{Imm8}),
+ t(OS,'push',{Imm32}),
+ t(OS,'rcl',{RM32,1}),
+ t(OS,'rcl',{RM32,cl}),
+ t(OS,'rcl',{RM32,Imm8}),
+ t(OS,'rcl',{RM16,Imm8}),
+ t(OS,'rcr',{RM32,1}),
+ t(OS,'rcr',{RM32,cl}),
+ t(OS,'rcr',{RM32,Imm8}),
+ t(OS,'rcr',{RM16,Imm8}),
+ t(OS,'ret',{}),
+ t(OS,'ret',{Imm16}),
+ t(OS,'rol',{RM32,1}),
+ t(OS,'rol',{RM32,cl}),
+ t(OS,'rol',{RM32,Imm8}),
+ t(OS,'rol',{RM16,Imm8}),
+ t(OS,'ror',{RM32,1}),
+ t(OS,'ror',{RM32,cl}),
+ t(OS,'ror',{RM32,Imm8}),
+ t(OS,'ror',{RM16,Imm8}),
+ t(OS,'sar',{RM32,1}),
+ t(OS,'sar',{RM32,cl}),
+ t(OS,'sar',{RM32,Imm8}),
+ t(OS,'sar',{RM16,Imm8}),
+ t(OS,'sbb',{eax,Imm32}),
+ t(OS,'sbb',{RM32,Imm32}),
+ t(OS,'sbb',{RM32,Imm8}),
+ t(OS,'sbb',{RM32,Reg32}),
+ t(OS,'sbb',{Reg32,RM32}),
+ t(OS,'setcc',{CC,RM8}),
+ t(OS,'shl',{RM32,1}),
+ t(OS,'shl',{RM32,cl}),
+ t(OS,'shl',{RM32,Imm8}),
+ t(OS,'shl',{RM16,Imm8}),
+ t(OS,'shld',{RM32,Reg32,Imm8}),
+ t(OS,'shld',{RM32,Reg32,cl}),
+ t(OS,'shr',{RM32,1}),
+ t(OS,'shr',{RM32,cl}),
+ t(OS,'shr',{RM32,Imm8}),
+ t(OS,'shr',{RM16,Imm8}),
+ t(OS,'shrd',{RM32,Reg32,Imm8}),
+ t(OS,'shrd',{RM32,Reg32,cl}),
+ t(OS,'stc',{}),
+ t(OS,'std',{}),
+ t(OS,'sub',{eax,Imm32}),
+ t(OS,'sub',{RM32,Imm32}),
+ t(OS,'sub',{RM32,Imm8}),
+ t(OS,'sub',{RM32,Reg32}),
+ t(OS,'sub',{Reg32,RM32}),
+ t(OS,'test',{eax,Imm32}),
+ t(OS,'test',{RM32,Imm32}),
+ t(OS,'test',{RM32,Reg32}),
+ t(OS,'xor',{eax,Imm32}),
+ t(OS,'xor',{RM32,Imm32}),
+ t(OS,'xor',{RM32,Imm8}),
+ t(OS,'xor',{RM32,Reg32}),
+ t(OS,'xor',{Reg32,RM32}),
+ t(OS,'prefix_fs',{}), t(OS,'add',{{reg32,?EAX},{rm32,rm_mem(ea_disp32(16#20))}}),
+ [].
+
+dotest() -> dotest1(group_leader()). % stdout == group_leader
+
+dotest(File) ->
+ {ok,OS} = file:open(File, [write]),
+ dotest1(OS),
+ file:close(OS).
+-endif.
diff --git a/lib/hipe/x86/hipe_x86_encode.txt b/lib/hipe/x86/hipe_x86_encode.txt
new file mode 100644
index 0000000000..13746e2a47
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_encode.txt
@@ -0,0 +1,213 @@
+$Id$
+
+hipe_x86_encode USAGE GUIDE
+Revision 0.4, 2001-10-09
+
+This document describes how to use the hipe_x86_encode.erl module.
+
+Preliminaries
+-------------
+This is not a tutorial on the x86 architecture. The reader
+should be familiar with both the programming model and
+the general syntax of instructions and their operands.
+
+The hipe_x86_encode module follows the conventions in the
+"Intel Architecture Software Developer's Manual, Volume 2:
+Instruction Set Reference" document. In particular, the
+order of source and destination operands in instructions
+follows Intel's conventions: "add eax,edx" adds edx to eax.
+The GNU Assembler "gas" follows the so-called AT&T syntax
+which reverses the order of the source and destination operands.
+
+Basic Functionality
+-------------------
+The hipe_x86_encode module implements the mapping from symbolic x86
+instructions to their binary representation, as lists of bytes.
+
+Instructions and operands have to match actual x86 instructions
+and operands exactly. The mapping from "abstract" instructions
+to correct x86 instructions has to be done before the instructions
+are passed to the hipe_x86_encode module. (In HiPE, this mapping
+is done by the hipe_x86_assemble module.)
+
+The hipe_x86_encode module handles arithmetic operations on 32-bit
+integers, data movement of 8, 16, and 32-bit words, and most
+control flow operations. A 32-bit address and operand size process
+mode is assumed, which is what Unix and Linux systems use.
+
+Operations and registers related to floating-point, MMX, SIMD, 3dnow!,
+or operating system control are not implemented. Segment registers
+are supported minimally: a 'prefix_fs' pseudo-instruction can be
+used to insert an FS segment register override prefix.
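+
+For example (mirroring the module's own test code; the notation is
+explained in the sections below), an instruction whose memory operand
+should be addressed relative to FS is encoded as two consecutive
+instructions, the override prefix followed by the instruction itself:
+
+	{prefix_fs, {}},
+	{add, {{reg32, hipe_x86_encode:eax()},
+	       {rm32, hipe_x86_encode:rm_mem(hipe_x86_encode:ea_disp32(16#20))}}}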
+
+Instruction Syntax
+------------------
+The function hipe_x86_encode:insn_encode/1 takes an instruction in
+symbolic form and translates it to its binary representation,
+as a list of bytes.
+
+Symbolic instructions are Erlang terms in the following syntax:
+
+ Insn ::= {Op,Opnds}
+ Op ::= (an Erlang atom)
+ Opnds ::= {Opnd1,...,Opndn} (n >= 0)
+ Opnd ::= eax | ax | al | 1 | cl
+ | {imm32,Imm32} | {imm16,Imm16} | {imm8,Imm8}
+ | {rm32,RM32} | {rm16,RM16} | {rm8,RM8}
+ | {rel32,Rel32} | {rel8,Rel8}
+ | {moffs32,Moffs32} | {moffs16,Moffs16} | {moffs8,Moffs8}
+ | {cc,CC}
+ | {reg32,Reg32} | {reg16,Reg16} | {reg8,Reg8}
+ | {ea,EA}
+ Imm32 ::= (a 32-bit integer; immediate value)
+ Imm16 ::= (a 16-bit integer; immediate value)
+ Imm8 ::= (an 8-bit integer; immediate value)
+ Rel32 ::= (a 32-bit integer; jump offset)
+ Rel8 ::= (an 8-bit integer; jump offset)
+ Moffs32 ::= (a 32-bit integer; address of 32-bit word)
+ Moffs16 ::= (a 32-bit integer; address of 16-bit word)
+ Moffs8 ::= (a 32-bit integer; address of 8-bit word)
+ CC ::= (a 4-bit condition code)
+ Reg32 ::= (a 3-bit register number of a 32-bit register)
+ Reg16 ::= (same as Reg32, but the register size is 16 bits)
+ Reg8 ::= (a 3-bit register number of an 8-bit register)
+ EA ::= (general operand; a memory cell)
+ RM32 ::= (general operand; a 32-bit register or memory cell)
+ RM16 ::= (same as RM32, but the operand size is 16 bits)
+ RM8 ::= (general operand; an 8-bit register or memory cell)
+
+To construct these terms, the hipe_x86_encode module exports several
+helper functions:
+
+cc/1
+ Converts an atom to a 4-bit condition code.
+
+al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0
+ Returns a 3-bit register number for an 8-bit register.
+
+eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0
+ Returns a 3-bit register number for a 32- or 16-bit register.
+
+A general operand can be a register or a memory operand.
+An x86 memory operand is expressed as an "effective address":
+
+ Displacement(Base register,Index register,Scale)
+or
+ [base register] + [(index register) * (scale)] + [displacement]
+
+where the base register is any of the 8 integer registers,
+the index register in any of the 8 integer registers except ESP,
+scale is 0, 1, 2, or 3 (multiply index with 1, 2, 4, or 8),
+and displacement is an 8- or 32-bit offset.
+Most components are optional.
+
+An effective address is constructed by calling one of the following
+nine functions:
+
+ea_base/1
+ ea_base(Reg32), where Reg32 is not ESP or EBP,
+ constructs the EA "(Reg32)", i.e. Reg32.
+ea_disp32/1
+	ea_disp32(Disp32) constructs the EA "Disp32"
+ea_disp32_base/2
+	ea_disp32_base(Disp32, Reg32), where Reg32 is not ESP,
+ constructs the EA "Disp32(Reg32)", i.e. Reg32+Disp32.
+ea_disp8_base/2
+ This is like ea_disp32_base/2, except the displacement
+ is 8 bits instead of 32 bits. The CPU will _sign-extend_
+ the 8-bit displacement to 32 bits before using it.
+ea_disp32_sindex/1
+ ea_disp32_sindex(Disp32) constructs the EA "Disp32",
+ but uses a longer encoding than ea_disp32/1.
+ Hint: Don't use this one.
+
+The last four forms use index registers with or without scaling
+factors and base registers, so-called "SIBs". To build these, call:
+
+sindex/2
+ sindex(Scale, Index), where scale is 0, 1, 2, or 3, and
+ Index is a 32-bit integer register except ESP, constructs
+ part of a SIB representing "Index * 2^Scale".
+sib/1
+ sib(Reg32) constructs a SIB containing only a base register
+ and no scaled index, "(Reg32)", i.e. "Reg32".
+sib/2
+ sib(Reg32, sindex(Scale, Index)) constructs a SIB
+ "(Reg32,Index,Scale)", i.e. "Reg32 + (Index * 2^Scale)".
+
+ea_sib/1
+ ea_sib(SIB), where SIB's base register is not EBP,
+ constructs an EA which is that SIB, i.e. "(Base)" or
+ "(Base,Index,Scale)".
+ea_disp32_sib/2
+ ea_disp32_sib(Disp32, SIB) constructs the EA "Disp32(SIB)",
+ i.e. "Base+Disp32" or "Base+(Index*2^Scale)+Disp32".
+ea_disp32_sindex/2
+ ea_disp32_sindex(Disp32, Sindex) constructs the EA
+ "Disp32(,Index,Scale)", i.e. "(Index*2^Scale)+Disp32".
+ea_disp8_sib/2
+ This is just like ea_disp32_sib/2, except the displacement
+ is 8 bits (with sign-extension).
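+
+For example (mirroring the encoder's test suite), with the AT&T form of
+each address shown as a comment:
+
+	hipe_x86_encode:ea_base(hipe_x86_encode:ecx())              % (%ecx)
+	hipe_x86_encode:ea_disp8_base(16#8, hipe_x86_encode:ecx())  % 0x8(%ecx)
+	hipe_x86_encode:ea_disp32_sib(16#87654321,                  % 0x87654321(%ecx,%edi,4)
+	  hipe_x86_encode:sib(hipe_x86_encode:ecx(),
+	                      hipe_x86_encode:sindex(2#10, hipe_x86_encode:edi())))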
+
+To construct a general operand, call one of these two functions:
+
+rm_reg/1
+ rm_reg(Reg) constructs a general operand which is that register.
+rm_mem/1
+	rm_mem(EA) constructs a general operand which is the memory
+ cell addressed by EA.
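+
+For instance, the 32-bit general operands "the register EAX" and "the
+memory word at (%ecx)" are built as:
+
+	{rm32, hipe_x86_encode:rm_reg(hipe_x86_encode:eax())}
+	{rm32, hipe_x86_encode:rm_mem(hipe_x86_encode:ea_base(hipe_x86_encode:ecx()))}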
+
+A symbolic instruction with name "Op" and the n operands "Opnd1"
+to "Opndn" is represented as the tuple
+
+ {Op, {Opnd1, ..., Opndn}}
+
+Usage
+-----
+Once a symbolic instruction "Insn" has been constructed, it can be
+translated to binary by calling
+
+ insn_encode(Insn)
+
+which returns a list of bytes.
+
+Since x86 instructions have varying size (as opposed to most
+RISC machines), there is also a function
+
+ insn_sizeof(Insn)
+
+which returns the number of bytes the binary encoding will occupy.
+insn_sizeof(Insn) equals length(insn_encode(Insn)), but insn_sizeof
+is cheaper to compute. This is useful for two purposes: (1) when
+compiling to memory, one needs to know in advance how many bytes of
+memory to allocate for a piece of code, and (2) when computing the
+relative distance between a jump or call instruction and its target
+label.
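+
+For instance, the two forms of an unconditional jump differ in size
+(the sizes follow directly from the encoding tables in
+hipe_x86_encode.erl), and that difference is what decides whether a
+short rel8 jump can reach its target or a rel32 jump is needed:
+
+	insn_sizeof({jmp, {{rel8, Rel8}}})   ->  2   (opcode + rel8)
+	insn_sizeof({jmp, {{rel32, Rel32}}}) ->  5   (opcode + rel32)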
+
+Examples
+--------
+1. nop
+is constructed as
+ {nop, {}}
+
+2. add eax,edx (eax := eax + edx)
+can be constructed as
+	RM32 = {rm32, hipe_x86_encode:rm_reg(hipe_x86_encode:eax())},
+	Reg32 = {reg32, hipe_x86_encode:edx()},
+	{add, {RM32, Reg32}}
+or as
+ Reg32 = {reg32, hipe_x86_encode:eax()},
+ RM32 = {rm32, hipe_x86_encode:rm_reg(hipe_x86_encode:edx())},
+ {add, {Reg32, RM32}}
+
+3. mov edx,(eax) (edx := MEM[eax])
+is constructed as
+ Reg32 = {reg32, hipe_x86_encode:edx()},
+	RM32 = {rm32, hipe_x86_encode:rm_mem(hipe_x86_encode:ea_base(hipe_x86_encode:eax()))},
+ {mov, {Reg32, RM32}}
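+
+4. push 0x87654321	(push a 32-bit immediate)
+is constructed as
+	{push, {{imm32, 16#87654321}}}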
+
+Addendum
+--------
+The hipe_x86_encode.erl source code is the authoritative reference
+for the hipe_x86_encode module.
+
+Please report errors in either hipe_x86_encode.erl or this guide
diff --git a/lib/hipe/x86/hipe_x86_frame.erl b/lib/hipe/x86/hipe_x86_frame.erl
new file mode 100644
index 0000000000..0a3317a369
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_frame.erl
@@ -0,0 +1,687 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%% x86 stack frame handling
+%%%
+%%% - map non-register temps to stack slots
+%%% - add explicit stack management code to prologue and epilogue,
+%%% and at calls and tailcalls
+%%%
+%%% TODO:
+%%% - Compute max stack in a pre-pass? (get rid of ref cell updates)
+%%% - Merge all_temps and defun_minframe to a single
+%%% pass, for compile-time efficiency reasons.
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_FRAME, hipe_amd64_frame).
+-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
+-define(HIPE_X86_LIVENESS, hipe_amd64_liveness).
+-define(LEAF_WORDS, ?AMD64_LEAF_WORDS).
+-else.
+-define(HIPE_X86_FRAME, hipe_x86_frame).
+-define(HIPE_X86_REGISTERS, hipe_x86_registers).
+-define(HIPE_X86_LIVENESS, hipe_x86_liveness).
+-define(LEAF_WORDS, ?X86_LEAF_WORDS).
+-endif.
+
+-module(?HIPE_X86_FRAME).
+-export([frame/2]).
+-include("../x86/hipe_x86.hrl").
+-include("../rtl/hipe_literals.hrl").
+
+frame(Defun, _Options) ->
+ Formals = fix_formals(hipe_x86:defun_formals(Defun)),
+ Temps0 = all_temps(hipe_x86:defun_code(Defun), Formals),
+ MinFrame = defun_minframe(Defun),
+ Temps = ensure_minframe(MinFrame, Temps0),
+ CFG0 = hipe_x86_cfg:init(Defun),
+ Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0),
+ CFG1 = do_body(CFG0, Liveness, Formals, Temps),
+ hipe_x86_cfg:linearise(CFG1).
+
+fix_formals(Formals) ->
+ fix_formals(?HIPE_X86_REGISTERS:nr_args(), Formals).
+
+fix_formals(0, Rest) -> Rest;
+fix_formals(N, [_|Rest]) -> fix_formals(N-1, Rest);
+fix_formals(_, []) -> [].
+
+do_body(CFG0, Liveness, Formals, Temps) ->
+ Context = mk_context(Liveness, Formals, Temps),
+ CFG1 = do_blocks(CFG0, Context),
+ do_prologue(CFG1, Context).
+
+do_blocks(CFG, Context) ->
+ Labels = hipe_x86_cfg:labels(CFG),
+ do_blocks(Labels, CFG, Context).
+
+do_blocks([Label|Labels], CFG, Context) ->
+ Liveness = context_liveness(Context),
+ LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label),
+ Block = hipe_x86_cfg:bb(CFG, Label),
+ Code = hipe_bb:code(Block),
+ NewCode = do_block(Code, LiveOut, Context),
+ NewBlock = hipe_bb:code_update(Block, NewCode),
+ NewCFG = hipe_x86_cfg:bb_add(CFG, Label, NewBlock),
+ do_blocks(Labels, NewCFG, Context);
+do_blocks([], CFG, _) ->
+ CFG.
+
+do_block(Insns, LiveOut, Context) ->
+ do_block(Insns, LiveOut, Context, context_framesize(Context), []).
+
+do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) ->
+ {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0),
+ do_block(Insns, LiveOut, Context, FPoff1, lists:reverse(NewIs, RevCode));
+do_block([], _, Context, FPoff, RevCode) ->
+ FPoff0 = context_framesize(Context),
+ if FPoff =:= FPoff0 -> [];
+ true -> exit({?MODULE,do_block,FPoff})
+ end,
+ lists:reverse(RevCode, []).
+
+do_insn(I, LiveOut, Context, FPoff) ->
+ case I of
+ #alu{} ->
+ {[do_alu(I, Context, FPoff)], FPoff};
+ #cmp{} ->
+ {[do_cmp(I, Context, FPoff)], FPoff};
+ #fp_unop{} ->
+ {do_fp_unop(I, Context, FPoff), FPoff};
+ #fp_binop{} ->
+ {do_fp_binop(I, Context, FPoff), FPoff};
+ #fmove{} ->
+ {[do_fmove(I, Context, FPoff)], FPoff};
+ #imul{} ->
+ {[do_imul(I, Context, FPoff)], FPoff};
+ #move{} ->
+ {[do_move(I, Context, FPoff)], FPoff};
+ #movsx{} ->
+ {[do_movsx(I, Context, FPoff)], FPoff};
+ #movzx{} ->
+ {[do_movzx(I, Context, FPoff)], FPoff};
+ #pseudo_call{} ->
+ do_pseudo_call(I, LiveOut, Context, FPoff);
+ #pseudo_tailcall{} ->
+ {do_pseudo_tailcall(I, Context), context_framesize(Context)};
+ #push{} ->
+ {[do_push(I, Context, FPoff)], FPoff+word_size()};
+ #ret{} ->
+ {do_ret(I, Context, FPoff), context_framesize(Context)};
+ #shift{} ->
+ {[do_shift(I, Context, FPoff)], FPoff};
+ _ -> % comment, jmp, label, pseudo_jcc, pseudo_tailcall_prepare
+ {[I], FPoff}
+ end.
+
+%%%
+%%% Convert any pseudo-temp operand in a binary (alu, cmp, move)
+%%% or unary (push) instruction to an explicit x86_mem operand.
+%%%
+
+do_alu(I, Context, FPoff) ->
+ #alu{src=Src0,dst=Dst0} = I,
+ Src = conv_opnd(Src0, FPoff, Context),
+ Dst = conv_opnd(Dst0, FPoff, Context),
+ I#alu{src=Src,dst=Dst}.
+
+do_cmp(I, Context, FPoff) ->
+ #cmp{src=Src0,dst=Dst0} = I,
+ Src = conv_opnd(Src0, FPoff, Context),
+ Dst = conv_opnd(Dst0, FPoff, Context),
+ I#cmp{src=Src,dst=Dst}.
+
+do_fp_unop(I, Context, FPoff) ->
+ #fp_unop{arg=Arg0} = I,
+ Arg = conv_opnd(Arg0, FPoff, Context),
+ [I#fp_unop{arg=Arg}].
+
+do_fp_binop(I, Context, FPoff) ->
+ #fp_binop{src=Src0,dst=Dst0} = I,
+ Src = conv_opnd(Src0, FPoff, Context),
+ Dst = conv_opnd(Dst0, FPoff, Context),
+ [I#fp_binop{src=Src,dst=Dst}].
+
+do_fmove(I, Context, FPoff) ->
+ #fmove{src=Src0,dst=Dst0} = I,
+ Src = conv_opnd(Src0, FPoff, Context),
+ Dst = conv_opnd(Dst0, FPoff, Context),
+ I#fmove{src=Src,dst=Dst}.
+
+do_imul(I, Context, FPoff) ->
+ #imul{src=Src0} = I,
+ Src = conv_opnd(Src0, FPoff, Context),
+ I#imul{src=Src}.
+
+do_move(I, Context, FPoff) ->
+ #move{src=Src0,dst=Dst0} = I,
+ Src = conv_opnd(Src0, FPoff, Context),
+ Dst = conv_opnd(Dst0, FPoff, Context),
+ I#move{src=Src,dst=Dst}.
+
+do_movsx(I, Context, FPoff) ->
+ #movsx{src=Src0,dst=Dst0} = I,
+ Src = conv_opnd(Src0, FPoff, Context),
+ Dst = conv_opnd(Dst0, FPoff, Context),
+ I#movsx{src=Src,dst=Dst}.
+
+do_movzx(I, Context, FPoff) ->
+ #movzx{src=Src0,dst=Dst0} = I,
+ Src = conv_opnd(Src0, FPoff, Context),
+ Dst = conv_opnd(Dst0, FPoff, Context),
+ I#movzx{src=Src,dst=Dst}.
+
+do_push(I, Context, FPoff) ->
+ #push{src=Src0} = I,
+ Src = conv_opnd(Src0, FPoff, Context),
+ I#push{src=Src}.
+
+do_shift(I, Context, FPoff) ->
+ #shift{src=Src0,dst=Dst0} = I,
+ Src = conv_opnd(Src0, FPoff, Context),
+ Dst = conv_opnd(Dst0, FPoff, Context),
+ I#shift{src=Src,dst=Dst}.
+
+conv_opnd(Opnd, FPoff, Context) ->
+ case opnd_is_pseudo(Opnd) of
+ false ->
+ Opnd;
+ true ->
+ conv_pseudo(Opnd, FPoff, Context)
+ end.
+
+conv_pseudo(Temp, FPoff, Context) ->
+ Off = FPoff + context_offset(Context, Temp),
+ conv_pseudo(Temp, Off).
+
+conv_pseudo(Temp, Off) ->
+ hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(Off), hipe_x86:temp_type(Temp)).
+
+%%%
+%%% Return - deallocate frame and emit 'ret $N' insn.
+%%%
+
+do_ret(_I, Context, FPoff) ->
+ %% XXX: this conses up a new ret insn, ignoring the one rtl->x86 made
+ adjust_sp(FPoff, [hipe_x86:mk_ret(word_size()*context_arity(Context))]).
+
+adjust_sp(N, Rest) ->
+ if N =:= 0 ->
+ Rest;
+ true ->
+ [hipe_x86:mk_alu('add', hipe_x86:mk_imm(N), mk_sp()) | Rest]
+ end.
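+
+%% For example (a sketch, assuming x86 with a 4-byte word): a function
+%% with three stack-passed arguments returning from a 2-word frame emits
+%%   add $8, %esp
+%%   ret $12
+%% i.e. the frame is deallocated and the ret also pops the stacked arguments.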
+
+%%%
+%%% Recursive calls.
+%%%
+
+do_pseudo_call(I, LiveOut, Context, FPoff0) ->
+ #x86_sdesc{exnlab=ExnLab,arity=OrigArity} = hipe_x86:pseudo_call_sdesc(I),
+ Fun0 = hipe_x86:pseudo_call_fun(I),
+ Fun1 = conv_opnd(Fun0, FPoff0, Context),
+ LiveTemps = [Temp || Temp <- LiveOut, temp_is_pseudo(Temp)],
+ SDesc = mk_sdesc(ExnLab, Context, LiveTemps),
+ ContLab = hipe_x86:pseudo_call_contlab(I),
+ Linkage = hipe_x86:pseudo_call_linkage(I),
+ CallCode = [hipe_x86:mk_pseudo_call(Fun1, SDesc, ContLab, Linkage)],
+ %% +word_size() for our RA and +word_size() for callee's RA should
+ %% it need to call inc_stack
+ StkArity = erlang:max(0, OrigArity - ?HIPE_X86_REGISTERS:nr_args()),
+ context_need_stack(Context, stack_need(FPoff0 + 2*word_size(), StkArity, Fun1)),
+ ArgsBytes = word_size() * StkArity,
+ {CallCode, FPoff0 - ArgsBytes}.
+
+stack_need(FPoff, StkArity, Fun) ->
+ case Fun of
+ #x86_prim{} -> FPoff;
+ #x86_mfa{m=M,f=F,a=A} ->
+ case erlang:is_builtin(M, F, A) of
+ true -> FPoff;
+ false -> stack_need_general(FPoff, StkArity)
+ end;
+ #x86_temp{} -> stack_need_general(FPoff, StkArity);
+ #x86_mem{} -> stack_need_general(FPoff, StkArity)
+ end.
+
+stack_need_general(FPoff, StkArity) ->
+ erlang:max(FPoff, FPoff + (?LEAF_WORDS - 2 - StkArity) * word_size()).
+
+%%%
+%%% Create stack descriptors for call sites.
+%%%
+
+mk_sdesc(ExnLab, Context, Temps) -> % for normal calls
+ Temps0 = only_tagged(Temps),
+ Live = mk_live(Context, Temps0),
+ Arity = context_arity(Context),
+ FSize = context_framesize(Context),
+ hipe_x86:mk_sdesc(ExnLab, FSize div word_size(), Arity,
+ list_to_tuple(Live)).
+
+only_tagged(Temps)->
+ [X || X <- Temps, hipe_x86:temp_type(X) =:= 'tagged'].
+
+mk_live(Context, Temps) ->
+ lists:sort([temp_to_slot(Context, Temp) || Temp <- Temps]).
+
+temp_to_slot(Context, Temp) ->
+ (context_framesize(Context) + context_offset(Context, Temp))
+ div word_size().
+
+mk_minimal_sdesc(Context) -> % for inc_stack_0 calls
+ hipe_x86:mk_sdesc([], 0, context_arity(Context), {}).
+
+%%%
+%%% Tailcalls.
+%%%
+
+do_pseudo_tailcall(I, Context) -> % always at FPoff=context_framesize(Context)
+ Arity = context_arity(Context),
+ Args = hipe_x86:pseudo_tailcall_stkargs(I) ++ [context_ra(Context)],
+ Fun0 = hipe_x86:pseudo_tailcall_fun(I),
+ {Insns, FPoff1, Fun1} = do_tailcall_args(Args, Context, Fun0),
+ context_need_stack(Context, FPoff1),
+ FPoff2 = FPoff1 + word_size()+word_size()*Arity - word_size()*length(Args),
+ %% +word_size() for callee's inc_stack RA
+ StkArity = length(hipe_x86:pseudo_tailcall_stkargs(I)),
+ context_need_stack(Context, stack_need(FPoff2 + word_size(), StkArity, Fun1)),
+ I2 = hipe_x86:mk_jmp_fun(Fun1, hipe_x86:pseudo_tailcall_linkage(I)),
+ Insns ++ adjust_sp(FPoff2, [I2]).
+
+do_tailcall_args(Args, Context, Fun0) ->
+ FPoff0 = context_framesize(Context),
+ Arity = context_arity(Context),
+ FrameTop = word_size() + word_size()*Arity,
+ DangerOff = FrameTop - word_size()*length(Args),
+ Moves = mk_moves(Args, FrameTop, []),
+ {Stores, Simple, Conflict} =
+ split_moves(Moves, Context, DangerOff, [], [], []),
+ %% sanity check (shouldn't trigger any more)
+ if DangerOff < -FPoff0 ->
+ exit({?MODULE,do_tailcall_args,DangerOff,-FPoff0});
+ true -> []
+ end,
+ FPoff1 = FPoff0,
+ %%
+ {Pushes, MoreSimple, FPoff2} = split_conflict(Conflict, FPoff1, [], []),
+ %%
+ {PushFun0, FPoff3, LoadFun1, Fun1} =
+ case opnd_is_pseudo(Fun0) of
+ false ->
+ {[], FPoff2, [], Fun0};
+ true ->
+ Type = hipe_x86:temp_type(Fun0),
+ Temp1 = mk_temp1(Type),
+ Fun0Off = context_offset(Context, Fun0),
+ MEM0 = conv_pseudo(Fun0, FPoff2 + Fun0Off),
+ if Fun0Off >= DangerOff ->
+ Fun1Off = hipe_x86:mk_imm(0),
+ MEM1 = hipe_x86:mk_mem(mk_sp(), Fun1Off, Type),
+ {[hipe_x86:mk_push(MEM0)],
+ FPoff2 + word_size(),
+ [hipe_x86:mk_move(MEM1, Temp1)],
+ Temp1};
+ true ->
+ {[], FPoff2, [hipe_x86:mk_move(MEM0, Temp1)], Temp1}
+ end
+ end,
+ %%
+ RegTemp0 = ?HIPE_X86_REGISTERS:temp0(),
+ TempReg =
+ case hipe_x86:is_temp(Fun1) of
+ true ->
+ RegFun1 = hipe_x86:temp_reg(Fun1),
+ if RegFun1 =/= RegTemp0 -> RegTemp0;
+ true -> ?HIPE_X86_REGISTERS:temp1()
+ end;
+ false ->
+ RegTemp0
+ end,
+ %%
+ {Pushes ++ PushFun0 ++
+ store_moves(Stores, FPoff3, LoadFun1 ++
+ simple_moves(Simple, FPoff3, TempReg,
+ simple_moves(MoreSimple, FPoff3, TempReg,
+ []))),
+ FPoff3, Fun1}.
+
+mk_moves([Arg|Args], Off, Moves) ->
+ Off1 = Off - word_size(),
+ mk_moves(Args, Off1, [{Arg,Off1}|Moves]);
+mk_moves([], _, Moves) ->
+ Moves.
+
+split_moves([Move|Moves], Context, DangerOff, Stores, Simple, Conflict) ->
+ {Src,DstOff} = Move,
+ case src_is_pseudo(Src) of
+ false ->
+ split_moves(Moves, Context, DangerOff, [Move|Stores],
+ Simple, Conflict);
+ true ->
+ SrcOff = context_offset(Context, Src),
+ Type = typeof_src(Src),
+ if SrcOff =:= DstOff ->
+ split_moves(Moves, Context, DangerOff, Stores,
+ Simple, Conflict);
+ SrcOff >= DangerOff ->
+ split_moves(Moves, Context, DangerOff, Stores,
+ Simple, [{SrcOff,DstOff,Type}|Conflict]);
+ true ->
+ split_moves(Moves, Context, DangerOff, Stores,
+ [{SrcOff,DstOff,Type}|Simple], Conflict)
+ end
+ end;
+split_moves([], _, _, Stores, Simple, Conflict) ->
+ {Stores, Simple, Conflict}.
+
+split_conflict([{SrcOff,DstOff,Type}|Conflict], FPoff, Pushes, Simple) ->
+ Push = hipe_x86:mk_push(
+ hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(FPoff+SrcOff), Type)),
+ split_conflict(Conflict, FPoff+word_size(), [Push|Pushes],
+ [{-(FPoff+word_size()),DstOff,Type}|Simple]);
+split_conflict([], FPoff, Pushes, Simple) ->
+ {lists:reverse(Pushes), Simple, FPoff}.
+
+simple_moves([{SrcOff,DstOff,Type}|Moves], FPoff, TempReg, Rest) ->
+ Temp = hipe_x86:mk_temp(TempReg, Type),
+ SP = mk_sp(),
+ LoadOff = hipe_x86:mk_imm(FPoff+SrcOff),
+ LD = hipe_x86:mk_move(hipe_x86:mk_mem(SP, LoadOff, Type), Temp),
+ StoreOff = hipe_x86:mk_imm(FPoff+DstOff),
+ ST = hipe_x86:mk_move(Temp, hipe_x86:mk_mem(SP, StoreOff, Type)),
+ simple_moves(Moves, FPoff, TempReg, [LD, ST | Rest]);
+simple_moves([], _, _, Rest) ->
+ Rest.
+
+store_moves([{Src,DstOff}|Moves], FPoff, Rest) ->
+ Type = typeof_src(Src),
+ SP = mk_sp(),
+ StoreOff = hipe_x86:mk_imm(FPoff+DstOff),
+ ST = hipe_x86:mk_move(Src, hipe_x86:mk_mem(SP, StoreOff, Type)),
+ store_moves(Moves, FPoff, [ST | Rest]);
+store_moves([], _, Rest) ->
+ Rest.
+
+%%%
+%%% Contexts
+%%%
+
+-record(context, {liveness, framesize, arity, map, ra, ref_maxstack}).
+
+mk_context(Liveness, Formals, Temps) ->
+ RA = hipe_x86:mk_new_temp('untagged'),
+ {Map, MinOff} = mk_temp_map(Formals, RA, Temps),
+ FrameSize = (-MinOff),
+ RefMaxStack = hipe_bifs:ref(FrameSize),
+ Context = #context{liveness=Liveness,
+ framesize=FrameSize, arity=length(Formals),
+ map=Map, ra=RA, ref_maxstack=RefMaxStack},
+ Context.
+
+context_need_stack(#context{ref_maxstack=RM}, N) ->
+ M = hipe_bifs:ref_get(RM),
+ if N > M -> hipe_bifs:ref_set(RM, N);
+ true -> []
+ end.
+
+context_maxstack(#context{ref_maxstack=RM}) ->
+ hipe_bifs:ref_get(RM).
+
+context_arity(#context{arity=Arity}) ->
+ Arity.
+
+context_framesize(#context{framesize=FrameSize}) ->
+ FrameSize.
+
+context_liveness(#context{liveness=Liveness}) ->
+ Liveness.
+
+context_offset(#context{map=Map}, Temp) ->
+ tmap_lookup(Map, Temp).
+
+context_ra(#context{ra=RA}) ->
+ RA.
+
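+%% Build the temp-to-frame-slot map.  Roughly: the stacked formals get
+%% positive offsets above the return address, the return address itself
+%% sits at offset 0, and spilled temps get negative offsets below it.
+%% The frame size is the negated lowest offset.  In the body these
+%% offsets are added to the current FPoff (the distance from %esp to the
+%% return-address slot) to form %esp-relative memory operands.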
+mk_temp_map(Formals, RA, Temps) ->
+ {Map, _} = enter_vars(Formals, word_size() * (length(Formals)+1),
+ tmap_bind(tmap_empty(), RA, 0)),
+ enter_vars(tset_to_list(Temps), 0, Map).
+
+enter_vars([V|Vs], PrevOff, Map) ->
+ Off =
+ case hipe_x86:temp_type(V) of
+ 'double' -> PrevOff - float_size();
+ _ -> PrevOff - word_size()
+ end,
+ enter_vars(Vs, Off, tmap_bind(Map, V, Off));
+enter_vars([], Off, Map) ->
+ {Map, Off}.
+
+tmap_empty() ->
+ gb_trees:empty().
+
+tmap_bind(Map, Key, Val) ->
+ gb_trees:insert(Key, Val, Map).
+
+tmap_lookup(Map, Key) ->
+ gb_trees:get(Key, Map).
+
+%%%
+%%% do_prologue: prepend stack frame allocation code.
+%%%
+%%% NewStart:
+%%% temp0 = sp - MaxStack
+%%% if( temp0 < SP_LIMIT(P) ) goto IncStack else goto AllocFrame
+%%% AllocFrame:
+%%% sp -= FrameSize
+%%% goto OldStart
+%%% OldStart:
+%%% ...
+%%% IncStack:
+%%% call inc_stack
+%%% goto NewStart
+
+do_prologue(CFG, Context) ->
+ do_check_stack(do_alloc_frame(CFG, Context), Context).
+
+do_alloc_frame(CFG, Context) ->
+ case context_framesize(Context) of
+ 0 ->
+ CFG;
+ FrameSize ->
+ OldStartLab = hipe_x86_cfg:start_label(CFG),
+ AllocFrameLab = hipe_gensym:get_next_label(x86),
+ SP = mk_sp(),
+ AllocFrameCode =
+ [hipe_x86:mk_alu('sub', hipe_x86:mk_imm(FrameSize), SP),
+ hipe_x86:mk_jmp_label(OldStartLab)],
+ CFG1 = hipe_x86_cfg:bb_add(CFG, AllocFrameLab,
+ hipe_bb:mk_bb(AllocFrameCode)),
+ hipe_x86_cfg:start_label_update(CFG1, AllocFrameLab)
+ end.
+
+do_check_stack(CFG, Context) ->
+ MaxStack = context_maxstack(Context),
+ Arity = context_arity(Context),
+ Guaranteed = erlang:max(0, (?LEAF_WORDS - 1 - Arity) * word_size()),
+ if MaxStack =< Guaranteed ->
+ %% io:format("~w: MaxStack ~w =< Guaranteed ~w :-)\n", [?MODULE,MaxStack,Guaranteed]),
+ CFG;
+ true ->
+ %% io:format("~w: MaxStack ~w > Guaranteed ~w :-(\n", [?MODULE,MaxStack,Guaranteed]),
+ AllocFrameLab = hipe_x86_cfg:start_label(CFG),
+ NewStartLab = hipe_gensym:get_next_label(x86),
+ IncStackLab = hipe_gensym:get_next_label(x86),
+ %%
+ Type = 'untagged',
+ Preg = ?HIPE_X86_REGISTERS:proc_pointer(),
+ Pbase = hipe_x86:mk_temp(Preg, Type),
+ SP_LIMIT_OFF = hipe_x86:mk_imm(
+ ?HIPE_X86_REGISTERS:sp_limit_offset()),
+ Temp0 = mk_temp0(Type),
+ SP = mk_sp(),
+ NewStartCode =
+ %% hopefully this lea is faster than the mov;sub it replaced
+ [hipe_x86:mk_lea(
+ hipe_x86:mk_mem(SP, hipe_x86:mk_imm(-MaxStack), 'untagged'),
+ Temp0),
+ hipe_x86:mk_cmp(
+ hipe_x86:mk_mem(Pbase, SP_LIMIT_OFF, Type), Temp0),
+ hipe_x86:mk_pseudo_jcc('b', IncStackLab, AllocFrameLab, 0.01)],
+ IncStackCode =
+ [hipe_x86:mk_call(hipe_x86:mk_prim('inc_stack_0'),
+ mk_minimal_sdesc(Context), not_remote),
+ hipe_x86:mk_jmp_label(NewStartLab)],
+ %%
+ CFG1 = hipe_x86_cfg:bb_add(CFG, NewStartLab,
+ hipe_bb:mk_bb(NewStartCode)),
+ CFG2 = hipe_x86_cfg:bb_add(CFG1, IncStackLab,
+ hipe_bb:mk_bb(IncStackCode)),
+ hipe_x86_cfg:start_label_update(CFG2, NewStartLab)
+ end.
+
+%%% typeof_src -- what's src's type?
+
+typeof_src(Src) ->
+ case Src of
+ #x86_imm{} ->
+ 'untagged';
+ #x86_temp{} ->
+ hipe_x86:temp_type(Src);
+ #x86_mem{} ->
+ hipe_x86:mem_type(Src)
+ end.
+
+%%% Cons up an '%sp' Temp.
+
+mk_sp() ->
+ hipe_x86:mk_temp(?HIPE_X86_REGISTERS:sp(), 'untagged').
+
+%%% Cons up a '%temp0' Temp.
+
+mk_temp0(Type) ->
+ hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), Type).
+
+%%% Cons up a '%temp1' Temp.
+
+mk_temp1(Type) ->
+ hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), Type).
+
+%%% Check if an operand is a pseudo-Temp.
+
+src_is_pseudo(Src) ->
+ opnd_is_pseudo(Src).
+
+opnd_is_pseudo(Opnd) ->
+ case hipe_x86:is_temp(Opnd) of
+ true -> temp_is_pseudo(Opnd);
+ false -> false
+ end.
+
+temp_is_pseudo(Temp) ->
+ case hipe_x86:is_temp(Temp) of
+ true ->
+ not(?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp)));
+ false ->
+ false
+ end.
+
+
+%%%
+%%% Build the set of all temps used in a Defun's body.
+%%%
+
+all_temps(Code, Formals) ->
+ S0 = find_temps(Code, tset_empty()),
+ S1 = tset_del_list(S0, Formals),
+ S2 = tset_filter(S1, fun(T) -> temp_is_pseudo(T) end),
+ S2.
+
+find_temps([I|Insns], S0) ->
+ S1 = tset_add_list(S0, hipe_x86_defuse:insn_def(I)),
+ S2 = tset_add_list(S1, hipe_x86_defuse:insn_use(I)),
+ find_temps(Insns, S2);
+find_temps([], S) ->
+ S.
+
+tset_empty() ->
+ gb_sets:new().
+
+tset_size(S) ->
+ gb_sets:size(S).
+
+tset_insert(S, T) ->
+ gb_sets:add_element(T, S).
+
+tset_add_list(S, Ts) ->
+ gb_sets:union(S, gb_sets:from_list(Ts)).
+
+tset_del_list(S, Ts) ->
+ gb_sets:subtract(S, gb_sets:from_list(Ts)).
+
+tset_filter(S, F) ->
+ gb_sets:filter(F, S).
+
+tset_to_list(S) ->
+ gb_sets:to_list(S).
+
+%%%
+%%% Compute minimum permissible frame size, ignoring spilled temps.
+%%% This is done to ensure that we won't have to adjust the frame size
+%%% in the middle of a tailcall.
+%%%
+
+defun_minframe(Defun) ->
+ MaxTailArity = body_mta(hipe_x86:defun_code(Defun), 0),
+ MyArity = length(fix_formals(hipe_x86:defun_formals(Defun))),
+ erlang:max(MaxTailArity - MyArity, 0).
+
+body_mta([I|Code], MTA) ->
+ body_mta(Code, insn_mta(I, MTA));
+body_mta([], MTA) ->
+ MTA.
+
+insn_mta(I, MTA) ->
+ case I of
+ #pseudo_tailcall{arity=Arity} ->
+ erlang:max(MTA, Arity - ?HIPE_X86_REGISTERS:nr_args());
+ _ -> MTA
+ end.
+
+%%%
+%%% Ensure that we have enough temps to satisfy the minimum frame size,
+%%% if necessary by prepending unused dummy temps.
+%%%
+
+ensure_minframe(MinFrame, Temps) ->
+ ensure_minframe(MinFrame, tset_size(Temps), Temps).
+
+ensure_minframe(MinFrame, Frame, Temps) ->
+ if MinFrame > Frame ->
+ Temp = hipe_x86:mk_new_temp('untagged'),
+ ensure_minframe(MinFrame, Frame+1, tset_insert(Temps, Temp));
+ true -> Temps
+ end.
+
+word_size() ->
+ ?HIPE_X86_REGISTERS:wordsize().
+
+float_size() ->
+ ?HIPE_X86_REGISTERS:float_size().
diff --git a/lib/hipe/x86/hipe_x86_liveness.erl b/lib/hipe/x86/hipe_x86_liveness.erl
new file mode 100644
index 0000000000..6874b05a59
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_liveness.erl
@@ -0,0 +1,57 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%% x86_liveness -- compute register liveness for x86 CFGs
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_LIVENESS, hipe_amd64_liveness).
+-define(HIPE_X86_DEFUSE, hipe_amd64_defuse).
+-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
+-else.
+-define(HIPE_X86_LIVENESS, hipe_x86_liveness).
+-define(HIPE_X86_DEFUSE, hipe_x86_defuse).
+-define(HIPE_X86_REGISTERS, hipe_x86_registers).
+-endif.
+
+-module(?HIPE_X86_LIVENESS).
+
+-export([analyse/1]).
+-export([liveout/2]).
+-export([uses/1, defines/1]). % used in hipe_*_spill_restore modules
+
+-include("../x86/hipe_x86.hrl"). % ../x86/ is needed when included in amd64
+-include("../flow/liveness.inc").
+
+analyse(CFG) -> analyze(CFG).
+cfg_bb(CFG, L) -> hipe_x86_cfg:bb(CFG, L).
+cfg_postorder(CFG) -> hipe_x86_cfg:postorder(CFG).
+cfg_succ(CFG, L) -> hipe_x86_cfg:succ(CFG, L).
+uses(Insn) -> ?HIPE_X86_DEFUSE:insn_use(Insn).
+defines(Insn) -> ?HIPE_X86_DEFUSE:insn_def(Insn).
+liveout_no_succ() ->
+ ordsets:from_list(lists:map(fun({Reg,Type}) ->
+ hipe_x86:mk_temp(Reg, Type)
+ end,
+ ?HIPE_X86_REGISTERS:live_at_return())).
+
+-ifdef(DEBUG_LIVENESS).
+cfg_labels(CFG) -> hipe_x86_cfg:labels(CFG).
+cfg_bb_add(CFG,L,NewBB) -> hipe_x86_cfg:bb_add(CFG,L,NewBB).
+mk_comment(Text) -> hipe_x86:mk_comment(Text).
+-endif.
diff --git a/lib/hipe/x86/hipe_x86_main.erl b/lib/hipe/x86/hipe_x86_main.erl
new file mode 100644
index 0000000000..f45a49ca0a
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_main.erl
@@ -0,0 +1,70 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_MAIN, hipe_amd64_main).
+-define(RTL_TO_X86, rtl_to_amd64). % XXX: kill this crap
+-define(HIPE_RTL_TO_X86, hipe_rtl_to_amd64).
+-define(HIPE_X86_RA, hipe_amd64_ra).
+-define(HIPE_X86_FRAME, hipe_amd64_frame).
+-define(HIPE_X86_PP, hipe_amd64_pp).
+-define(X86TAG, amd64). % XXX: kill this crap
+-define(X86STR, "amd64").
+-define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore).
+-else.
+-define(HIPE_X86_MAIN, hipe_x86_main).
+-define(RTL_TO_X86, rtl_to_x86). % XXX: kill this crap
+-define(HIPE_RTL_TO_X86, hipe_rtl_to_x86).
+-define(HIPE_X86_RA, hipe_x86_ra).
+-define(HIPE_X86_FRAME, hipe_x86_frame).
+-define(HIPE_X86_PP, hipe_x86_pp).
+-define(X86TAG, x86). % XXX: kill this crap
+-define(X86STR, "x86").
+-define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore).
+-endif.
+
+-module(?HIPE_X86_MAIN).
+-export([?RTL_TO_X86/3]). % XXX: change to 'from_rtl' to avoid $ARCH substring
+
+-ifndef(DEBUG).
+-define(DEBUG,1).
+-endif.
+-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation.
+-include("../main/hipe.hrl").
+
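+%% Main entry point: lower RTL to x86, optionally insert caller-save
+%% spill/restore code, allocate registers, build the stack frame, and
+%% run the peephole/finalisation postpass.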
+?RTL_TO_X86(MFA, RTL, Options) ->
+ Translated = ?option_time(?HIPE_RTL_TO_X86:translate(RTL),
+ "RTL-to-"?X86STR, Options),
+ SpillRest =
+ case proplists:get_bool(caller_save_spill_restore, Options) of
+ true ->
+ ?option_time(?HIPE_X86_SPILL_RESTORE:spill_restore(Translated, Options),
+ ?X86STR" spill restore", Options);
+ false ->
+ Translated
+ end,
+ Allocated = ?option_time(?HIPE_X86_RA:ra(SpillRest, Options),
+ ?X86STR" register allocation", Options),
+ Framed = ?option_time(?HIPE_X86_FRAME:frame(Allocated, Options),
+ ?X86STR" frame", Options),
+ Finalised = ?option_time(hipe_x86_postpass:postpass(Framed, Options),
+ ?X86STR" finalise", Options),
+ ?HIPE_X86_PP:optional_pp(Finalised, MFA, Options),
+ {native, ?X86TAG, {unprofiled, Finalised}}.
diff --git a/lib/hipe/x86/hipe_x86_postpass.erl b/lib/hipe/x86/hipe_x86_postpass.erl
new file mode 100644
index 0000000000..34e3d7a11b
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_postpass.erl
@@ -0,0 +1,276 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2003-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%%----------------------------------------------------------------------
+%%% File : hipe_x86_postpass.erl
+%%% Author : Christoffer Vikström <[email protected]>
+%%% Purpose : Contains postpass optimisations for x86-assembler code.
+%%% Created : 5 Aug 2003 by Christoffer Vikström <[email protected]>
+%%%----------------------------------------------------------------------
+
+-ifndef(HIPE_X86_POSTPASS).
+-define(HIPE_X86_POSTPASS, hipe_x86_postpass).
+-endif.
+
+-module(?HIPE_X86_POSTPASS).
+-export([postpass/2]).
+-include("../x86/hipe_x86.hrl").
+
+%%>----------------------------------------------------------------------<
+% Procedure : postpass/2
+% Purpose   : Function that performs a number of postpass optimizations on
+% the hipe x86-assembler code before it is encoded and loaded.
+%%>----------------------------------------------------------------------<
+postpass(#defun{code=Code0}=Defun, Options) ->
+ Code1 = pseudo_insn_expansion(Code0),
+ Code2 = case proplists:get_bool(peephole, Options) of
+ true -> peephole_optimization(Code1);
+ false -> Code1
+ end,
+ Code3 = trivial_goto_elimination(Code2),
+ Defun#defun{code=Code3}.
+
+
+%%>----------------------------------------------------------------------<
+% Procedure : peep/1
+% Purpose : Function that does peephole optimizations. It works by
+% moving a window over the code and looking at a sequence of
+% a few instructions. Replaces long sequences of instructions
+% with shorter ones and removes unnecessary ones.
+% Arguments : Insns - List of pseudo x86-assembler records.
+% Res - Returned list of pseudo x86-assembler records.
+% Kept reversed, until it is returned.
+% Return : An optimized list of pseudo x86-assembler records with
+% (hopefully) fewer or faster instructions.
+%%>----------------------------------------------------------------------<
+peephole_optimization(Insns) ->
+ peep(Insns, [], []).
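+
+%% Typical rewrites (see the MoveSelf and ElimPushPop clauses below):
+%% an instruction that moves a temp to itself, or a push immediately
+%% undone by a pop of the same operand, is simply dropped:
+%%   mov Temp, Temp     =>  (removed)
+%%   push Op ; pop Op   =>  (removed)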
+
+%% MoveSelf related peep-opts
+%% ------------------------------
+peep([#fmove{src=Src, dst=Src} | Insns], Res,Lst) ->
+ peep(Insns, Res, [moveSelf1|Lst]);
+peep([I=#fmove{src=Src, dst=Dst},
+ #fmove{src=Dst, dst=Src} | Insns], Res,Lst) ->
+ peep(Insns, [I|Res], [moveSelf2|Lst]);
+peep([#movsx{src=Src, dst=Src} | Insns], Res,Lst) ->
+ peep(Insns, Res, [moveSelf3|Lst]);
+peep([I=#movsx{src=Src, dst=Dst},
+ #movsx{src=Dst, dst=Src} | Insns], Res,Lst) ->
+ peep(Insns, [I|Res], [moveSelf4|Lst]);
+peep([#movzx{src=Src, dst=Src} | Insns], Res,Lst) ->
+ peep(Insns, Res, [moveSelf5|Lst]);
+peep([I=#movzx{src=Src, dst=Dst},
+ #movzx{src=Dst, dst=Src} | Insns], Res,Lst) ->
+ peep(Insns, [I|Res], [moveSelf6|Lst]);
+peep([#cmovcc{src=Src, dst=Src} | Insns], Res,Lst) ->
+ peep(Insns, Res, [moveSelf7|Lst]);
+peep([I=#cmovcc{src=Src, dst=Dst},
+ #cmovcc{src=Dst, dst=Src}|Insns], Res,Lst) ->
+ peep(Insns, [I|Res], [moveSelf8|Lst]);
+peep([#move{src=#x86_temp{reg=X},
+ dst=#x86_temp{reg=X}} | Insns], Res,Lst) ->
+ peep(Insns, Res, [moveSelf9|Lst]);
+peep([I=#move{src=#x86_temp{reg=Src}, dst=#x86_temp{reg=Dst}},
+ #move{src=#x86_temp{reg=Dst}, dst=#x86_temp{reg=Src}} | Insns], Res,Lst) ->
+ peep(Insns, [I|Res], [moveSelf0|Lst]);
+
+
+%% ElimBinALMDouble
+%% ----------------
+peep([Move=#move{src=Src, dst=Dst}, Alu=#alu{src=Src, dst=Dst}|Insns], Res, Lst) ->
+ peep([Alu#alu{src=Dst}|Insns], [Move|Res], [elimBinALMDouble|Lst]);
+
+
+%% ElimFBinDouble
+%% --------------
+peep([Move=#fmove{src=Src, dst=Dst},
+ BinOp=#fp_binop{src=Src, dst=Dst}|Insns], Res, Lst) ->
+ peep([BinOp#fp_binop{src=Dst}|Insns], [Move|Res], [elimFBinDouble|Lst]);
+
+
+%% CommuteBinALMD
+%% --------------
+peep([#move{src=Src1, dst=Dst},
+ #alu{aluop=Op,src=Src2,dst=Dst}|Insns], Res, Lst)
+ when (Src1 =:= #x86_imm{}) and (Src2 =/= #x86_imm{}) and
+ ((Op =:= 'add') or (Op =:= 'and') or (Op =:= 'or') or (Op =:= 'xor')) ->
+ peep(Insns, [#alu{aluop=Op,src=Src1,dst=Dst},
+ #move{src=Src2, dst=Dst}|Res],
+ [commuteBinALMD|Lst]);
+
+
+%% ElimCmp0
+%% --------
+peep([C=#cmp{src=Src, dst=Dst},J=#jcc{cc=Cond, label=Lab}|Insns],Res,Lst) ->
+ case (((Src =:= #x86_imm{value=0}) or (Dst =:= #x86_imm{value=0})) and
+ ((Cond =:= 'eq') or (Cond =:= 'neq'))) of
+ true ->
+ Src2 = case Src of #x86_imm{value=0} -> Src; _ -> Dst end,
+ Cond2 = case Cond of 'eq' -> 'z'; 'neq' -> 'nz' end,
+ Test = #test{src=Src2, dst=#x86_imm{value=0}},
+ Jump = #jcc{cc=Cond2, label=Lab},
+ peep(Insns, [Jump, Test|Res], [elimCmp0|Lst]);
+ _ ->
+ peep(Insns, [J,C|Res], Lst)
+ end;
+
+
+%% ElimCmpTest
+%% -----------
+peep([I|Insns],Res,Lst) when (I =:= #cmp{}) or (I =:= #test{}) ->
+ case check(Insns) of
+ #jcc{} ->
+ peep(Insns, [I|Res], Lst);
+ #jmp_fun{} ->
+ peep(Insns, [I|Res], Lst);
+ #jmp_label{} ->
+ peep(Insns, [I|Res], Lst);
+ #jmp_switch{} ->
+ peep(Insns, [I|Res], Lst);
+ #cmovcc{} ->
+ peep(Insns, [I|Res], Lst);
+ #ret{} ->
+ peep(Insns, [I|Res], Lst);
+ _ ->
+ peep(Insns, Res, [elimCmpTest|Lst])
+ end;
+
+
+%% ElimPushPop
+%% -----------
+peep([#push{src=Opr}, #pop{dst=Opr} | Insns], Res, Lst) ->
+ peep(Insns, Res, [elimPushPop|Lst]);
+
+
+%% ElimIFF
+%% -------
+peep([#jcc{label=Lab}, I=#label{label=Lab}|Insns], Res, Lst) ->
+ peep(Insns, [I, #jmp_label{label=Lab}|Res], [elimIFF|Lst]);
+
+
+%% ElimSet0
+%% --------
+peep([#move{src=#x86_imm{value=0},dst=Dst}|Insns],Res,Lst)
+when (Dst==#x86_temp{}) ->
+ peep(Insns, [#alu{aluop='xor', src=Dst, dst=Dst}|Res], [elimSet0|Lst]);
+
+%% ElimMDPow2
+%% ----------
+peep([B = #alu{aluop=Op,src=#x86_imm{value=Val},dst=Dst}|Insns], Res, Lst) ->
+ {IsLog2, Size, Sign} = log2(Val),
+ case ((Op =:= imul) or (Op =:= idiv)) and IsLog2 of
+ true ->
+ Sh = case Sign of positive -> 'bsl'; negative -> 'bsr' end,
+ peep(Insns,
+ [#shift{shiftop=Sh, src=#x86_imm{value=Size}, dst=Dst}|Res],
+ [elimMDPow2|Lst]);
+ false ->
+ peep(Insns, [B|Res], Lst)
+ end;
+
+%% SubToDec
+%% This rule turns "subl $1,Dst; jl Lab" into "decl Dst; jl Lab", which
+%% changes reduction counter tests to use decl instead of subl.
+%% However, on Athlon64 this leads to a small but measurable decrease
+%% in performance. The use of dec is also not recommended on P4, so
+%% this transformation is disabled.
+%% peep([#alu{aluop='sub',src=#x86_imm{value=1},dst=Dst},J=#jcc{cc='l'}|Insns], Res, Lst) ->
+%% peep(Insns, [J, #dec{dst=Dst} | Res], [subToDec|Lst]);
+
+%% Standard list recursion clause
+%% ------------------------------
+peep([I | Insns], Res, Lst) ->
+ peep(Insns, [I|Res], Lst);
+peep([], Res, _Lst) ->
+ lists:reverse(Res).
+
+%% Simple goto elimination
+%% -----------------------
+trivial_goto_elimination(Insns) -> goto_elim(Insns, []).
+
+goto_elim([#jmp_label{label=Label}, I = #label{label=Label}|Insns], Res) ->
+ goto_elim([I|Insns], Res);
+goto_elim([I | Insns], Res) ->
+ goto_elim(Insns, [I|Res]);
+goto_elim([], Res) ->
+ lists:reverse(Res).
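+%% For example (label number made up), [#jmp_label{label=5},
+%% #label{label=5} | Rest] becomes [#label{label=5} | Rest]: a jump to the
+%% label that immediately follows it is dropped.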
+
+
+%%>----------------------------------------------------------------------<
+%% Procedure : expand/1
+%% Purpose : Expands pseudo instructions.
+%% Arguments : Insns - An x86-instruction list.
+%% Return : An expanded instruction list.
+%% Notes :
+%%>----------------------------------------------------------------------<
+pseudo_insn_expansion(Insns) -> expand(Insns, []).
+expand([I|Tail], Res) ->
+ case I of
+ #pseudo_jcc{cc=Cc,true_label=TrueLab,false_label=FalseLab} ->
+ expand(Tail, [hipe_x86:mk_jmp_label(FalseLab),
+ hipe_x86:mk_jcc(Cc, TrueLab) | Res]);
+ #pseudo_tailcall_prepare{} ->
+ expand(Tail, Res);
+ #pseudo_call{'fun'=Fun,sdesc=SDesc,contlab=ContLab,linkage=Linkage} ->
+ expand(Tail, [hipe_x86:mk_jmp_label(ContLab),
+ hipe_x86:mk_call(Fun, SDesc, Linkage) | Res]);
+ _ ->
+ expand(Tail, [I|Res])
+ end;
+expand([], Res) -> lists:reverse(Res).
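+%% Sketch of the expansion (label numbers are made up):
+%%   #pseudo_jcc{cc='e',true_label=1,false_label=2}  => jcc 'e' 1, jmp 2
+%%   #pseudo_call{'fun'=F,sdesc=S,contlab=3}         => call F S,  jmp 3
+%%   #pseudo_tailcall_prepare{}                      => (removed)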
+
+%% Log2 function
+%% -------------
+%% Used by ElimMDPow2 clause of peep(..)
+log2(Nr) -> log2(Nr, 0).
+log2(0, _) -> {false, 0, positive};
+log2(Nr, I) ->
+ case (Nr band 1) =:= 1 of
+ true ->
+ case Nr of
+ 1 ->
+ {true, I, positive};
+ -1 ->
+ {true, I, negative};
+ _ ->
+ {false, 0, positive}
+ end;
+ false ->
+ log2((Nr bsr 1), I+1)
+ end.
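+%% A few values worked out by hand from the clauses above:
+%%   log2(8)  -> {true, 3, positive}
+%%   log2(-4) -> {true, 2, negative}
+%%   log2(6)  -> {false, 0, positive}
+%% ElimMDPow2 uses the middle component as the shift count, e.g. an imul
+%% by the immediate 8 becomes a 'bsl' by 3.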
+
+%% Skips through all comments and move instructions and returns the next one
+%% -------------------------------------------------------------------------
+%% Used by ElimCmpTest above.
+check([I|Ins]) ->
+ case I of
+ #comment{} ->
+ check(Ins);
+ #move{} ->
+ check(Ins);
+ #fmove{} ->
+ check(Ins);
+ #movsx{} ->
+ check(Ins);
+ #movzx{} ->
+ check(Ins);
+ OtherI ->
+ OtherI
+ end.
diff --git a/lib/hipe/x86/hipe_x86_pp.erl b/lib/hipe/x86/hipe_x86_pp.erl
new file mode 100644
index 0000000000..555e21a446
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_pp.erl
@@ -0,0 +1,350 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%% x86 pretty-printer
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_PP, hipe_amd64_pp).
+-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
+-else.
+-define(HIPE_X86_PP, hipe_x86_pp).
+-define(HIPE_X86_REGISTERS, hipe_x86_registers).
+-endif.
+
+-module(?HIPE_X86_PP).
+-export([% pp/1, pp/2,
+ pp_insn/1, optional_pp/3]).
+-include("../x86/hipe_x86.hrl").
+
+optional_pp(Defun, MFA, Options) ->
+ case proplists:get_value(pp_native, Options) of
+ true ->
+ pp(Defun);
+ {only,Lst} when is_list(Lst) ->
+ case lists:member(MFA, Lst) of
+ true -> pp(Defun);
+ false -> ok
+ end;
+ {only,MFA} ->
+ pp(Defun);
+ {file,FileName} ->
+ {ok, File} = file:open(FileName, [write,append]),
+ pp(File, Defun),
+ ok = file:close(File);
+ _ ->
+ ok
+ end.
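+%% The pp_native option drives this function: 'true' pretty-prints every
+%% function, {only,MFA} or {only,[MFA]} restricts printing, and {file,Name}
+%% appends the output to a file. A hypothetical call:
+%%   optional_pp(Defun, {foo,bar,2}, [{pp_native, {file, "native.txt"}}])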
+
+pp(Defun) ->
+ pp(standard_io, Defun).
+
+pp(Dev, #defun{mfa={M,F,A}, code=Code, data=Data}) ->
+ Fname = atom_to_list(M)++"_"++atom_to_list(F)++"_"++integer_to_list(A),
+ io:format(Dev, "\t.text\n", []),
+ io:format(Dev, "\t.align 4\n", []),
+ io:format(Dev, "\t.global ~s\n", [Fname]),
+ io:format(Dev, "~s:\n", [Fname]),
+ pp_insns(Dev, Code, Fname),
+ io:format(Dev, "\t.rodata\n", []),
+ io:format(Dev, "\t.align 4\n", []),
+ hipe_data_pp:pp(Dev, Data, x86, Fname),
+ io:format(Dev, "\n", []).
+
+pp_insns(Dev, [I|Is], Fname) ->
+ pp_insn(Dev, I, Fname),
+ pp_insns(Dev, Is, Fname);
+pp_insns(_, [], _) ->
+ ok.
+
+pp_insn(I) ->
+ pp_insn(standard_io, I, "").
+
+pp_insn(Dev, I, Pre) ->
+ case I of
+ #alu{aluop=AluOp, src=Src, dst=Dst} ->
+ io:format(Dev, "\t~s ", [alu_op_name(AluOp)]),
+ pp_src(Dev, Src),
+ io:format(Dev, ", ", []),
+ pp_dst(Dev, Dst),
+ io:format(Dev, "\n", []);
+ #call{'fun'=Fun, sdesc=SDesc, linkage=Linkage} ->
+ io:format(Dev, "\tcall ", []),
+ pp_fun(Dev, Fun),
+ io:format(Dev, " #", []),
+ pp_sdesc(Dev, Pre, SDesc),
+ io:format(Dev, " ~w\n", [Linkage]);
+ #cmovcc{cc=Cc, src=Src, dst=Dst} ->
+ io:format(Dev, "\tcmov~s ", [cc_name(Cc)]),
+ pp_src(Dev, Src),
+ io:format(Dev, ", ", []),
+ pp_dst(Dev, Dst),
+ io:format(Dev, "\n", []);
+ #cmp{src=Src, dst=Dst} ->
+ io:format(Dev, "\tcmp ", []),
+ pp_src(Dev, Src),
+ io:format(Dev, ", ", []),
+ pp_dst(Dev, Dst),
+ io:format(Dev, "\n", []);
+ #comment{term=Term} ->
+ io:format(Dev, "\t# ~p\n", [Term]);
+ #imul{imm_opt=ImmOpt, src=Src, temp=Temp} ->
+ io:format(Dev, "\timul ", []),
+ case ImmOpt of
+ [] -> ok;
+ Imm ->
+ pp_imm(Dev, Imm, true),
+ io:format(Dev, ", ", [])
+ end,
+ pp_src(Dev, Src),
+ io:format(Dev, ", ", []),
+ pp_temp(Dev, Temp),
+ io:format(Dev, "\n", []);
+ #jcc{cc=Cc, label=Label} ->
+ io:format(Dev, "\tj~s .~s_~w\n", [cc_name(Cc), Pre, Label]);
+ #jmp_fun{'fun'=Fun, linkage=Linkage} ->
+ io:format(Dev, "\tjmp ", []),
+ pp_fun(Dev, Fun),
+ io:format(Dev, " ~w\n", [Linkage]);
+ #jmp_label{label=Label} ->
+ io:format(Dev, "\tjmp .~s_~w\n", [Pre, Label]);
+ #jmp_switch{temp=Temp, jtab=JTab, labels=Labels} ->
+ io:format(Dev, "\tjmp *{constant,~w}(,", [JTab]),
+ pp_temp(Dev, Temp),
+ io:format(Dev, ",4) #", []),
+ pp_labels(Dev, Labels, Pre),
+ io:format(Dev, "\n", []);
+ #label{label=Label} ->
+ io:format(Dev, ".~s_~w:~n", [Pre, Label]);
+ #lea{mem=Mem, temp=Temp} ->
+ io:format(Dev, "\tlea ", []),
+ pp_mem(Dev, Mem),
+ io:format(Dev, ", ", []),
+ pp_temp(Dev, Temp),
+ io:format(Dev, "\n", []);
+ #move{src=Src, dst=Dst} ->
+ io:format(Dev, "\tmov ", []),
+ pp_src(Dev, Src),
+ io:format(Dev, ", ", []),
+ pp_dst(Dev, Dst),
+ io:format(Dev, "\n", []);
+ #move64{} ->
+ pp_move64(Dev, I);
+ #movsx{src=Src, dst=Dst} ->
+ io:format(Dev, "\tmovsx ", []),
+ pp_src(Dev, Src),
+ io:format(Dev, ", ", []),
+ pp_dst(Dev, Dst),
+ io:format(Dev, "\n", []);
+ #movzx{src=Src, dst=Dst} ->
+ io:format(Dev, "\tmovzx ", []),
+ pp_src(Dev, Src),
+ io:format(Dev, ", ", []),
+ pp_dst(Dev, Dst),
+ io:format(Dev, "\n", []);
+ #pseudo_call{'fun'=Fun, sdesc=SDesc, contlab=ContLab, linkage=Linkage} ->
+ io:format(Dev, "\tpseudo_call ", []),
+ pp_fun(Dev, Fun),
+ io:format(Dev, " # contlab .~s_~w", [Pre, ContLab]),
+ pp_sdesc(Dev, Pre, SDesc),
+ io:format(Dev, " ~w\n", [Linkage]);
+ #pseudo_jcc{cc=Cc, true_label=TrueLab, false_label=FalseLab, pred=Pred} ->
+ io:format(Dev, "\tpseudo_j~s ", [cc_name(Cc)]),
+ io:format(Dev, ".~s_~w # .~s_~w ~.2f\n",
+ [Pre, TrueLab, Pre, FalseLab, Pred]);
+ #pseudo_tailcall{'fun'=Fun, arity=Arity, stkargs=StkArgs, linkage=Linkage} ->
+ io:format(Dev, "\tpseudo_tailcall ", []),
+ pp_fun(Dev, Fun),
+ io:format(Dev, "~w (", [Arity]),
+ pp_args(Dev, StkArgs),
+ io:format(Dev, ") ~w\n", [Linkage]);
+ #pseudo_tailcall_prepare{} ->
+ io:format(Dev, "\tpseudo_tailcall_prepare\n", []);
+ #push{src=Src} ->
+ io:format(Dev, "\tpush ", []),
+ pp_src(Dev, Src),
+ io:format(Dev, "\n", []);
+ #ret{npop=NPop} ->
+ io:format(Dev, "\tret $~s\n", [to_hex(NPop)]);
+ #shift{shiftop=ShiftOp, src=Src, dst=Dst} ->
+ io:format(Dev, "\t~s ", [alu_op_name(ShiftOp)]),
+ pp_src(Dev, Src),
+ io:format(Dev, ", ", []),
+ pp_dst(Dev, Dst),
+ io:format(Dev, "\n", []);
+ #fp_binop{src=Src, dst=Dst, op=Op} ->
+ io:format(Dev, "\t~s ", [Op]),
+ pp_dst(Dev, Dst),
+ io:format(Dev, ", ", []),
+ pp_src(Dev, Src),
+ io:format(Dev, "\n", []);
+ #fp_unop{arg=Arg, op=Op} ->
+ io:format(Dev, "\t~s ", [Op]),
+ case Arg of
+ []->
+ io:format(Dev, "\n", []);
+ _ ->
+ pp_args(Dev, [Arg]),
+ io:format(Dev, "\n", [])
+ end;
+ #fmove{src=Src, dst=Dst} ->
+ io:format(Dev, "\tfmove ", []),
+ pp_src(Dev, Src),
+ io:format(Dev, ", ", []),
+ pp_dst(Dev, Dst),
+ io:format(Dev, "\n", []);
+ _ ->
+ exit({?MODULE, pp_insn, {"unknown x86 instruction", I}})
+ end.
+
+-ifdef(HIPE_AMD64).
+pp_move64(Dev, I) ->
+ #move64{imm=Src, dst=Dst} = I,
+ io:format(Dev, "\tmov64 ", []),
+ pp_src(Dev, Src),
+ io:format(Dev, ", ", []),
+ pp_dst(Dev, Dst),
+ io:format(Dev, "\n", []).
+-else.
+pp_move64(_Dev, I) -> exit({?MODULE, I}).
+-endif.
+
+to_hex(N) ->
+ io_lib:format("~.16x", [N, "0x"]).
+
+pp_sdesc(Dev, Pre, #x86_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live}) ->
+ pp_sdesc_exnlab(Dev, Pre, ExnLab),
+ io:format(Dev, " ~s ~w [", [to_hex(FSize), Arity]),
+ pp_sdesc_live(Dev, Live),
+ io:format(Dev, "]", []).
+
+pp_sdesc_exnlab(Dev, _, []) -> io:format(Dev, " []", []);
+pp_sdesc_exnlab(Dev, Pre, ExnLab) -> io:format(Dev, " .~s_~w", [Pre, ExnLab]).
+
+pp_sdesc_live(_, {}) -> ok;
+pp_sdesc_live(Dev, Live) -> pp_sdesc_live(Dev, Live, 1).
+
+pp_sdesc_live(Dev, Live, I) ->
+ io:format(Dev, "~s", [to_hex(element(I, Live))]),
+ if I < tuple_size(Live) ->
+ io:format(Dev, ",", []),
+ pp_sdesc_live(Dev, Live, I+1);
+ true -> ok
+ end.
+
+pp_labels(Dev, [Label|Labels], Pre) ->
+ io:format(Dev, " .~s_~w", [Pre, Label]),
+ pp_labels(Dev, Labels, Pre);
+pp_labels(_, [], _) ->
+ ok.
+
+pp_fun(Dev, Fun) ->
+ case Fun of
+ #x86_mfa{m=M, f=F, a=A} ->
+ io:format(Dev, "~w:~w/~w", [M, F, A]);
+ #x86_prim{prim=Prim} ->
+ io:format(Dev, "~w", [Prim]);
+ _ -> % temp or mem
+ io:format(Dev, "*", []),
+ pp_dst(Dev, Fun)
+ end.
+
+alu_op_name(Op) -> Op.
+
+cc_name(Cc) -> Cc.
+
+pp_hard_reg(Dev, Reg) ->
+ io:format(Dev, "~s", [?HIPE_X86_REGISTERS:reg_name(Reg)]).
+
+type_tag('tagged') -> "t";
+type_tag('untagged') -> "u";
+type_tag('double') -> "d".
+
+pp_temp(Dev, #x86_temp{reg=Reg, type=Type}) ->
+ case Type of
+ double ->
+ Tag = type_tag(Type),
+ io:format(Dev, "~s~w", [Tag, Reg]);
+ _ ->
+ case ?HIPE_X86_REGISTERS:is_precoloured(Reg) of
+ true ->
+ pp_hard_reg(Dev, Reg);
+ false ->
+ Tag = type_tag(Type),
+ io:format(Dev, "~s~w", [Tag, Reg])
+ end
+ end.
+
+pp_fpreg(Dev, #x86_fpreg{reg=Reg, pseudo=Pseudo})->
+ case Pseudo of
+ true -> io:format(Dev, "pseudo_fp(~w)", [Reg]);
+ _ -> io:format(Dev, "st(~w)", [Reg])
+ end.
+
+pp_imm(Dev, #x86_imm{value=Value}, Dollar) ->
+ if Dollar =:= true -> io:format(Dev, [$$], []);
+ true -> ok
+ end,
+ if is_integer(Value) -> io:format(Dev, "~s", [to_hex(Value)]);
+ true -> io:format(Dev, "~w", [Value])
+ end.
+
+pp_mem(Dev, #x86_mem{base=Base, off=Off}) ->
+ pp_off(Dev, Off),
+ case Base of
+ [] ->
+ ok;
+ _ ->
+ io:format(Dev, "(", []),
+ pp_temp(Dev, Base),
+ io:format(Dev, ")", [])
+ end.
+
+pp_off(Dev, Off) ->
+ pp_src(Dev, Off, false).
+
+pp_src(Dev, Src) ->
+ pp_src(Dev, Src, true).
+
+pp_src(Dev, Src, Dollar) ->
+ case Src of
+ #x86_temp{} ->
+ pp_temp(Dev, Src);
+ #x86_imm{} ->
+ pp_imm(Dev, Src, Dollar);
+ #x86_mem{} ->
+ pp_mem(Dev, Src);
+ #x86_fpreg{} ->
+ pp_fpreg(Dev, Src)
+ end.
+
+pp_dst(Dev, Dst) ->
+ pp_src(Dev, Dst).
+
+pp_args(Dev, [A|As]) ->
+ pp_src(Dev, A),
+ pp_comma_args(Dev, As);
+pp_args(_, []) ->
+ ok.
+
+pp_comma_args(Dev, [A|As]) ->
+ io:format(Dev, ", ", []),
+ pp_src(Dev, A),
+ pp_comma_args(Dev, As);
+pp_comma_args(_, []) ->
+ ok.
diff --git a/lib/hipe/x86/hipe_x86_ra.erl b/lib/hipe/x86/hipe_x86_ra.erl
new file mode 100644
index 0000000000..d50b9aabad
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_ra.erl
@@ -0,0 +1,99 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_RA, hipe_amd64_ra).
+-define(HIPE_X86_PP, hipe_amd64_pp).
+-define(HIPE_X86_RA_LS, hipe_amd64_ra_ls).
+-define(HIPE_X86_RA_NAIVE, hipe_amd64_ra_naive).
+-define(HIPE_X86_RA_FINALISE, hipe_amd64_ra_finalise).
+-define(HIPE_X86_SPECIFIC, hipe_amd64_specific).
+-else.
+-define(HIPE_X86_RA, hipe_x86_ra).
+-define(HIPE_X86_PP, hipe_x86_pp).
+-define(HIPE_X86_RA_LS, hipe_x86_ra_ls).
+-define(HIPE_X86_RA_NAIVE, hipe_x86_ra_naive).
+-define(HIPE_X86_RA_FINALISE, hipe_x86_ra_finalise).
+-define(HIPE_X86_SPECIFIC, hipe_x86_specific).
+-endif.
+
+-module(?HIPE_X86_RA).
+-export([ra/2]).
+
+%%-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation.
+-include("../main/hipe.hrl").
+
+ra(Defun0, Options) ->
+ %% ?HIPE_X86_PP:pp(Defun0),
+ {Defun1, Coloring_fp, SpillIndex} = ra_fp(Defun0, Options),
+ %% ?HIPE_X86_PP:pp(Defun1),
+ ?start_ra_instrumentation(Options,
+ length(hipe_x86:defun_code(Defun1)),
+ element(2,hipe_x86:defun_var_range(Defun1))),
+ {Defun2, Coloring}
+ = case proplists:get_value(regalloc, Options, coalescing) of
+ coalescing ->
+ ra(Defun1, SpillIndex, Options, hipe_coalescing_regalloc);
+ optimistic ->
+ ra(Defun1, SpillIndex, Options, hipe_optimistic_regalloc);
+ graph_color ->
+ ra(Defun1, SpillIndex, Options, hipe_graph_coloring_regalloc);
+ linear_scan ->
+ ?HIPE_X86_RA_LS:ra(Defun1, SpillIndex, Options);
+ naive ->
+ ?HIPE_X86_RA_NAIVE:ra(Defun1, Coloring_fp, Options);
+ _ ->
+ exit({unknown_regalloc_compiler_option,
+ proplists:get_value(regalloc,Options)})
+ end,
+ ?stop_ra_instrumentation(Options,
+ length(hipe_x86:defun_code(Defun2)),
+ element(2,hipe_x86:defun_var_range(Defun2))),
+ %% ?HIPE_X86_PP:pp(Defun2),
+ ?HIPE_X86_RA_FINALISE:finalise(Defun2, Coloring, Coloring_fp, Options).
+
+ra(Defun, SpillIndex, Options, RegAllocMod) ->
+ hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, ?HIPE_X86_SPECIFIC).
+
+-ifdef(HIPE_AMD64).
+ra_fp(Defun, Options) ->
+ case proplists:get_bool(inline_fp, Options) and
+ (proplists:get_value(regalloc, Options) =/= naive) of
+ true ->
+ case proplists:get_bool(x87, Options) of
+ true ->
+ hipe_amd64_ra_x87_ls:ra(Defun, Options);
+ false ->
+ hipe_regalloc_loop:ra_fp(Defun, Options,
+ hipe_coalescing_regalloc,
+ hipe_amd64_specific_sse2)
+ end;
+ false ->
+ {Defun,[],0}
+ end.
+-else.
+ra_fp(Defun, Options) ->
+ case proplists:get_bool(inline_fp, Options) of
+ true ->
+ hipe_x86_ra_x87_ls:ra(Defun, Options);
+ false ->
+ {Defun,[],0}
+ end.
+-endif.
diff --git a/lib/hipe/x86/hipe_x86_ra_finalise.erl b/lib/hipe/x86/hipe_x86_ra_finalise.erl
new file mode 100644
index 0000000000..10b4df05d2
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_ra_finalise.erl
@@ -0,0 +1,335 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%%
+%%% - apply temp -> reg/spill map from RA
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_RA_FINALISE, hipe_amd64_ra_finalise).
+-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
+-define(HIPE_X86_X87, hipe_amd64_x87).
+-else.
+-define(HIPE_X86_RA_FINALISE, hipe_x86_ra_finalise).
+-define(HIPE_X86_REGISTERS, hipe_x86_registers).
+-define(HIPE_X86_X87, hipe_x86_x87).
+-endif.
+
+-module(?HIPE_X86_RA_FINALISE).
+-export([finalise/4]).
+-include("../x86/hipe_x86.hrl").
+
+finalise(Defun, TempMap, FpMap, Options) ->
+ Defun1 = finalise_ra(Defun, TempMap, FpMap, Options),
+ case proplists:get_bool(x87, Options) of
+ true ->
+ ?HIPE_X86_X87:map(Defun1);
+ _ ->
+ Defun1
+ end.
+
+%%%
+%%% Finalise the temp->reg/spill mapping.
+%%% (XXX: maybe this should be merged with the main pass,
+%%% but I just want this to work now)
+%%%
+
+finalise_ra(Defun, [], [], _Options) ->
+ Defun;
+finalise_ra(Defun, TempMap, FpMap, Options) ->
+ Code = hipe_x86:defun_code(Defun),
+ {_, SpillLimit} = hipe_x86:defun_var_range(Defun),
+ Map = mk_ra_map(TempMap, SpillLimit),
+ FpMap0 = mk_ra_map_fp(FpMap, SpillLimit, Options),
+ NewCode = ra_code(Code, Map, FpMap0),
+ Defun#defun{code=NewCode}.
+
+ra_code(Code, Map, FpMap) ->
+ [ra_insn(I, Map, FpMap) || I <- Code].
+
+ra_insn(I, Map, FpMap) ->
+ case I of
+ #alu{src=Src0,dst=Dst0} ->
+ Src = ra_opnd(Src0, Map),
+ Dst = ra_opnd(Dst0, Map),
+ I#alu{src=Src,dst=Dst};
+ #call{} ->
+ I;
+ #cmovcc{src=Src0,dst=Dst0} ->
+ Src = ra_opnd(Src0, Map),
+ Dst = ra_opnd(Dst0, Map),
+ I#cmovcc{src=Src,dst=Dst};
+ #cmp{src=Src0,dst=Dst0} ->
+ Src = ra_opnd(Src0, Map),
+ Dst = ra_opnd(Dst0, Map),
+ I#cmp{src=Src,dst=Dst};
+ #comment{} ->
+ I;
+ #fmove{src=Src0,dst=Dst0} ->
+ Src = ra_opnd(Src0, Map, FpMap),
+ Dst = ra_opnd(Dst0, Map, FpMap),
+ I#fmove{src=Src,dst=Dst};
+ #fp_unop{arg=Arg0} ->
+ Arg = ra_opnd(Arg0, Map, FpMap),
+ I#fp_unop{arg=Arg};
+ #fp_binop{src=Src0,dst=Dst0} ->
+ Src = ra_opnd(Src0, Map, FpMap),
+ Dst = ra_opnd(Dst0, Map, FpMap),
+ I#fp_binop{src=Src,dst=Dst};
+ #imul{src=Src0,temp=Temp0} ->
+ Src = ra_opnd(Src0, Map),
+ Temp = ra_temp(Temp0, Map),
+ I#imul{src=Src,temp=Temp};
+ #jcc{} ->
+ I;
+ #jmp_fun{'fun'=Fun0} ->
+ Fun = ra_opnd(Fun0, Map),
+ I#jmp_fun{'fun'=Fun};
+ #jmp_label{} ->
+ I;
+ #jmp_switch{temp=Temp0,jtab=JTab0} ->
+ Temp = ra_opnd(Temp0, Map),
+ JTab = ra_opnd(JTab0, Map),
+ I#jmp_switch{temp=Temp,jtab=JTab};
+ #label{} ->
+ I;
+ #lea{mem=Mem0,temp=Temp0} ->
+ Mem = ra_mem(Mem0, Map),
+ Temp = ra_temp(Temp0, Map),
+ I#lea{mem=Mem,temp=Temp};
+ #move{src=Src0,dst=Dst0} ->
+ Src = ra_opnd(Src0, Map),
+ Dst = ra_opnd(Dst0, Map),
+ I#move{src=Src,dst=Dst};
+ #move64{dst=Dst0} ->
+ Dst = ra_opnd(Dst0, Map),
+ I#move64{dst=Dst};
+ #movsx{src=Src0,dst=Dst0} ->
+ Src = ra_opnd(Src0, Map),
+ Dst = ra_opnd(Dst0, Map),
+ I#movsx{src=Src,dst=Dst};
+ #movzx{src=Src0,dst=Dst0} ->
+ Src = ra_opnd(Src0, Map),
+ Dst = ra_opnd(Dst0, Map),
+ I#movzx{src=Src,dst=Dst};
+ #pseudo_call{'fun'=Fun0} ->
+ Fun = ra_opnd(Fun0, Map),
+ I#pseudo_call{'fun'=Fun};
+ #pseudo_jcc{} ->
+ I;
+ #pseudo_tailcall{'fun'=Fun0,stkargs=StkArgs0} ->
+ Fun = ra_opnd(Fun0, Map),
+ StkArgs = ra_args(StkArgs0, Map),
+ I#pseudo_tailcall{'fun'=Fun,stkargs=StkArgs};
+ #pseudo_tailcall_prepare{} ->
+ I;
+ #push{src=Src0} ->
+ Src = ra_opnd(Src0, Map),
+ I#push{src=Src};
+ #ret{} ->
+ I;
+ #shift{src=Src0,dst=Dst0} ->
+ Src = ra_opnd(Src0, Map),
+ Dst = ra_opnd(Dst0, Map),
+ I#shift{src=Src,dst=Dst};
+ _ ->
+ exit({?MODULE,ra_insn,I})
+ end.
+
+ra_args(Args, Map) ->
+ [ra_opnd(Opnd, Map) || Opnd <- Args].
+
+ra_opnd(Opnd, Map) ->
+ ra_opnd(Opnd, Map, gb_trees:empty()).
+ra_opnd(Opnd, Map, FpMap) ->
+ case Opnd of
+ #x86_temp{} -> ra_temp(Opnd, Map, FpMap);
+ #x86_mem{} -> ra_mem(Opnd, Map);
+ _ -> Opnd
+ end.
+
+ra_mem(Mem, Map) ->
+ #x86_mem{base=Base0,off=Off0} = Mem,
+ Base = ra_opnd(Base0, Map),
+ Off = ra_opnd(Off0, Map),
+ Mem#x86_mem{base=Base,off=Off}.
+
+ra_temp(Temp, Map) ->
+ ra_temp(Temp, Map, gb_trees:empty()).
+
+ra_temp(Temp, Map, FpMap) ->
+ Reg = hipe_x86:temp_reg(Temp),
+ case hipe_x86:temp_type(Temp) of
+ double ->
+ ra_temp_double(Temp, Reg, FpMap);
+ _->
+ case ?HIPE_X86_REGISTERS:is_precoloured(Reg) of
+ true ->
+ Temp;
+ _ ->
+ case gb_trees:lookup(Reg, Map) of
+ {value,NewReg} -> Temp#x86_temp{reg=NewReg};
+ _ -> Temp
+ end
+ end
+ end.
+
+-ifdef(HIPE_AMD64).
+ra_temp_double(Temp, Reg, FpMap) ->
+ case hipe_amd64_registers:is_precoloured_sse2(Reg) of
+ true ->
+ Temp;
+ _ ->
+ case gb_trees:lookup(Reg, FpMap) of
+ {value,NewReg} -> Temp#x86_temp{reg=NewReg};
+ _ -> Temp
+ end
+ end.
+-else.
+ra_temp_double(Temp, Reg, FpMap) ->
+ case gb_trees:lookup(Reg, FpMap) of
+ {value,NewReg} ->
+ case hipe_x86_registers:is_precoloured_x87(NewReg) of
+ true -> hipe_x86:mk_fpreg(NewReg);
+ false ->
+ Temp#x86_temp{reg=NewReg}
+ end;
+ _ ->
+ Temp
+ end.
+-endif.
+
+mk_ra_map(TempMap, SpillLimit) ->
+ %% Build a partial map from pseudo to reg or spill.
+ %% Spills are represented as pseudos with indices above SpillLimit.
+ %% (I'd prefer to use negative indices, but that breaks
+ %% ?HIPE_X86_REGISTERS:is_precoloured/1.)
+ %% The frame mapping proper is unchanged, since spills look just like
+ %% ordinary (un-allocated) pseudos.
+ lists:foldl(fun(MapLet, Map) ->
+ {Key,Val} = conv_ra_maplet(MapLet, SpillLimit,
+ is_precoloured),
+ gb_trees:insert(Key, Val, Map)
+ end,
+ gb_trees:empty(),
+ TempMap).
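+%% Made-up example with SpillLimit = 100: the allocator maplet {57,{reg,3}}
+%% becomes the map entry {57,3}, while {58,{spill,0}} becomes {58,101},
+%% i.e. spill slot N is encoded as pseudo SpillLimit+N+1 (see
+%% conv_ra_maplet/3 below).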
+
+conv_ra_maplet(MapLet = {From,To}, SpillLimit, IsPrecoloured) ->
+ %% From should be a pseudo, or a hard reg mapped to itself.
+ if is_integer(From), From =< SpillLimit ->
+ case ?HIPE_X86_REGISTERS:IsPrecoloured(From) of
+ false -> [];
+ _ ->
+ case To of
+ {reg, From} -> [];
+ _ -> exit({?MODULE,conv_ra_maplet,MapLet})
+ end
+ end;
+ true -> exit({?MODULE,conv_ra_maplet,MapLet})
+ end,
+ %% end of From check
+ case To of
+ {reg, NewReg} ->
+ %% NewReg should be a hard reg, or a pseudo mapped
+ %% to itself (formals are handled this way).
+ if is_integer(NewReg) ->
+ case ?HIPE_X86_REGISTERS:IsPrecoloured(NewReg) of
+ true -> [];
+ _ -> if From =:= NewReg -> [];
+ true ->
+ exit({?MODULE,conv_ra_maplet,MapLet})
+ end
+ end;
+ true -> exit({?MODULE,conv_ra_maplet,MapLet})
+ end,
+ %% end of NewReg check
+ {From, NewReg};
+ {spill, SpillIndex} ->
+ %% SpillIndex should be >= 0.
+ if is_integer(SpillIndex), SpillIndex >= 0 -> [];
+ true -> exit({?MODULE,conv_ra_maplet,MapLet})
+ end,
+ %% end of SpillIndex check
+ ToTempNum = SpillLimit+SpillIndex+1,
+ MaxTempNum = hipe_gensym:get_var(x86),
+ if MaxTempNum >= ToTempNum -> ok;
+ true -> hipe_gensym:set_var(x86, ToTempNum)
+ end,
+ {From, ToTempNum};
+ _ -> exit({?MODULE,conv_ra_maplet,MapLet})
+ end.
+
+mk_ra_map_x87(FpMap, SpillLimit) ->
+ lists:foldl(fun(MapLet, Map) ->
+ {Key,Val} = conv_ra_maplet(MapLet, SpillLimit,
+ is_precoloured_x87),
+ gb_trees:insert(Key, Val, Map)
+ end,
+ gb_trees:empty(),
+ FpMap).
+
+-ifdef(HIPE_AMD64).
+mk_ra_map_sse2(FpMap, SpillLimit) ->
+ lists:foldl(fun(MapLet, Map) ->
+ {Key,Val} = conv_ra_maplet(MapLet, SpillLimit,
+ is_precoloured_sse2),
+ gb_trees:insert(Key, Val, Map)
+ end,
+ gb_trees:empty(),
+ FpMap).
+
+mk_ra_map_fp(FpMap, SpillLimit, Options) ->
+ case proplists:get_bool(x87, Options) of
+ true -> mk_ra_map_x87(FpMap, SpillLimit);
+ false -> mk_ra_map_sse2(FpMap, SpillLimit)
+ end.
+-else.
+mk_ra_map_fp(FpMap, SpillLimit, _Options) ->
+ mk_ra_map_x87(FpMap, SpillLimit).
+-endif.
+
+-ifdef(notdef).
+conv_ra_maplet_fp(MapLet = {From,To}, SpillLimit) ->
+ %% From should be a pseudo
+ if is_integer(From), From =< SpillLimit -> [];
+ true -> exit({?MODULE,conv_ra_maplet_fp,MapLet})
+ end,
+ %% end of From check
+ case To of
+ {reg, NewReg} ->
+ case hipe_x86_registers:is_precoloured_x87(NewReg) of
+ true-> [];
+ false -> exit({?MODULE,conv_ra_maplet_fp,MapLet})
+ end,
+ %% end of NewReg check.
+ {From, NewReg};
+ {spill, SpillIndex} ->
+ %% SpillIndex should be >= 0.
+ if is_integer(SpillIndex), SpillIndex >= 0 -> [];
+ true -> exit({?MODULE,conv_ra_maplet_fp,MapLet})
+ end,
+ %% end of SpillIndex check
+ ToTempNum = SpillLimit+SpillIndex+1,
+ MaxTempNum = hipe_gensym:get_var(x86),
+ if MaxTempNum >= ToTempNum -> [];
+ true -> hipe_gensym:set_var(x86, ToTempNum)
+ end,
+ {From, ToTempNum};
+ _ -> exit({?MODULE,conv_ra_maplet_fp,MapLet})
+ end.
+-endif.
diff --git a/lib/hipe/x86/hipe_x86_ra_ls.erl b/lib/hipe/x86/hipe_x86_ra_ls.erl
new file mode 100644
index 0000000000..ab7b6708ad
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_ra_ls.erl
@@ -0,0 +1,85 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%% Linear Scan register allocator for x86
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_RA_LS, hipe_amd64_ra_ls).
+-define(HIPE_X86_PP, hipe_amd64_pp).
+-define(HIPE_X86_RA_POSTCONDITIONS, hipe_amd64_ra_postconditions).
+-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
+-define(HIPE_X86_SPECIFIC, hipe_amd64_specific).
+-else.
+-define(HIPE_X86_RA_LS, hipe_x86_ra_ls).
+-define(HIPE_X86_PP, hipe_x86_pp).
+-define(HIPE_X86_RA_POSTCONDITIONS, hipe_x86_ra_postconditions).
+-define(HIPE_X86_REGISTERS, hipe_x86_registers).
+-define(HIPE_X86_SPECIFIC, hipe_x86_specific).
+-endif.
+
+-module(?HIPE_X86_RA_LS).
+-export([ra/3,regalloc/7]).
+-define(HIPE_INSTRUMENT_COMPILER, true). %% Turn on instrumentation.
+-include("../main/hipe.hrl").
+
+ra(Defun, SpillIndex, Options) ->
+ NewDefun = Defun, %% hipe_${ARCH}_ra_rename:rename(Defun,Options),
+ CFG = hipe_x86_cfg:init(NewDefun),
+
+  SpillLimit = ?HIPE_X86_SPECIFIC:number_of_temporaries(CFG),
+ ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))),
+ alloc(NewDefun, SpillIndex, SpillLimit, Options).
+
+
+alloc(Defun, SpillIndex, SpillLimit, Options) ->
+ ?inc_counter(ra_iteration_counter,1),
+ %% ?HIPE_X86_PP:pp(Defun),
+ CFG = hipe_x86_cfg:init(Defun),
+ {Coloring, NewSpillIndex} =
+ regalloc(
+ CFG,
+ ?HIPE_X86_REGISTERS:allocatable()--
+ [?HIPE_X86_REGISTERS:temp1(),
+ ?HIPE_X86_REGISTERS:temp0()],
+ [hipe_x86_cfg:start_label(CFG)],
+ SpillIndex, SpillLimit, Options,
+ ?HIPE_X86_SPECIFIC),
+ {NewDefun, _DidSpill} =
+ ?HIPE_X86_RA_POSTCONDITIONS:check_and_rewrite(
+ Defun, Coloring, 'linearscan'),
+ %% ?HIPE_X86_PP:pp(NewDefun),
+ TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC),
+ {TempMap2,NewSpillIndex2} =
+ hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options,
+ ?HIPE_X86_SPECIFIC, TempMap),
+ Coloring2 =
+ hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2),
+ case proplists:get_bool(verbose_spills, Options) of
+ true ->
+ ?msg("Stack slot size: ~p~n",[NewSpillIndex2-SpillIndex]);
+ false ->
+ ok
+ end,
+ ?add_spills(Options, NewSpillIndex),
+ {NewDefun, Coloring2}.
+
+regalloc(CFG,PhysRegs,Entrypoints, SpillIndex, DontSpill, Options, Target) ->
+ hipe_ls_regalloc:regalloc(CFG,PhysRegs,Entrypoints, SpillIndex,
+ DontSpill, Options, Target).
diff --git a/lib/hipe/x86/hipe_x86_ra_naive.erl b/lib/hipe/x86/hipe_x86_ra_naive.erl
new file mode 100644
index 0000000000..e9b99cd2c5
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_ra_naive.erl
@@ -0,0 +1,409 @@
+%%% -*- erlang-indent-level: 2 -*-
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2005-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%% simple local x86 regalloc
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_RA_NAIVE, hipe_amd64_ra_naive).
+-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
+-define(HIPE_X86_SPECIFIC_FP, hipe_amd64_specific_sse2).
+-define(ECX, rcx).
+-else.
+-define(HIPE_X86_RA_NAIVE, hipe_x86_ra_naive).
+-define(HIPE_X86_REGISTERS, hipe_x86_registers).
+-define(HIPE_X86_SPECIFIC_FP, hipe_x86_specific_x87).
+-define(ECX, ecx).
+-endif.
+
+-module(?HIPE_X86_RA_NAIVE).
+-export([ra/3]).
+
+-include("../x86/hipe_x86.hrl").
+-define(HIPE_INSTRUMENT_COMPILER, true). % enable instrumentation
+-include("../main/hipe.hrl").
+
+ra(X86Defun, Coloring_fp, Options) ->
+ #defun{code=Code0} = X86Defun,
+ Code1 = do_insns(Code0),
+ NofSpilledFloats = count_non_float_spills(Coloring_fp),
+ NofFloats = length(Coloring_fp),
+ ?add_spills(Options, hipe_gensym:get_var(x86) -
+ ?HIPE_X86_REGISTERS:first_virtual()-
+ NofSpilledFloats -
+ NofFloats),
+ TempMap = [],
+ {X86Defun#defun{code=Code1,
+ var_range={0, hipe_gensym:get_var(x86)}},
+ TempMap}.
+
+count_non_float_spills(Coloring_fp) ->
+ count_non_float_spills(Coloring_fp, 0).
+
+count_non_float_spills([{_,To}|Tail], Num) ->
+ case ?HIPE_X86_SPECIFIC_FP:is_precoloured(To) of
+ true ->
+ count_non_float_spills(Tail, Num);
+ false ->
+ count_non_float_spills(Tail, Num+1)
+ end;
+count_non_float_spills([], Num) ->
+ Num.
+
+do_insns([I|Insns]) ->
+ do_insn(I) ++ do_insns(Insns);
+do_insns([]) ->
+ [].
+
+do_insn(I) -> % Insn -> Insn list
+ case I of
+ #alu{} ->
+ do_alu(I);
+ #cmp{} ->
+ do_cmp(I);
+ #imul{} ->
+ do_imul(I);
+ #jmp_switch{} ->
+ do_jmp_switch(I);
+ #lea{} ->
+ do_lea(I);
+ #move{} ->
+ do_move(I);
+ #move64{} ->
+ do_move64(I);
+ #movzx{} ->
+ do_movx(I);
+ #movsx{} ->
+ do_movx(I);
+ #fmove{} ->
+ do_fmove(I);
+ #fp_unop{} ->
+ do_fp_unop(I);
+ #fp_binop{} ->
+ do_fp_binop(I);
+ #shift{} ->
+ do_shift(I);
+ #label{} ->
+ [I];
+ #pseudo_jcc{} ->
+ [I];
+ #pseudo_call{} ->
+ [I];
+ #ret{} ->
+ [I];
+ #pseudo_tailcall_prepare{} ->
+ [I];
+ #pseudo_tailcall{} ->
+ [I];
+ #push{} ->
+ [I];
+ #jmp_label{} ->
+ [I];
+ #comment{} ->
+ [I];
+ _ ->
+ io:format("Unknown Instruction = ~w\n", [I]),
+ exit({?MODULE, unknown_instruction, I})
+ end.
+
+%%% Fix an alu op.
+
+do_alu(I) ->
+ #alu{src=Src0,dst=Dst0} = I,
+ {FixSrc,Src,FixDst,Dst} = do_binary(Src0, Dst0),
+ FixSrc ++ FixDst ++ [I#alu{src=Src,dst=Dst}].
+
+%%% Fix a cmp op.
+
+do_cmp(I) ->
+ #cmp{src=Src0,dst=Dst0} = I,
+ {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0),
+ FixSrc ++ FixDst ++ [I#cmp{src=Src,dst=Dst}].
+
+%%% Fix an imul op.
+
+do_imul(I) ->
+ #imul{imm_opt=ImmOpt,src=Src0,temp=Temp0} = I,
+ {FixSrc,Src} = fix_src_operand(Src0), % may use temp0
+ {FixTempSrc,Temp,FixTempDst} =
+ case temp_is_pseudo(Temp0) of
+ false ->
+ {[], Temp0, []};
+ true ->
+ Reg = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), 'untagged'),
+ {case ImmOpt of
+ [] -> [hipe_x86:mk_move(Temp0, Reg)]; % temp *= src
+ _ -> [] % temp = src * imm
+ end,
+ Reg,
+ [hipe_x86:mk_move(Reg, Temp0)]}
+ end,
+ FixSrc ++ FixTempSrc ++ [I#imul{src=Src,temp=Temp}] ++ FixTempDst.
+
+%%% Fix a jmp_switch op.
+
+-ifdef(HIPE_AMD64).
+do_jmp_switch(I) ->
+ #jmp_switch{temp=Temp, jtab=Tab} = I,
+ case temp_is_pseudo(Temp) of
+ false ->
+ case temp_is_pseudo(Tab) of
+ false ->
+ [I];
+ true ->
+ Reg = hipe_x86:mk_temp(hipe_amd64_registers:temp0(), 'untagged'),
+        [hipe_x86:mk_move(Tab, Reg), I#jmp_switch{jtab=Reg}]
+ end;
+ true ->
+ Reg = hipe_x86:mk_temp(hipe_amd64_registers:temp1(), 'untagged'),
+ case temp_is_pseudo(Tab) of
+ false ->
+ [hipe_x86:mk_move(Temp, Reg), I#jmp_switch{temp=Reg}];
+ true ->
+ Reg2 = hipe_x86:mk_temp(hipe_amd64_registers:temp0(), 'untagged'),
+ [hipe_x86:mk_move(Temp, Reg),
+ hipe_x86:mk_move(Tab, Reg2),
+ I#jmp_switch{temp=Reg, jtab=Reg2}]
+ end
+ end.
+-else.
+do_jmp_switch(I) ->
+ #jmp_switch{temp=Temp} = I,
+ case temp_is_pseudo(Temp) of
+ false ->
+ [I];
+ true ->
+ Reg = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), 'untagged'),
+ [hipe_x86:mk_move(Temp, Reg), I#jmp_switch{temp=Reg}]
+ end.
+-endif.
+
+%%% Fix a lea op.
+
+do_lea(I) ->
+ #lea{temp=Temp} = I,
+ case temp_is_pseudo(Temp) of
+ false ->
+ [I];
+ true ->
+ Reg = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), 'untagged'),
+ [I#lea{temp=Reg}, hipe_x86:mk_move(Reg, Temp)]
+ end.
+
+%%% Fix a move op.
+
+do_move(I) ->
+ #move{src=Src0,dst=Dst0} = I,
+ {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0),
+ FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}].
+
+-ifdef(HIPE_AMD64).
+do_move64(I) ->
+ #move64{dst=Dst} = I,
+ case is_mem_opnd(Dst) of
+ false ->
+ [I];
+ true ->
+ Reg = hipe_amd64_registers:temp1(),
+ NewDst = clone(Dst, Reg),
+ [I#move64{dst=NewDst}, hipe_x86:mk_move(NewDst, Dst)]
+ end.
+-else.
+do_move64(I) -> exit({?MODULE, I}).
+-endif.
+
+do_movx(I) ->
+ {{FixSrc, Src}, {FixDst, Dst}} =
+ case I of
+ #movsx{src=Src0,dst=Dst0} ->
+ {fix_src_operand(Src0), fix_dst_operand(Dst0)};
+ #movzx{src=Src0,dst=Dst0} ->
+ {fix_src_operand(Src0), fix_dst_operand(Dst0)}
+ end,
+ Reg = ?HIPE_X86_REGISTERS:temp0(),
+ Dst2 = clone(Dst, Reg),
+ I2 = case is_mem_opnd(Dst) of
+ true ->
+ case I of
+ #movsx{} ->
+ [hipe_x86:mk_movsx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)];
+ #movzx{} ->
+ [hipe_x86:mk_movzx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)]
+ end;
+ false ->
+ case I of
+ #movsx{} ->
+ [hipe_x86:mk_movsx(Src, Dst)];
+ #movzx{} ->
+ [hipe_x86:mk_movzx(Src, Dst)]
+ end
+ end,
+ FixSrc ++ FixDst ++ I2.
+
+
+%%% Fix a fmove op.
+%% conv_to_float
+do_fmove(I=#fmove{src=#x86_temp{type=untagged},
+ dst=#x86_temp{type=double}}) ->
+ #fmove{src=Src0,dst=Dst0} = I,
+ Src = clone(Src0, ?HIPE_X86_REGISTERS:temp0()),
+ Dst = clone(Dst0, ?HIPE_X86_REGISTERS:temp1()),
+ [hipe_x86:mk_move(Src0, Src),
+ I#fmove{src=Src, dst=Dst},
+ hipe_x86:mk_fmove(Dst, Dst0)];
+%% fmove
+do_fmove(I) ->
+ #fmove{src=Src0,dst=Dst0} = I,
+ {FixSrc, Src, FixDst, Dst} = do_binary(Src0, Dst0),
+ FixSrc ++ FixDst ++ [I#fmove{src=Src,dst=Dst}].
+
+do_fp_unop(I) ->
+ #fp_unop{arg=Arg} = I,
+ case is_mem_opnd(Arg) of
+ false ->
+ [I];
+ true ->
+ Reg = ?HIPE_X86_REGISTERS:temp1(),
+ NewArg = clone(Arg, Reg),
+ [hipe_x86:mk_fmove(Arg, NewArg),
+ I#fp_unop{arg=NewArg},
+ hipe_x86:mk_fmove(NewArg, Arg)]
+ end.
+
+do_fp_binop(I) ->
+ #fp_binop{src=Src0, dst=Dst0} = I,
+ {FixSrc, Src} = fix_src_operand(Src0),
+ {FixDst, Dst} = fix_dst_operand(Dst0),
+ Reg = ?HIPE_X86_REGISTERS:temp1(),
+ Dst2 = clone(Dst, Reg),
+ FixSrc ++ FixDst ++ [hipe_x86:mk_fmove(Dst, Dst2),
+ I#fp_binop{src=Src, dst=Dst2},
+ hipe_x86:mk_fmove(Dst2, Dst)].
+
+do_shift(I) ->
+ #shift{src=Src0,dst=Dst0} = I,
+ {FixDst, Dst} = fix_dst_operand(Dst0),
+ Reg = ?HIPE_X86_REGISTERS:?ECX(),
+ case Src0 of
+ #x86_imm{} ->
+ FixDst ++ [I#shift{dst=Dst}];
+ #x86_temp{reg=Reg} ->
+ FixDst ++ [I#shift{dst=Dst}]
+ end.
+
+%%% Fix the operands of a binary op.
+%%% 1. remove pseudos from any explicit memory operands
+%%% 2. if both operands are (implicit or explicit) memory operands,
+%%% move src to a reg and use reg as src in the original insn
+
+do_binary(Src0, Dst0) ->
+ {FixSrc, Src} = fix_src_operand(Src0),
+ {FixDst, Dst} = fix_dst_operand(Dst0),
+ {FixSrc3, Src3} =
+ case is_mem_opnd(Src) of
+ false ->
+ {FixSrc, Src};
+ true ->
+ case is_mem_opnd(Dst) of
+ false ->
+ {FixSrc, Src};
+ true ->
+ Reg = ?HIPE_X86_REGISTERS:temp0(),
+ Src2 = clone(Src, Reg),
+ FixSrc2 = FixSrc ++ [mk_move(Src, Src2)],
+ {FixSrc2, Src2}
+ end
+ end,
+ {FixSrc3, Src3, FixDst, Dst}.
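+%% Sketch with hypothetical pseudos t1/t2 that both end up in memory:
+%% "move t1, t2" is rewritten so that t1 is first copied into the reserved
+%% temp0 register and the original instruction then uses temp0 as its source.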
+
+%%% Fix any x86_mem operand to not refer to any pseudos.
+%%% The fixup may use additional instructions and registers.
+%%% 'src' operands may clobber '%temp0'.
+%%% 'dst' operands may clobber '%temp1'.
+
+fix_src_operand(Opnd) ->
+ fix_mem_operand(Opnd, ?HIPE_X86_REGISTERS:temp0()).
+
+fix_dst_operand(Opnd) ->
+ fix_mem_operand(Opnd, ?HIPE_X86_REGISTERS:temp1()).
+
+fix_mem_operand(Opnd, Reg) -> % -> {[fixupcode], newop}
+ case Opnd of
+ #x86_mem{base=Base,off=Off} ->
+ case is_mem_opnd(Base) of
+ false ->
+ case src_is_pseudo(Off) of
+ false ->
+ {[], Opnd};
+ true -> % pseudo(reg)
+ Temp = clone(Off, Reg),
+ {[hipe_x86:mk_move(Off, Temp)],
+ Opnd#x86_mem{off=Temp}}
+ end;
+ true ->
+ Temp = clone(Base, Reg),
+ case src_is_pseudo(Off) of
+ false -> % imm/reg(pseudo)
+ {[hipe_x86:mk_move(Base, Temp)],
+ Opnd#x86_mem{base=Temp}};
+ true -> % pseudo1(pseudo0)
+ {[hipe_x86:mk_move(Base, Temp),
+ hipe_x86:mk_alu('add', Off, Temp)],
+ Opnd#x86_mem{base=Temp, off=hipe_x86:mk_imm(0)}}
+ end
+ end;
+ _ ->
+ {[], Opnd}
+ end.
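+%% Sketch with hypothetical pseudos p0/p1 and the scratch register Reg
+%% passed in (temp0 for sources, temp1 for destinations):
+%%   p1(p0)           => "mov p0,Reg; add p1,Reg", operand becomes 0(Reg)
+%%   p1(precoloured)  => "mov p1,Reg",             operand becomes Reg(base)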
+
+%%% Check if an operand denotes a memory cell (mem or pseudo).
+
+is_mem_opnd(Opnd) ->
+ case Opnd of
+ #x86_mem{} -> true;
+ #x86_temp{} -> temp_is_pseudo(Opnd);
+ _ -> false
+ end.
+
+%%% Check if an operand is a pseudo-Temp.
+
+src_is_pseudo(Src) ->
+ case hipe_x86:is_temp(Src) of
+ true -> temp_is_pseudo(Src);
+ false -> false
+ end.
+
+temp_is_pseudo(Temp) ->
+ not(?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp))).
+
+%%% Make Reg a clone of Dst (attach Dst's type to Reg).
+
+clone(Dst, Reg) ->
+ Type =
+ case Dst of
+ #x86_mem{} -> hipe_x86:mem_type(Dst);
+ #x86_temp{} -> hipe_x86:temp_type(Dst)
+ end,
+ hipe_x86:mk_temp(Reg, Type).
+
+mk_move(Src, Dst=#x86_temp{type=double}) ->
+ hipe_x86:mk_fmove(Src, Dst);
+mk_move(Src, Dst) ->
+ hipe_x86:mk_move(Src, Dst).
diff --git a/lib/hipe/x86/hipe_x86_ra_postconditions.erl b/lib/hipe/x86/hipe_x86_ra_postconditions.erl
new file mode 100644
index 0000000000..0b70764daf
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_ra_postconditions.erl
@@ -0,0 +1,452 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_RA_POSTCONDITIONS, hipe_amd64_ra_postconditions).
+-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
+-define(HIPE_X86_SPECIFIC, hipe_amd64_specific).
+-define(ECX, rcx).
+-else.
+-define(HIPE_X86_RA_POSTCONDITIONS, hipe_x86_ra_postconditions).
+-define(HIPE_X86_REGISTERS, hipe_x86_registers).
+-define(HIPE_X86_SPECIFIC, hipe_x86_specific).
+-define(ECX, ecx).
+-endif.
+
+-module(?HIPE_X86_RA_POSTCONDITIONS).
+
+-export([check_and_rewrite/3]).
+
+-include("../x86/hipe_x86.hrl").
+-define(HIPE_INSTRUMENT_COMPILER, true).
+-include("../main/hipe.hrl").
+-define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)).
+
+check_and_rewrite(Defun, Coloring, Strategy) ->
+ %% io:format("Converting\n"),
+ TempMap = hipe_temp_map:cols2tuple(Coloring, ?HIPE_X86_SPECIFIC),
+ %% io:format("Rewriting\n"),
+ #defun{code=Code0} = Defun,
+ {Code1, DidSpill} = do_insns(Code0, TempMap, Strategy, [], false),
+ {Defun#defun{code=Code1,var_range={0,hipe_gensym:get_var(x86)}},
+ DidSpill}.
+
+do_insns([I|Insns], TempMap, Strategy, Accum, DidSpill0) ->
+ {NewIs, DidSpill1} = do_insn(I, TempMap, Strategy),
+ do_insns(Insns, TempMap, Strategy, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1);
+do_insns([], _TempMap, _Strategy, Accum, DidSpill) ->
+ {lists:reverse(Accum), DidSpill}.
+
+do_insn(I, TempMap, Strategy) -> % Insn -> {Insn list, DidSpill}
+ case I of
+ #alu{} ->
+ do_alu(I, TempMap, Strategy);
+ #cmp{} ->
+ do_cmp(I, TempMap, Strategy);
+ #imul{} ->
+ do_imul(I, TempMap, Strategy);
+ #jmp_switch{} ->
+ do_jmp_switch(I, TempMap, Strategy);
+ #lea{} ->
+ do_lea(I, TempMap, Strategy);
+ #move{} ->
+ do_move(I, TempMap, Strategy);
+ #move64{} ->
+ do_move64(I, TempMap, Strategy);
+ #movsx{} ->
+ do_movx(I, TempMap, Strategy);
+ #movzx{} ->
+ do_movx(I, TempMap, Strategy);
+ #fmove{} ->
+ do_fmove(I, TempMap, Strategy);
+ #shift{} ->
+ do_shift(I, TempMap, Strategy);
+ _ ->
+ %% comment, jmp*, label, pseudo_call, pseudo_jcc, pseudo_tailcall,
+ %% pseudo_tailcall_prepare, push, ret
+ {[I], false}
+ end.
+
+%%% Fix an alu op.
+
+do_alu(I, TempMap, Strategy) ->
+ #alu{src=Src0,dst=Dst0} = I,
+ {FixSrc,Src,FixDst,Dst,DidSpill} =
+ do_binary(Src0, Dst0, TempMap, Strategy),
+ {FixSrc ++ FixDst ++ [I#alu{src=Src,dst=Dst}], DidSpill}.
+
+%%% Fix a cmp op.
+
+do_cmp(I, TempMap, Strategy) ->
+ #cmp{src=Src0,dst=Dst0} = I,
+ {FixSrc, Src, FixDst, Dst, DidSpill} =
+ do_binary(Src0, Dst0, TempMap, Strategy),
+ {FixSrc ++ FixDst ++ [I#cmp{src=Src,dst=Dst}], DidSpill}.
+
+%%% Fix an imul op.
+
+do_imul(I, TempMap, Strategy) ->
+ #imul{imm_opt=ImmOpt,src=Src0,temp=Temp0} = I,
+ {FixSrc,Src,DidSpill1} = fix_src_operand(Src0, TempMap, Strategy), % temp1
+ {FixTempSrc,Temp,FixTempDst,DidSpill2} =
+ case is_spilled(Temp0, TempMap) of
+ false ->
+ {[], Temp0, [], false};
+ true ->
+ Reg = spill_temp0('untagged', Strategy),
+ {case ImmOpt of
+ [] -> [hipe_x86:mk_move(Temp0, Reg)]; % temp *= src
+ _ -> [] % temp = src * imm
+ end,
+ Reg,
+ [hipe_x86:mk_move(Reg, Temp0)],
+ true}
+ end,
+ {FixSrc ++ FixTempSrc ++ [I#imul{src=Src,temp=Temp}] ++ FixTempDst,
+ DidSpill1 or DidSpill2}.
+
+%%% Fix a jmp_switch op.
+
+-ifdef(HIPE_AMD64).
+do_jmp_switch(I, TempMap, Strategy) ->
+ #jmp_switch{temp=Temp, jtab=Tab} = I,
+ case is_spilled(Temp, TempMap) of
+ false ->
+ case is_spilled(Tab, TempMap) of
+ false ->
+ {[I], false};
+ true ->
+ NewTab = spill_temp('untagged', Strategy),
+          {[hipe_x86:mk_move(Tab, NewTab), I#jmp_switch{jtab=NewTab}],
+ true}
+ end;
+ true ->
+ case is_spilled(Tab, TempMap) of
+ false ->
+ NewTmp = spill_temp('untagged', Strategy),
+ {[hipe_x86:mk_move(Temp, NewTmp), I#jmp_switch{temp=NewTmp}],
+ true};
+ true ->
+ NewTmp = spill_temp('untagged', Strategy),
+ NewTab = spill_temp0('untagged', Strategy),
+ {[hipe_x86:mk_move(Temp, NewTmp),
+ hipe_x86:mk_move(Tab, NewTab),
+ I#jmp_switch{temp=NewTmp, jtab=NewTab}],
+ true}
+ end
+ end.
+-else. % not AMD64
+do_jmp_switch(I, TempMap, Strategy) ->
+ #jmp_switch{temp=Temp} = I,
+ case is_spilled(Temp, TempMap) of
+ false ->
+ {[I], false};
+ true ->
+ NewTmp = spill_temp('untagged', Strategy),
+ {[hipe_x86:mk_move(Temp, NewTmp), I#jmp_switch{temp=NewTmp}],
+ true}
+ end.
+-endif. % not AMD64
+
+%%% Fix a lea op.
+
+do_lea(I, TempMap, Strategy) ->
+ #lea{temp=Temp} = I,
+ case is_spilled(Temp, TempMap) of
+ false ->
+ {[I], false};
+ true ->
+ NewTmp = spill_temp('untagged', Strategy),
+ {[I#lea{temp=NewTmp}, hipe_x86:mk_move(NewTmp, Temp)],
+ true}
+ end.
+
+%%% Fix a move op.
+
+do_move(I, TempMap, Strategy) ->
+ #move{src=Src0,dst=Dst0} = I,
+ {FixSrc, Src, FixDst, Dst, DidSpill} =
+ do_check_byte_move(Src0, Dst0, TempMap, Strategy),
+ {FixSrc ++ FixDst ++ [I#move{src=Src,dst=Dst}],
+ DidSpill}.
+
+-ifdef(HIPE_AMD64).
+
+%%% AMD64 has no issues with byte moves.
+do_check_byte_move(Src0, Dst0, TempMap, Strategy) ->
+ do_binary(Src0, Dst0, TempMap, Strategy).
+
+-else. % not AMD64
+
+%%% x86 can only do byte moves to a subset of the integer registers.
+do_check_byte_move(Src0, Dst0, TempMap, Strategy) ->
+ case Dst0 of
+ #x86_mem{type=byte} ->
+ do_byte_move(Src0, Dst0, TempMap, Strategy);
+ _ ->
+ do_binary(Src0, Dst0, TempMap, Strategy)
+ end.
+
+do_byte_move(Src0, Dst0, TempMap, Strategy) ->
+ {FixSrc, Src, DidSpill1} = fix_src_operand(Src0, TempMap, Strategy),
+ {FixDst, Dst, DidSpill2} = fix_dst_operand(Dst0, TempMap, Strategy),
+ Reg = hipe_x86_registers:eax(),
+ {FixSrc3, Src3} = % XXX: this just checks Src, the result is known!
+ case Src of
+ #x86_imm{} ->
+ {FixSrc, Src};
+ #x86_temp{reg=Reg} -> % small moves must start from reg 1->4
+ {FixSrc, Src} % so variable sources are always put in eax
+ end,
+ {FixSrc3, Src3, FixDst, Dst,
+ DidSpill2 or DidSpill1}.
+
+-endif. % not AMD64
+
+%%% Fix a move64 op.
+
+do_move64(I, TempMap, Strategy) ->
+ #move64{dst=Dst} = I,
+  case is_spilled(Dst, TempMap) of
+ false ->
+ {[I], false};
+ true ->
+ Reg = clone(Dst, Strategy),
+ {[I#move64{dst=Reg}, hipe_x86:mk_move(Reg, Dst)], true}
+ end.
+
+%%% Fix a movx op.
+
+do_movx(I, TempMap, Strategy) ->
+ {{FixSrc, Src, DidSpill1}, {FixDst, Dst, DidSpill2}} =
+ case I of
+ #movsx{src=Src0,dst=Dst0} ->
+ {fix_src_operand(Src0, TempMap, Strategy),
+ fix_dst_operand(Dst0, TempMap, Strategy)};
+ #movzx{src=Src0,dst=Dst0} ->
+ {fix_src_operand(Src0, TempMap, Strategy),
+ fix_dst_operand(Dst0, TempMap, Strategy)}
+ end,
+ {I3, DidSpill3} =
+ case is_spilled(Dst, TempMap) of
+ false ->
+ I2 = case I of
+ #movsx{} ->
+ [hipe_x86:mk_movsx(Src, Dst)];
+ #movzx{} ->
+ [hipe_x86:mk_movzx(Src, Dst)]
+ end,
+ {I2, false};
+ true ->
+ Dst2 = clone(Dst, Strategy),
+ I2 =
+ case I of
+ #movsx{} ->
+ [hipe_x86:mk_movsx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)];
+ #movzx{} ->
+ [hipe_x86:mk_movzx(Src, Dst2), hipe_x86:mk_move(Dst2, Dst)]
+ end,
+ {I2, true}
+ end,
+ {FixSrc++FixDst++I3,
+ DidSpill3 or DidSpill2 or DidSpill1}.
+
+%%% Fix an fmove op.
+
+do_fmove(I, TempMap, Strategy) ->
+ #fmove{src=Src0,dst=Dst0} = I,
+ {FixSrc, Src, DidSpill1} = fix_src_operand(Src0, TempMap, Strategy),
+ {FixDst, Dst, DidSpill2} = fix_dst_operand(Dst0, TempMap, Strategy),
+  %% an fmove from one memory position to another is handled
+  %% by the f.p. register allocator.
+ {FixSrc ++ FixDst ++ [I#fmove{src=Src,dst=Dst}],
+ DidSpill1 or DidSpill2}.
+
+%%% Fix a shift operation.
+%%% 1. remove pseudos from any explicit memory operands
+%%% 2. if the source is a register or memory position
+%%% make sure to move it to %ecx
+
+do_shift(I, TempMap, Strategy) ->
+ #shift{src=Src0,dst=Dst0} = I,
+ {FixDst, Dst, DidSpill} = fix_dst_operand(Dst0, TempMap, Strategy),
+ Reg = ?HIPE_X86_REGISTERS:?ECX(),
+ case Src0 of
+ #x86_imm{} ->
+ {FixDst ++ [I#shift{dst=Dst}], DidSpill};
+ #x86_temp{reg=Reg} ->
+ {FixDst ++ [I#shift{dst=Dst}], DidSpill}
+ end.
+
+%%% Fix the operands of a binary op.
+%%% 1. remove pseudos from any explicit memory operands
+%%% 2. if both operands are (implicit or explicit) memory operands,
+%%% move src to a reg and use reg as src in the original insn
+
+do_binary(Src0, Dst0, TempMap, Strategy) ->
+ {FixSrc, Src, DidSpill1} = fix_src_operand(Src0, TempMap, Strategy),
+ {FixDst, Dst, DidSpill2} = fix_dst_operand(Dst0, TempMap, Strategy),
+ {FixSrc3, Src3, DidSpill3} =
+ case is_mem_opnd(Src, TempMap) of
+ false ->
+ {FixSrc, Src, false};
+ true ->
+ case is_mem_opnd(Dst, TempMap) of
+ false ->
+ {FixSrc, Src, false};
+ true ->
+ Src2 = clone(Src, Strategy),
+ FixSrc2 = FixSrc ++ [hipe_x86:mk_move(Src, Src2)],
+ {FixSrc2, Src2, true}
+ end
+ end,
+ {FixSrc3, Src3, FixDst, Dst,
+ DidSpill3 or DidSpill2 or DidSpill1}.
+
+%%% Fix any x86_mem operand to not refer to any spilled temps.
+
+fix_src_operand(Opnd, TmpMap, Strategy) ->
+ fix_mem_operand(Opnd, TmpMap, temp1(Strategy)).
+
+temp1('normal') -> [];
+temp1('linearscan') -> ?HIPE_X86_REGISTERS:temp1().
+
+fix_dst_operand(Opnd, TempMap, Strategy) ->
+ fix_mem_operand(Opnd, TempMap, temp0(Strategy)).
+
+temp0('normal') -> [];
+temp0('linearscan') -> ?HIPE_X86_REGISTERS:temp0().
+
+fix_mem_operand(Opnd, TempMap, RegOpt) -> % -> {[fixupcode], newop, DidSpill}
+ case Opnd of
+ #x86_mem{base=Base,off=Off} ->
+ case is_mem_opnd(Base, TempMap) of
+ false ->
+ case is_mem_opnd(Off, TempMap) of
+ false ->
+ {[], Opnd, false};
+ true ->
+ Temp = clone2(Off, RegOpt),
+ {[hipe_x86:mk_move(Off, Temp)],
+ Opnd#x86_mem{off=Temp},
+ true}
+ end;
+ true ->
+ Temp = clone2(Base, RegOpt),
+ case is_mem_opnd(Off, TempMap) of
+ false -> % imm/reg(pseudo)
+ {[hipe_x86:mk_move(Base, Temp)],
+ Opnd#x86_mem{base=Temp},
+ true};
+ true -> % pseudo(pseudo)
+ {[hipe_x86:mk_move(Base, Temp),
+ hipe_x86:mk_alu('add', Off, Temp)],
+ Opnd#x86_mem{base=Temp, off=hipe_x86:mk_imm(0)},
+ true}
+ end
+ end;
+ _ ->
+ {[], Opnd, false}
+ end.
+
+%%% Check if an operand denotes a memory cell (mem or pseudo).
+
+is_mem_opnd(Opnd, TempMap) ->
+ R =
+ case Opnd of
+ #x86_mem{} -> true;
+ #x86_temp{} ->
+ Reg = hipe_x86:temp_reg(Opnd),
+ case hipe_x86:temp_is_allocatable(Opnd) of
+ true ->
+ case tuple_size(TempMap) > Reg of
+ true ->
+ case
+ hipe_temp_map:is_spilled(Reg, TempMap) of
+ true ->
+ ?count_temp(Reg),
+ true;
+ false -> false
+ end;
+ _ ->
+                %% cannot happen; the old linear-scan postpass treated
+                %% this case as true and the normal postpass as false
+ exit({?MODULE,is_mem_opnd,Reg}),
+ false
+ end;
+ false -> true
+ end;
+ _ -> false
+ end,
+ %% io:format("Op ~w mem: ~w\n",[Opnd,R]),
+ R.
+
+%%% Check if an operand is a spilled Temp.
+
+is_spilled(Temp, TempMap) ->
+ case hipe_x86:temp_is_allocatable(Temp) of
+ true ->
+ Reg = hipe_x86:temp_reg(Temp),
+ case tuple_size(TempMap) > Reg of
+ true ->
+ case hipe_temp_map:is_spilled(Reg, TempMap) of
+ true ->
+ ?count_temp(Reg),
+ true;
+ false ->
+ false
+ end;
+ false ->
+ false
+ end;
+ false -> true
+ end.
+
+%%% Make Reg a clone of Dst (attach Dst's type to Reg).
+
+clone(Dst, Strategy) ->
+ Type =
+ case Dst of
+ #x86_mem{} -> hipe_x86:mem_type(Dst);
+ #x86_temp{} -> hipe_x86:temp_type(Dst)
+ end,
+ spill_temp(Type, Strategy).
+
+spill_temp0(Type, 'normal') ->
+ hipe_x86:mk_new_temp(Type);
+spill_temp0(Type, 'linearscan') ->
+ hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), Type).
+
+spill_temp(Type, 'normal') ->
+ hipe_x86:mk_new_temp(Type);
+spill_temp(Type, 'linearscan') ->
+ hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), Type).
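+%% With the 'normal' strategy a fresh pseudo is returned, to be placed by a
+%% later allocation round; with 'linearscan' the reserved scratch registers
+%% temp0 and temp1 are used directly.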
+
+%%% Make a certain reg into a clone of Dst
+
+clone2(Dst, RegOpt) ->
+ Type =
+ case Dst of
+ #x86_mem{} -> hipe_x86:mem_type(Dst);
+ #x86_temp{} -> hipe_x86:temp_type(Dst)
+ end,
+ case RegOpt of
+ [] -> hipe_x86:mk_new_temp(Type);
+ Reg -> hipe_x86:mk_temp(Reg, Type)
+ end.
diff --git a/lib/hipe/x86/hipe_x86_ra_x87_ls.erl b/lib/hipe/x86/hipe_x86_ra_x87_ls.erl
new file mode 100644
index 0000000000..6bdb08c6fb
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_ra_x87_ls.erl
@@ -0,0 +1,63 @@
+%% $Id$
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2006-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% Linear Scan register allocator for x87
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_RA_X87_LS, hipe_amd64_ra_x87_ls).
+-define(HIPE_X86_SPECIFIC_X87, hipe_amd64_specific_x87).
+-define(HIPE_X86_PP, hipe_amd64_pp).
+-define(HIPE_X86_RA_LS, hipe_amd64_ra_ls).
+-else.
+-define(HIPE_X86_RA_X87_LS, hipe_x86_ra_x87_ls).
+-define(HIPE_X86_SPECIFIC_X87, hipe_x86_specific_x87).
+-define(HIPE_X86_PP, hipe_x86_pp).
+-define(HIPE_X86_RA_LS, hipe_x86_ra_ls).
+-endif.
+
+-module(?HIPE_X86_RA_X87_LS).
+-export([ra/2]).
+
+%%-define(DEBUG,1).
+
+-define(HIPE_INSTRUMENT_COMPILER, false). %% Turn off instrumentation.
+-include("../main/hipe.hrl").
+
+ra(Defun, Options) ->
+ ?inc_counter(ra_calls_counter,1),
+ CFG = hipe_x86_cfg:init(Defun),
+ %% ?inc_counter(ra_caller_saves_counter,count_caller_saves(CFG)),
+ SpillIndex = 0,
+ SpillLimit = ?HIPE_X86_SPECIFIC_X87:number_of_temporaries(CFG),
+ ?inc_counter(bbs_counter, length(hipe_x86_cfg:labels(CFG))),
+
+ ?inc_counter(ra_iteration_counter,1),
+ %% ?HIPE_X86_PP:pp(Defun),
+ Cfg = hipe_x86_cfg:init(Defun), % XXX: didn't we just compute this above?
+
+ {Coloring,NewSpillIndex} =
+ ?HIPE_X86_RA_LS:regalloc(Cfg,
+ ?HIPE_X86_SPECIFIC_X87:allocatable(),
+ [hipe_x86_cfg:start_label(Cfg)],
+ SpillIndex, SpillLimit, Options,
+ ?HIPE_X86_SPECIFIC_X87),
+
+ ?add_spills(Options, NewSpillIndex),
+ {Defun, Coloring, NewSpillIndex}.
diff --git a/lib/hipe/x86/hipe_x86_registers.erl b/lib/hipe/x86/hipe_x86_registers.erl
new file mode 100644
index 0000000000..1cfa095995
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_registers.erl
@@ -0,0 +1,254 @@
+%%%
+%%% %CopyrightBegin%
+%%%
+%%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%%
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%%
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%%
+%%% %CopyrightEnd%
+%%%
+%%%
+%%% TODO:
+%%% - Do we need a pseudo reg for the condition codes?
+
+-module(hipe_x86_registers).
+
+-export([reg_name/1,
+ first_virtual/0,
+ is_precoloured/1,
+ is_precoloured_x87/1,
+ all_precoloured/0,
+ eax/0,
+ ecx/0,
+ temp0/0,
+ temp1/0,
+ sp/0,
+ proc_pointer/0,
+ heap_limit/0,
+ fcalls/0,
+ proc_offset/1,
+ sp_limit_offset/0,
+ is_fixed/1,
+ %% fixed/0,
+ allocatable/0,
+ allocatable_x87/0,
+ nr_args/0,
+ arg/1,
+ is_arg/1,
+ args/1,
+ nr_rets/0,
+ ret/1,
+ call_clobbered/0,
+ tailcall_clobbered/0,
+ live_at_return/0,
+ float_size/0,
+ wordsize/0,
+ alignment/0]).
+
+-include("../rtl/hipe_literals.hrl").
+
+-ifdef(X86_HP_IN_ESI).
+-export([heap_pointer/0]).
+-endif.
+
+-define(EAX, 0).
+-define(ECX, 1).
+-define(EDX, 2).
+-define(EBX, 3).
+-define(ESP, 4).
+-define(EBP, 5).
+-define(ESI, 6).
+-define(EDI, 7).
+-define(FCALLS, 8). % proc field alias
+-define(HEAP_LIMIT, 9). % proc field alias
+-define(LAST_PRECOLOURED, 9).
+
+-define(ARG0, ?EAX).
+-define(ARG1, ?EDX).
+-define(ARG2, ?ECX).
+-define(ARG3, ?EBX).
+-define(ARG4, ?EDI).
+
+-define(RET0, ?EAX).
+-define(RET1, ?EDX).
+-define(RET2, ?ECX).
+-define(RET3, ?EBX).
+-define(RET4, ?EDI).
+
+-define(TEMP0, ?EBX). % XXX: was EAX
+-define(TEMP1, ?EDI). % XXX: was EDX then EDI
+
+-define(PROC_POINTER, ?EBP).
+
+reg_name(R) ->
+ case R of
+ ?EAX -> "%eax";
+ ?ECX -> "%ecx";
+ ?EDX -> "%edx";
+ ?EBX -> "%ebx";
+ ?ESP -> "%esp";
+ ?EBP -> "%ebp";
+ ?ESI -> "%esi";
+ ?EDI -> "%edi";
+ ?FCALLS -> "%fcalls";
+ ?HEAP_LIMIT -> "%hplim";
+ Other -> "%r" ++ integer_to_list(Other)
+ end.
+
+first_virtual() -> ?LAST_PRECOLOURED + 1.
+
+is_precoloured(X) -> X =< ?LAST_PRECOLOURED.
+
+is_precoloured_x87(X) -> X =< 6.
+
+all_precoloured() ->
+ [?EAX,
+ ?ECX,
+ ?EDX,
+ ?EBX,
+ ?ESP,
+ ?EBP,
+ ?ESI,
+ ?EDI,
+ ?FCALLS,
+ ?HEAP_LIMIT].
+
+eax() -> ?EAX.
+ecx() -> ?ECX.
+temp0() -> ?TEMP0.
+temp1() -> ?TEMP1.
+sp() -> ?ESP.
+proc_pointer() -> ?PROC_POINTER.
+fcalls() -> ?FCALLS.
+heap_limit() -> ?HEAP_LIMIT.
+
+-ifdef(X86_HP_IN_ESI).
+-define(ESI_IS_FIXED,1).
+-define(HEAP_POINTER, ?ESI).
+heap_pointer() -> ?HEAP_POINTER.
+is_heap_pointer(?HEAP_POINTER) -> true;
+is_heap_pointer(_) -> false.
+-define(LIST_HP_FIXED,[?HEAP_POINTER]).
+-define(LIST_HP_LIVE_AT_RETURN,[{?HEAP_POINTER,untagged}]).
+-else.
+is_heap_pointer(_) -> false.
+-define(LIST_HP_FIXED,[]).
+-define(LIST_HP_LIVE_AT_RETURN,[]).
+-endif.
+
+-ifdef(ESI_IS_FIXED).
+-define(LIST_ESI_ALLOCATABLE,[]).
+-define(LIST_ESI_CALL_CLOBBERED,[]).
+-else.
+-define(LIST_ESI_ALLOCATABLE,[?ESI]).
+-define(LIST_ESI_CALL_CLOBBERED,[{?ESI,tagged},{?ESI,untagged}]).
+-endif.
+
+proc_offset(?FCALLS) -> ?P_FCALLS;
+proc_offset(?HEAP_LIMIT) -> ?P_HP_LIMIT;
+proc_offset(_) -> false.
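+
+%% Note: ?FCALLS and ?HEAP_LIMIT are pseudo registers aliasing fields in
+%% the process structure; a later pass is expected to turn an access to
+%% such a register into a memory operand at proc_offset(Reg) bytes off
+%% the proc pointer (%ebp), e.g. proc_offset(fcalls()) =:= ?P_FCALLS.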
+
+sp_limit_offset() -> ?P_NSP_LIMIT.
+
+is_fixed(?ESP) -> true;
+is_fixed(?PROC_POINTER) -> true;
+is_fixed(?FCALLS) -> true;
+is_fixed(?HEAP_LIMIT) -> true;
+is_fixed(R) -> is_heap_pointer(R).
+
+%% fixed() ->
+%% [?ESP, ?PROC_POINTER, ?FCALLS, ?HEAP_LIMIT | ?LIST_HP_FIXED].
+
+allocatable() ->
+ [?EDX, ?ECX, ?EBX, ?EAX, ?EDI| ?LIST_ESI_ALLOCATABLE].
+
+allocatable_x87() ->
+ [0,1,2,3,4,5,6].
+
+nr_args() -> ?X86_NR_ARG_REGS.
+
+arg(N) ->
+ if N < ?X86_NR_ARG_REGS ->
+ case N of
+ 0 -> ?ARG0;
+ 1 -> ?ARG1;
+ 2 -> ?ARG2;
+ 3 -> ?ARG3;
+ 4 -> ?ARG4;
+ _ -> exit({?MODULE, arg, N})
+ end;
+ true ->
+ exit({?MODULE, arg, N})
+ end.
+
+is_arg(R) ->
+ case R of
+ ?ARG0 -> ?X86_NR_ARG_REGS > 0;
+ ?ARG1 -> ?X86_NR_ARG_REGS > 1;
+ ?ARG2 -> ?X86_NR_ARG_REGS > 2;
+ ?ARG3 -> ?X86_NR_ARG_REGS > 3;
+ ?ARG4 -> ?X86_NR_ARG_REGS > 4;
+ _ -> false
+ end.
+
+args(Arity) when is_integer(Arity), Arity >= 0 ->
+ N = erlang:min(Arity, ?X86_NR_ARG_REGS),
+ args(N-1, []).
+
+args(I, Rest) when I < 0 -> Rest;
+args(I, Rest) -> args(I-1, [arg(I) | Rest]).
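+
+%% Example: the actual value of ?X86_NR_ARG_REGS comes from
+%% hipe_literals.hrl; if it were 3, then args(5) would return
+%% [?ARG0,?ARG1,?ARG2] = [?EAX,?EDX,?ECX], i.e. only the first three
+%% arguments travel in registers and the remaining ones are presumably
+%% passed on the stack.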
+
+nr_rets() -> ?X86_NR_RET_REGS.
+
+ret(N) ->
+ if N < ?X86_NR_RET_REGS ->
+ case N of
+ 0 -> ?RET0;
+ 1 -> ?RET1;
+ 2 -> ?RET2;
+ 3 -> ?RET3;
+ 4 -> ?RET4;
+ _ -> exit({?MODULE, ret, N})
+ end;
+ true ->
+ exit({?MODULE, ret, N})
+ end.
+
+call_clobbered() ->
+ [{?EAX,tagged},{?EAX,untagged}, % does the RA strip the type or not?
+ {?EDX,tagged},{?EDX,untagged},
+ {?ECX,tagged},{?ECX,untagged},
+ {?EBX,tagged},{?EBX,untagged},
+ {?EDI,tagged},{?EDI,untagged}
+ | ?LIST_ESI_CALL_CLOBBERED] ++ all_x87_pseudos().
+
+tailcall_clobbered() -> % tailcall crapola needs two temps
+ [{?TEMP0,tagged},{?TEMP0,untagged},
+ {?TEMP1,tagged},{?TEMP1,untagged}] ++ all_x87_pseudos().
+
+all_x87_pseudos() ->
+ [{0,double}, {1,double}, {2,double}, {3,double},
+ {4,double}, {5,double}, {6,double}].
+
+live_at_return() ->
+ [{?ESP,untagged}
+ ,{?PROC_POINTER,untagged}
+ ,{?FCALLS,untagged}
+ ,{?HEAP_LIMIT,untagged}
+ | ?LIST_HP_LIVE_AT_RETURN
+ ].
+
+alignment() -> 4.
+
+float_size() -> 8.
+
+wordsize() -> 4.
diff --git a/lib/hipe/x86/hipe_x86_spill_restore.erl b/lib/hipe/x86/hipe_x86_spill_restore.erl
new file mode 100644
index 0000000000..e60c446e17
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_spill_restore.erl
@@ -0,0 +1,345 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% ====================================================================
+%% Authors : Dogan Yazar and Erdem Aksu (KT2 project of 2008)
+%% ====================================================================
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_SPILL_RESTORE, hipe_amd64_spill_restore).
+-define(HIPE_X86_LIVENESS, hipe_amd64_liveness).
+-define(HIPE_X86_SPECIFIC, hipe_amd64_specific).
+-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
+-define(X86STR, "amd64").
+-else.
+-define(HIPE_X86_SPILL_RESTORE, hipe_x86_spill_restore).
+-define(HIPE_X86_LIVENESS, hipe_x86_liveness).
+-define(HIPE_X86_SPECIFIC, hipe_x86_specific).
+-define(HIPE_X86_REGISTERS, hipe_x86_registers).
+-define(X86STR, "x86").
+-endif.
+
+-module(?HIPE_X86_SPILL_RESTORE).
+
+-export([spill_restore/2]).
+
+%% controls which set library is used to keep temp variables.
+-define(SET_MODULE, ordsets).
+
+%% Turn on instrumentation.
+-define(HIPE_INSTRUMENT_COMPILER, true).
+
+-include("../main/hipe.hrl").
+-include("../x86/hipe_x86.hrl"). % Added for the definition of #pseudo_call{}
+-include("../flow/cfg.hrl"). % Added for the definition of #cfg{}
+
+%% Main function
+spill_restore(Defun, Options) ->
+ CFG = ?option_time(firstPass(Defun), ?X86STR" First Pass", Options),
+ CFGFinal = ?option_time(secondPass(CFG), ?X86STR" Second Pass", Options),
+ hipe_x86_cfg:linearise(CFGFinal).
+
+%% Performs the first pass of the algorithm.
+%% Working bottom up over the CFG, it introduces the pseudo_spills.
+firstPass(Defun) ->
+ CFG0 = ?HIPE_X86_SPECIFIC:defun_to_cfg(Defun),
+ %% get the labels bottom up
+ Labels = hipe_x86_cfg:postorder(CFG0),
+ Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0),
+  %% The spill around the whole function will be introduced below the
+  %% move-formals block, so take all labels except that one.
+ LabelsExceptMoveFormals = lists:sublist(Labels, length(Labels)-1),
+  %% All work is done by the helper function firstPassHelper.
+  %% SaveTree keeps all newly introduced spills; its keys are the labels.
+ {CFG1, SaveTree} = firstPassHelper(LabelsExceptMoveFormals, Liveness, CFG0),
+ case hipe_x86_cfg:reverse_postorder(CFG0) of
+ [Label1, Label2|_] ->
+ SaveTreeElement = saveTreeLookup(Label2, SaveTree),
+      %% FilteredSaveTreeElement holds the temps to be spilled around calls
+      %% in the function. They are spilled right after the move formals.
+ FilteredSaveTreeElement = [Temp || Temp <- SaveTreeElement, temp_is_pseudo(Temp)],
+ Block = hipe_x86_cfg:bb(CFG1, Label1),
+ Code = hipe_bb:code(Block),
+      %% The following is tedious but works: the spill is placed between
+      %% the move-formals code and the trailing jump, because spills must
+      %% come after the formals have been moved.
+      %% An alternative would be to introduce a separate block.
+ MoveCodes = lists:sublist(Code, length(Code)-1),
+ JumpCode = lists:last(Code),
+ hipe_x86_cfg:bb_add(CFG1, Label1, hipe_bb:mk_bb(MoveCodes ++ [hipe_x86:mk_pseudo_spill(FilteredSaveTreeElement)] ++ [JumpCode]));
+ _ ->
+ CFG1
+ end.
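+
+%% A sketch of what the first pass produces (hypothetical labels and
+%% temps): for a block
+%%   L2: v1 := ...
+%%       pseudo_call f
+%% where v0 and v1 are live after the call, the pass rewrites it to
+%%   L2: v1 := ...
+%%       pseudo_spill [v0,v1]
+%%       pseudo_call f
+%% and, in addition, a pseudo_spill of the temps live around calls in the
+%% whole function is placed right after the move-formals code above.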
+
+%% Helper function of firstPass.
+
+%% Processes all labels recursively and decides which spills to insert.
+%% A spill is introduced before each function call (pseudo_call), and the
+%% global spill set for the whole function is computed along the way.
+firstPassHelper(Labels, Liveness, CFG) ->
+ firstPassHelper(Labels, Liveness, CFG, gb_trees:empty()).
+
+firstPassHelper([Label|Labels], Liveness, CFG, SaveTree) ->
+ LiveOut = from_list(?HIPE_X86_LIVENESS:liveout(Liveness, Label)),
+ Block = hipe_x86_cfg:bb(CFG, Label),
+ Code = hipe_bb:code(Block),
+ Succ = hipe_x86_cfg:succ(CFG, Label),
+ IntersectedSaveList = findIntersectedSaveList(Succ,SaveTree),
+  %% Call firstPassDoBlock, which returns the updated block code
+  %% (including spills) as well as the intersected save list that
+  %% should be propagated to the blocks above.
+ {_,NewIntersectedList,NewCode} =
+ firstPassDoBlock(Code, LiveOut,IntersectedSaveList),
+ NewBlock = hipe_bb:code_update(Block, NewCode),
+ NewCFG = hipe_x86_cfg:bb_add(CFG, Label, NewBlock),
+ SizeOfSet = setSize(NewIntersectedList),
+
+ %% if the Intersected Save List is not empty, insert it in the save tree.
+ if SizeOfSet =/= 0 ->
+ UpdatedSaveTree = gb_trees:insert(Label,NewIntersectedList,SaveTree),
+ firstPassHelper(Labels, Liveness, NewCFG,UpdatedSaveTree);
+ true ->
+ firstPassHelper(Labels, Liveness, NewCFG,SaveTree)
+ end;
+firstPassHelper([], _, CFG, SaveTree) ->
+ {CFG, SaveTree}.
+
+%% handle each instruction in the block bottom up
+firstPassDoBlock(Insts, LiveOut, IntersectedSaveList) ->
+ lists:foldr(fun firstPassDoInsn/2, {LiveOut,IntersectedSaveList,[]}, Insts).
+
+firstPassDoInsn(I, {LiveOut,IntersectedSaveList,PrevInsts} ) ->
+ case I of
+ #pseudo_call{} ->
+ do_pseudo_call(I, {LiveOut,IntersectedSaveList,PrevInsts});
+ _ -> % other instructions
+ DefinedList = from_list( ?HIPE_X86_LIVENESS:defines(I)),
+ UsedList = from_list(?HIPE_X86_LIVENESS:uses(I)),
+
+ NewLiveOut = subtract(union(LiveOut, UsedList), DefinedList),
+ NewIntersectedSaveList = subtract(IntersectedSaveList, DefinedList),
+
+ {NewLiveOut, NewIntersectedSaveList, [I|PrevInsts]}
+ end.
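+
+%% The update above is the usual backwards liveness transfer:
+%%   live-before(I) = (live-after(I) union uses(I)) minus defs(I).
+%% E.g. (hypothetical temps): for an instruction defining t3 and using
+%% t1,t2 with {t3,t4} live after it, {t1,t2,t4} is live before it, and
+%% t3 is also dropped from the pending save list, since the save cannot
+%% be hoisted above the definition of t3.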
+
+do_pseudo_call(I, {LiveOut,IntersectedSaveList,PrevInsts}) ->
+ LiveTemps = [Temp || Temp <- to_list(LiveOut), temp_is_pseudo(Temp)],
+ NewIntersectedSaveList = union(IntersectedSaveList, LiveOut),
+ {LiveOut, NewIntersectedSaveList, [hipe_x86:mk_pseudo_spill(LiveTemps), I | PrevInsts]}.
+
+findIntersectedSaveList(LabelList, SaveTree) ->
+ findIntersectedSaveList([saveTreeLookup(Label,SaveTree) || Label <- LabelList]).
+
+findIntersectedSaveList([]) ->
+ [];
+findIntersectedSaveList([List1]) ->
+ List1;
+findIntersectedSaveList([List1,List2|Rest]) ->
+ findIntersectedSaveList([intersection(List1, List2)|Rest]).
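+
+%% Example (hypothetical temps): with successor save lists [a,b,c],
+%% [b,c] and [b,d], the fold computes intersection([a,b,c],[b,c]) = [b,c]
+%% and then intersection([b,c],[b,d]) = [b], so only b is common to all
+%% successors and kept.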
+
+saveTreeLookup(Label, SaveTree) ->
+ case gb_trees:lookup(Label, SaveTree) of
+ {value, SaveList} ->
+ SaveList;
+ _ ->
+ []
+ end.
+
+%% Performs the second pass of the algorithm.
+%% It eliminates the unnecessary spills and introduces restores,
+%% working top down over the CFG.
+secondPass(CFG0) ->
+ Labels = hipe_x86_cfg:reverse_postorder(CFG0),
+ Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0),
+ secondPassHelper(Labels,Liveness,CFG0).
+
+%% helper function of secondPass.
+
+%% recursively handle all labels given.
+secondPassHelper(Labels, Liveness, CFG) ->
+ secondPassHelper(Labels, Liveness, CFG, gb_trees:empty(), CFG).
+
+%% AccumulatedCFG stands for the CFG that has restore edges incrementally.
+%% UnmodifiedCFG is the CFG created after first pass.
+
+%% AccumulatedSaveTree is used to eliminate unnecessary saves. The
+%% saves (spills) made in the blocks above are propagated downwards (as
+%% long as they are still live and not redefined), and redundant saves
+%% are eliminated in the lower blocks.
+%% For memory efficiency, it might be better not to maintain the
+%% AccumulatedSaveTree but to traverse the tree recursively and pass the
+%% save lists to the children individually.
+%% The current approach may be faster, though, at the cost of more memory.
+
+secondPassHelper([Label|RestOfLabels], Liveness,
+ AccumulatedCFG, AccumulatedSaveTree, UnmodifiedCFG) ->
+ LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label),
+ Block = hipe_x86_cfg:bb(AccumulatedCFG, Label),
+ Code = hipe_bb:code(Block),
+
+ %% UnmodifiedCFG is needed for getting the correct predecessors.
+  %% (i.e. so that the restore-edge blocks are not included)
+ PredList = hipe_x86_cfg:pred(UnmodifiedCFG, Label),
+ %% find the spills coming from all the parents by intersecting
+ InitialAccumulatedSaveList =
+ findIntersectedSaveList(PredList, AccumulatedSaveTree),
+ AccumulatedSaveList =
+ keepLiveVarsInAccumSaveList(InitialAccumulatedSaveList, LiveOut),
+
+ {NewCode, CFGUpdateWithRestores, NewAccumulatedSaveList} =
+ secondPassDoBlock(Label, Code, AccumulatedCFG, AccumulatedSaveList),
+
+ UpdatedAccumulatedSaveTree =
+ gb_trees:insert(Label, NewAccumulatedSaveList, AccumulatedSaveTree),
+ NewBlock = hipe_bb:code_update(Block, NewCode),
+ NewCFG = hipe_x86_cfg:bb_add(CFGUpdateWithRestores, Label, NewBlock),
+ secondPassHelper(RestOfLabels, Liveness, NewCFG,
+ UpdatedAccumulatedSaveTree, UnmodifiedCFG);
+secondPassHelper([], _, AccumulatedCFG, _, _) ->
+ AccumulatedCFG.
+
+secondPassDoBlock(CurrentLabel, Insts, CFG, AccumulatedSaveList) ->
+ {NewAccumulatedSaveList,NewInsts,_,_,CFGUpdateWithRestores} =
+ lists:foldl(fun secondPassDoInsn/2, {AccumulatedSaveList,[],[],CurrentLabel,CFG}, Insts),
+ {NewInsts, CFGUpdateWithRestores, NewAccumulatedSaveList}.
+
+secondPassDoInsn(I, {AccumulatedSaveList,PrevInsts,SpillList,CurrentLabel,CFG}) ->
+ case I of
+ #pseudo_spill{} ->
+      %% Spill only the variables that have not been accumulated from
+      %% above (i.e. those that are not already saved).
+ VariablesAlreadySaved = [X || {X,_} <- to_list(AccumulatedSaveList)],
+ VariablesToBeSpilled = I#pseudo_spill.args -- VariablesAlreadySaved,
+ NewSpillList = [{Temp, hipe_x86:mk_new_temp(Temp#x86_temp.type)} || Temp <- VariablesToBeSpilled],
+ %% update accumulated saved list by adding the newly spilled variables.
+ NewAccumulatedSaveList = union(AccumulatedSaveList, from_list(NewSpillList)),
+ {NewAccumulatedSaveList, PrevInsts ++ secondPassDoPseudoSpill(NewSpillList), NewSpillList, CurrentLabel, CFG};
+ #pseudo_call{} ->
+ {CFGUpdateWithRestores, NewPseudoCall} =
+ secondPassDoPseudoCall(I, AccumulatedSaveList, CFG),
+ %% spill list is emptied after use
+      {AccumulatedSaveList, PrevInsts ++ [NewPseudoCall], [], CurrentLabel, CFGUpdateWithRestores};
+ _ ->
+ %% remove the defined variables from the accumulated save
+ %% list since they need to be saved again in later occasions.
+ DefinedList = from_list(?HIPE_X86_LIVENESS:defines(I)),
+ NewAccumulatedSaveList = removeRedefVarsFromAccumSaveList(AccumulatedSaveList, DefinedList),
+ {NewAccumulatedSaveList, PrevInsts ++ [I], SpillList, CurrentLabel, CFG}
+ end.
+
+%% Keep only the live vars in the accumulated save list, so that dead
+%% vars are not restored. (The second argument is the live-out set.)
+keepLiveVarsInAccumSaveList([], _) ->
+  [];
+keepLiveVarsInAccumSaveList([{Var,Temp}|Rest], LiveOut) ->
+  IsLive = is_element(Var, LiveOut),
+  case IsLive of
+    true -> [{Var,Temp}|keepLiveVarsInAccumSaveList(Rest, LiveOut)];
+    false -> keepLiveVarsInAccumSaveList(Rest, LiveOut)
+  end.
+
+%% remove the redefined variables from accumulated save list since
+%% they are changed.
+removeRedefVarsFromAccumSaveList([], _) ->
+ [];
+removeRedefVarsFromAccumSaveList([{Var,Temp}|Rest], DefinedList) ->
+ IsDefined = is_element(Var, DefinedList),
+ case IsDefined of
+ true -> removeRedefVarsFromAccumSaveList(Rest, DefinedList);
+ false -> [{Var,Temp}|removeRedefVarsFromAccumSaveList(Rest, DefinedList)]
+ end.
+
+%% convert pseudo_spills to move instructions.
+secondPassDoPseudoSpill(SpillList) ->
+ lists:foldl(fun convertPseudoSpillToMov/2, [], SpillList).
+
+%% If there are variables to be restored, call addRestoreBlockToEdge to
+%% place the restores in a new block on the edge to the continuation label.
+secondPassDoPseudoCall(I, RestoreList, CFG) ->
+ ContLabel = I#pseudo_call.contlab,
+ SizeOfSet = setSize(RestoreList),
+ if SizeOfSet =/= 0 ->
+ addRestoreBlockToEdge(I, ContLabel, CFG, RestoreList);
+ true ->
+ {CFG, I}
+ end.
+
+%% prepares the moves for the spills.
+convertPseudoSpillToMov({Temp, NewTemp}, OtherMoves) ->
+ OtherMoves ++ [mkMove(Temp, NewTemp)].
+
+%% prepares the moves for the restores.
+%% Called by addRestoreBlockToEdge while introducing the restores.
+convertPseudoRestoreToMov({Temp, NewTemp}, OtherMoves) ->
+ OtherMoves ++ [mkMove(NewTemp, Temp)].
+
+%% Makes the move instruction; doubles need an fmove instead of a regular move.
+mkMove(NewTemp,Temp) ->
+ if Temp#x86_temp.type =:= 'double' ->
+ hipe_x86:mk_fmove(NewTemp, Temp);
+ true ->
+ hipe_x86:mk_move(NewTemp, Temp)
+ end.
+
+%% adds a new block (on the edge) that includes introduced restore moves.
+addRestoreBlockToEdge(PseudoCall, ContLabel, CFG, TempArgsList) ->
+ NextLabel = hipe_gensym:get_next_label(x86),
+ NewCode = lists:foldl(fun convertPseudoRestoreToMov/2, [], TempArgsList) ++ [hipe_x86:mk_jmp_label(ContLabel)],
+ NewBlock = hipe_bb:mk_bb(NewCode),
+ NewPseudoCall = redirect_pseudo_call(PseudoCall, ContLabel, NextLabel),
+ NewCFG = hipe_x86_cfg:bb_add(CFG, NextLabel, NewBlock),
+ {NewCFG, NewPseudoCall}.
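+
+%% Sketch (hypothetical labels): for a pseudo_call whose continuation
+%% label is L7 and whose restore list is [{v0,t0}], a fresh block, say
+%% L23, is created containing "v0 := t0" followed by "jmp L7", and the
+%% call's continuation is redirected to L23, so the restores run only on
+%% the return path of that call.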
+
+%% Used instead of hipe_x86_cfg:redirect_jmp, which does not handle pseudo_call instructions.
+redirect_pseudo_call(I = #pseudo_call{contlab=ContLabel}, Old, New) ->
+ case Old =:= ContLabel of
+ true -> I#pseudo_call{contlab=New};
+ false -> I
+ end.
+
+temp_is_pseudo(Temp) ->
+ case hipe_x86:is_temp(Temp) of
+ true -> not(?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp)));
+ false -> false
+ end.
+
+%%---------------------------------------------------------------------
+%% Set operations where the module name is an easily changeable macro
+%%---------------------------------------------------------------------
+
+union(Set1,Set2) ->
+ ?SET_MODULE:union(Set1,Set2).
+
+setSize(Set) ->
+ ?SET_MODULE:size(Set).
+
+from_list(List) ->
+ ?SET_MODULE:from_list(List).
+
+to_list(Set) ->
+ ?SET_MODULE:to_list(Set).
+
+subtract(Set1, Set2) ->
+ ?SET_MODULE:subtract(Set1, Set2).
+
+intersection(Set1, Set2) ->
+ ?SET_MODULE:intersection(Set1, Set2).
+
+is_element(Element, Set) ->
+ ?SET_MODULE:is_element(Element, Set).
diff --git a/lib/hipe/x86/hipe_x86_x87.erl b/lib/hipe/x86/hipe_x86_x87.erl
new file mode 100644
index 0000000000..6ef14abdbb
--- /dev/null
+++ b/lib/hipe/x86/hipe_x86_x87.erl
@@ -0,0 +1,635 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2005-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Floating point handling.
+
+-ifdef(HIPE_AMD64).
+-define(HIPE_X86_X87, hipe_amd64_x87).
+-define(HIPE_X86_DEFUSE, hipe_amd64_defuse).
+-define(HIPE_X86_LIVENESS, hipe_amd64_liveness).
+-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
+-else.
+-define(HIPE_X86_X87, hipe_x86_x87).
+-define(HIPE_X86_DEFUSE, hipe_x86_defuse).
+-define(HIPE_X86_LIVENESS, hipe_x86_liveness).
+-define(HIPE_X86_REGISTERS, hipe_x86_registers).
+-endif.
+
+-module(?HIPE_X86_X87).
+
+-export([map/1]).
+
+-include("../x86/hipe_x86.hrl").
+-include("../main/hipe.hrl").
+
+%%----------------------------------------------------------------------
+
+map(Defun) ->
+ CFG0 = hipe_x86_cfg:init(Defun),
+ %% hipe_x86_cfg:pp(CFG0),
+ Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0),
+ StartLabel = hipe_x86_cfg:start_label(CFG0),
+ {CFG1,_} = do_blocks([], [StartLabel], CFG0, Liveness, [], gb_trees:empty()),
+ hipe_x86_cfg:linearise(CFG1).
+
+do_blocks(Pred, [Lbl|Lbls], CFG, Liveness, Map, BlockMap) ->
+ case gb_trees:lookup(Lbl, BlockMap) of
+ none ->
+ %% This block has not been visited.
+ Block = hipe_x86_cfg:bb(CFG, Lbl),
+ Succ = hipe_x86_cfg:succ(CFG, Lbl),
+ NewBlockMap = gb_trees:insert(Lbl, Map, BlockMap),
+ LiveOut = [X || X <- ?HIPE_X86_LIVENESS:liveout(Liveness, Lbl),
+ is_fp(X)],
+ Code = hipe_bb:code(Block),
+ ReverseCode = lists:reverse(Code),
+ {NewCode0, NewMap, NewBlockMap1, Dirty} =
+ do_block(ReverseCode, LiveOut, Map, NewBlockMap),
+ NewCFG1 =
+ case Dirty of
+ true ->
+ NewBlock = hipe_bb:code_update(Block, NewCode0),
+ hipe_x86_cfg:bb_add(CFG, Lbl, NewBlock);
+ _ ->
+ CFG
+ end,
+ {NewCFG3, NewBlockMap2} =
+ do_blocks(Lbl, Succ, NewCFG1, Liveness, NewMap, NewBlockMap1),
+ do_blocks(Pred, Lbls, NewCFG3, Liveness, Map, NewBlockMap2);
+ {value, fail} ->
+ %% Don't have to follow this trace any longer.
+ do_blocks(Pred,Lbls, CFG, Liveness, Map, BlockMap);
+ {value, ExistingMap} ->
+ %% This block belongs to a trace already handled.
+ %% The Map coming in must be identical to the one used
+ %% when the block was processed.
+ if ExistingMap =:= Map ->
+ do_blocks(Pred, Lbls, CFG, Liveness, Map, BlockMap);
+ true ->
+ NewCFG = do_shuffle(Pred, Lbl, CFG, Map, ExistingMap),
+ do_blocks(Pred, Lbls, NewCFG, Liveness, Map, BlockMap)
+ end
+ end;
+do_blocks(_Pred, [], CFG, _Liveness, _Map, BlockMap) ->
+ {CFG, BlockMap}.
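+
+%% Map is the simulated x87 register stack: a list whose head corresponds
+%% to st(0). BlockMap records, per label, the stack map with which the
+%% label was first entered (or 'fail' for the continuation of a
+%% handle_fp_exception call). If a later trace reaches an already-visited
+%% label with a different map, do_shuffle below inserts a new block on
+%% that edge which reorders the stack into the recorded shape.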
+
+do_block(Ins, LiveOut, Map, BlockMap) ->
+ do_block(Ins, LiveOut, Map, BlockMap, false).
+
+do_block([I|Is], LiveOut, Map, BlockMap, Dirty) ->
+ case handle_insn(I) of
+ false ->
+ {NewCode, NewMap, NewBlockMap, NewDirty} =
+ do_block(Is, LiveOut, Map, BlockMap, Dirty),
+ {NewCode++[I], NewMap, NewBlockMap, NewDirty};
+ true ->
+ Def = ordsets:from_list(?HIPE_X86_DEFUSE:insn_def(I)),
+ Use = ordsets:from_list(?HIPE_X86_DEFUSE:insn_use(I)),
+ NewLiveOut =
+ ordsets:filter(fun(X) -> is_fp(X) end,
+ ordsets:union(ordsets:subtract(LiveOut, Def), Use)),
+ {NewCode, NewMap, NewBlockMap, NewDirty} =
+ do_block(Is, NewLiveOut, Map, BlockMap, Dirty),
+ {NewI, NewMap1, NewBlockMap1} =
+ do_insn(I, LiveOut, NewMap, NewBlockMap),
+ NewDirty1 =
+ if NewDirty =:= true -> true;
+ NewI =:= [I] -> false;
+ true -> true
+ end,
+ {NewCode++NewI, NewMap1, NewBlockMap1, NewDirty1}
+ end;
+do_block([], LiveOut, Map, BlockMap, Dirty) ->
+ case [X || X <- Map, not lists:member(X, LiveOut)] of
+ [] ->
+ {[], Map, BlockMap, Dirty};
+ Pop ->
+ {PopIns, NewMap} = pop_dead(Pop, Map),
+ {PopIns, NewMap, BlockMap, true}
+ end.
+
+do_shuffle(Pred, Lbl, CFG, OldMap, NewMap) ->
+ %% First make sure both maps have the same members.
+ Push = NewMap -- OldMap,
+ Pop = OldMap -- NewMap,
+ {PopInsn, OldMap0} = pop_dead(Pop, OldMap),
+ {PushInsn, OldMap1} =
+ case Push of
+ []-> {[], OldMap0};
+ _-> push_list(lists:reverse(Push), OldMap0)
+ end,
+ Code =
+ if OldMap1 =:= NewMap ->
+ %% It was enough to push and pop.
+ PopInsn ++ PushInsn ++ [hipe_x86:mk_jmp_label(Lbl)];
+ true ->
+ %% Shuffle the positions so the maps match
+ Cycles = find_swap_cycles(OldMap1, NewMap),
+ SwitchInsns = do_switching(Cycles),
+ PopInsn ++ PushInsn ++ SwitchInsns ++ [hipe_x86:mk_jmp_label(Lbl)]
+ end,
+ %% Update the CFG.
+ NewLabel = hipe_gensym:get_next_label(x86),
+ NewCFG1 = hipe_x86_cfg:bb_add(CFG, NewLabel, hipe_bb:mk_bb(Code)),
+ OldPred = hipe_x86_cfg:bb(NewCFG1, Pred),
+ PredCode = hipe_bb:code(OldPred),
+ NewLast = redirect(lists:last(PredCode), Lbl,NewLabel),
+ NewPredCode = butlast(PredCode) ++ [NewLast],
+ NewPredBB = hipe_bb:code_update(OldPred, NewPredCode),
+ hipe_x86_cfg:bb_add(NewCFG1, Pred, NewPredBB).
+
+find_swap_cycles(OldMap, NewMap) ->
+ Moves = [get_pos(X, NewMap, 1) || X <- OldMap],
+ find_swap_cycles(OldMap, Moves, lists:seq(1, length(OldMap)), []).
+
+find_swap_cycles(OldMap, Moves, NotHandled, Cycles) ->
+ if NotHandled =:= [] -> Cycles;
+ true ->
+ Cycle = find_cycle(Moves, [hd(NotHandled)]),
+ NewNotHandled = NotHandled -- Cycle,
+ case lists:member(1, Cycle) of
+ true ->
+ %% The cycle that contains the first element on the stack
+ %% must be processed last.
+ NewCycle = format_cycle(Cycle),
+ find_swap_cycles(OldMap, Moves, NewNotHandled, Cycles ++ [NewCycle]);
+ _ ->
+ NewCycle = format_cycle(Cycle),
+ find_swap_cycles(OldMap, Moves, NewNotHandled, [NewCycle|Cycles])
+ end
+ end.
+
+find_cycle(Moves, Cycle) ->
+ To = lists:nth(lists:last(Cycle), Moves),
+ if To =:= hd(Cycle) -> Cycle;
+ true -> find_cycle(Moves, Cycle ++ [To])
+ end.
+
+format_cycle(C) ->
+  %% The position numbers start at 1 but should start at 0.
+  %% If position 0 is in the cycle, it is rotated until the 0 comes
+  %% first and is then removed.
+  %% Otherwise the first element is also appended at the end.
+ NewCycle = [X - 1 || X <- C],
+ case lists:member(0, NewCycle) of
+ true -> format_cycle(NewCycle, []);
+ _ -> NewCycle ++ [hd(NewCycle)]
+ end.
+
+format_cycle([H|T], NewCycle) ->
+ case H of
+ 0 -> T ++ NewCycle;
+ _ -> format_cycle(T, NewCycle ++ [H])
+ end.
+
+do_switching(Cycles) ->
+ do_switching(Cycles, []).
+
+do_switching([C|Cycles], Insns) ->
+ NewInsns = Insns ++ [hipe_x86:mk_fp_unop(fxch, mk_st(X)) || X <- C],
+ do_switching(Cycles, NewInsns);
+do_switching([], Insns) ->
+ Insns.
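+
+%% Worked example (hypothetical stack contents): to turn the stack
+%% [a,b,c] (a in st(0)) into [b,c,a], find_swap_cycles finds the 1-based
+%% cycle [1,3,2]; format_cycle rotates the 0 to the front and drops it,
+%% giving [2,1]; do_switching then emits fxch st(2) followed by
+%% fxch st(1), taking [a,b,c] via [c,b,a] to [b,c,a].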
+
+redirect(Insn, OldLbl, NewLbl) ->
+ case Insn of
+ #pseudo_call{contlab = ContLab, sdesc = SDesc} ->
+ #x86_sdesc{exnlab = ExnLab} = SDesc,
+ if ContLab =:= OldLbl ->
+ Insn#pseudo_call{contlab = NewLbl};
+ ExnLab =:= OldLbl ->
+ Insn#pseudo_call{sdesc = SDesc#x86_sdesc{exnlab = NewLbl}}
+ end;
+ _ ->
+ hipe_x86_cfg:redirect_jmp(Insn, OldLbl, NewLbl)
+ end.
+
+do_insn(I, LiveOut, Map, BlockMap) ->
+ case I of
+ #pseudo_call{'fun' = Fun, contlab = ContLab} ->
+ case Fun of
+ %% We don't want to spill anything if an exception has been thrown.
+ {_, 'handle_fp_exception'} ->
+ NewBlockMap =
+ case gb_trees:lookup(ContLab, BlockMap) of
+ {value, fail} ->
+ BlockMap;
+ {value, _} ->
+ gb_trees:update(ContLab, fail, BlockMap);
+ none ->
+ gb_trees:insert(ContLab, fail, BlockMap)
+ end,
+ {[I], [], NewBlockMap};
+ _ ->
+ {pop_all(Map)++[I],[],BlockMap}
+ end;
+ #fp_unop{op = 'fwait'} ->
+ Store = pseudo_pop(Map),
+ {Store ++ [I], Map, BlockMap};
+ #fp_unop{} ->
+ {NewI, NewMap} = do_fp_unop(I, LiveOut, Map),
+ {NewI, NewMap, BlockMap};
+ #fp_binop{} ->
+ {NewI, NewMap} = do_fp_binop(I, LiveOut, Map),
+ {NewI, NewMap, BlockMap};
+ #fmove{src = Src, dst = Dst} ->
+ if Src =:= Dst ->
+ %% Don't need to keep this instruction!
+ %% However, we may need to pop from the stack.
+ case is_liveOut(Src, LiveOut) of
+ true->
+ {[], Map, BlockMap};
+ false ->
+ {SwitchInsn, NewMap0} = switch_first(Dst, Map),
+ NewMap = pop(NewMap0),
+ {SwitchInsn++pop_insn(), NewMap, BlockMap}
+ end;
+ true ->
+ {NewI, NewMap} = do_fmove(Src, Dst, LiveOut, Map),
+ {NewI, NewMap, BlockMap}
+ end;
+ _ ->
+ {[I], Map, BlockMap}
+ end.
+
+do_fmove(Src, Dst = #x86_mem{}, LiveOut, Map) ->
+ %% Storing a float from the stack into memory.
+ {SwitchInsn, NewMap0} = switch_first(Src, Map),
+ case is_liveOut(Src, LiveOut) of
+ true ->
+ {SwitchInsn ++ [hipe_x86:mk_fp_unop(fst, Dst)], NewMap0};
+ _ ->
+ NewMap1 = pop(NewMap0),
+ {SwitchInsn ++ [hipe_x86:mk_fp_unop(fstp, Dst)], NewMap1}
+ end;
+do_fmove(Src = #x86_mem{}, Dst, _LiveOut, Map) ->
+  %% Pushing a float onto the fp stack.
+ case in_map(Dst, Map) of
+ true -> ?EXIT({loadingExistingFpVariable,{Src,Dst}});
+ _ -> ok
+ end,
+ {PushOp, [_|NewMap0]} = push(Src, Map),
+ %% We want Dst in the map rather than Src.
+ NewMap = [Dst|NewMap0],
+ {PushOp, NewMap};
+do_fmove(Src, Dst, LiveOut, Map) ->
+ %% Copying a float that either is spilled or is on the fp stack,
+ %% or converting a fixnum in a temp to a float on the fp stack.
+ case in_map(Dst, Map) of
+ true -> ?EXIT({copyingToExistingFpVariable,{Src,Dst}});
+ _ -> ok
+ end,
+ IsConv =
+ case Src of
+ #x86_temp{type = Type} -> Type =/= 'double';
+ _ -> false
+ end,
+ case IsConv of
+ true ->
+ do_conv(Src, Dst, Map);
+ _ ->
+ %% Copying.
+ case {is_liveOut(Src, LiveOut), in_map(Src, Map)} of
+ {false, true} ->
+ %% Just remap Dst to Src
+ {Head, [_|T]} = lists:splitwith(fun(X) -> X =/= Src end, Map),
+ {[], Head ++ [Dst|T]};
+ _ ->
+ {PushOp, [_|NewMap0]} = push(Src, Map),
+ %% We want Dst in the map rather than Src.
+ NewMap = [Dst|NewMap0],
+ {PushOp, NewMap}
+ end
+ end.
+
+do_conv(Src = #x86_temp{reg = Reg}, Dst, Map) ->
+ %% Converting. Src must not be a register, so we
+ %% might have to put it into memory in between.
+ {Move, NewSrc} =
+ case ?HIPE_X86_REGISTERS:is_precoloured(Reg) of
+ true ->
+ Temp = hipe_x86:mk_new_temp('untagged'),
+ {[hipe_x86:mk_move(Src,Temp)], Temp};
+ _ ->
+ {[], Src}
+ end,
+ {PushOp, [_|NewMap0]} = push(NewSrc, Map),
+ %% We want Dst in the map rather than NewSrc.
+ NewMap = [Dst|NewMap0],
+ case length(PushOp) of
+ 1 -> %% No popping of memory object on fpstack
+ {Move ++ [hipe_x86:mk_fp_unop(fild, NewSrc)], NewMap};
+    _ -> %% Head (all but the final fld) contains pop instructions. Must be kept!
+ Head = butlast(PushOp),
+ {Move ++ Head ++ [hipe_x86:mk_fp_unop(fild, NewSrc)], NewMap}
+ end.
+
+do_fp_unop(I = #fp_unop{arg = Arg, op = fchs}, Liveout, Map) ->
+ %% This is fchs, the only operation without a
+ %% popping version. Needs special handling.
+ case is_liveOut(Arg, Liveout) of
+ true ->
+ {SwitchIns, NewMap} = switch_first(Arg, Map),
+ {SwitchIns ++ [I#fp_unop{arg = []}], NewMap};
+ false ->
+ %% Don't need to keep this instruction!
+ %% However, we may need to pop Src from the stack.
+ case in_map(Arg, Map) of
+ true ->
+ {SwitchInsn, NewMap0} = switch_first(Arg, Map),
+ NewMap = pop(NewMap0),
+ {SwitchInsn ++ pop_insn(), NewMap};
+ _ ->
+ {[],Map}
+ end
+ end.
+
+do_fp_binop(#fp_binop{src = Src, dst = Dst, op = Op}, LiveOut, Map) ->
+ case {is_liveOut(Src, LiveOut), is_liveOut(Dst, LiveOut)} of
+ {true, true} ->
+ keep_both(Op, Src, Dst, Map);
+ {true, false} ->
+ keep_src(Op, Src, Dst, Map);
+ {false, true} ->
+ keep_dst(Op, Src, Dst, Map);
+ {false, false} ->
+ %% Both Dst and Src are popped.
+ keep_none(Op, Src, Dst, Map)
+ end.
+
+keep_both(Op, Src, Dst, Map) ->
+ %% Keep both Dst and Src if it is there.
+ {SwitchInsn, NewMap} = switch_first(Dst, Map),
+ NewSrc = get_new_opnd(Src, NewMap),
+ Insn = format_fp_binop(Op, NewSrc, mk_st(0)),
+ {SwitchInsn++Insn, NewMap}.
+
+keep_src(Op, Src, Dst, Map) ->
+ %% Pop Dst but keep Src in stack if it is there.
+ {SwitchInsn, NewMap0} = switch_first(Dst, Map),
+ NewSrc = get_new_opnd(Src, NewMap0),
+ NewMap = pop(NewMap0),
+ Insn = format_fp_binop(Op, NewSrc, mk_st(0)),
+ {SwitchInsn ++ Insn ++ pop_insn(), NewMap}.
+
+keep_dst(Op, Src, Dst, Map) ->
+ %% Keep Dst but pop Src.
+ %% Dst must be in stack.
+ DstInMap = in_map(Dst, Map),
+ SrcInMap = in_map(Src, Map),
+ case SrcInMap of
+ true ->
+ case DstInMap of
+ true ->
+ %% Src must be popped. If Dst is on top of the stack we can
+ %% alter the operation rather than shuffle the stack.
+ {SwitchInsn, Insn, NewMap} =
+ if hd(Map) =:= Dst ->
+ NewOp = mk_op_pop(reverse_op(Op)),
+ NewDst = get_new_opnd(Src, Map),
+ TmpMap = lists:map(fun(X) ->
+ if X =:= Src -> Dst; true -> X end
+ end, Map),
+ {[], format_fp_binop(NewOp, mk_st(0), NewDst), pop(TmpMap)};
+ true ->
+ {SwitchInsn1, NewMap0} = switch_first(Src, Map),
+ NewDst = get_new_opnd(Dst,NewMap0),
+ NewOp = mk_op_pop(Op),
+ {SwitchInsn1,format_fp_binop(NewOp, mk_st(0), NewDst), pop(NewMap0)}
+ end,
+ {SwitchInsn ++ Insn, NewMap};
+ _ ->
+ %% Src is on the stack, but Dst isn't. Use memory command to avoid
+ %% unnecessary loading instructions.
+ {SwitchInsn, NewMap0} = switch_first(Src, Map),
+ NewOp = reverse_op(Op),
+ NewMap = [Dst] ++ tl(NewMap0),
+ Insn = format_fp_binop(NewOp, Dst, mk_st(0)),
+ {SwitchInsn ++ Insn, NewMap}
+ end;
+ _ ->
+ %% Src isn't in the map so it doesn't have to be popped.
+ {SwitchInsn, NewMap} = switch_first(Dst, Map),
+ {SwitchInsn ++ [#fp_unop{arg = Src, op = Op}], NewMap}
+ end.
+
+keep_none(Op, Src, Dst, Map) ->
+ %% Dst must be on stack.
+ {PushInsn, NewMap0} =
+ case in_map(Dst, Map) of
+ true -> {[], Map};
+ _ -> push(Dst, Map)
+ end,
+ case in_map(Src, NewMap0) of
+ true ->
+ %% Src must be popped.
+ {SwitchInsn1, NewMap1} = switch_first(Src, NewMap0),
+ NewOp = mk_op_pop(Op),
+ NewDst = get_new_opnd(Dst,NewMap1),
+ NewMap2 = pop(NewMap1),
+ %% Then Dst has to be popped.
+ {PopInsn, NewMap} = pop_member(Dst, NewMap2),
+ Insn = format_fp_binop(NewOp, mk_st(0), NewDst),
+ {PushInsn ++ SwitchInsn1 ++ Insn ++ PopInsn, NewMap};
+ _ ->
+ %% Src isn't in the map so it doesn't have to be popped.
+ {SwitchInsn, NewMap1} = switch_first(Dst, NewMap0),
+ NewMap = pop(NewMap1),
+ {SwitchInsn ++ [#fp_unop{arg = Src, op = Op}] ++ pop_insn(), NewMap}
+ end.
+
+format_fp_binop(Op, Src = #x86_temp{}, Dst = #x86_fpreg{reg = Reg}) ->
+ %% Handle that st(0) is sometimes implicit.
+ if Reg =:= 0 -> [hipe_x86:mk_fp_unop(Op, Src)];
+ true -> [hipe_x86:mk_fp_binop(Op, Src, Dst)]
+ end;
+format_fp_binop(Op, Src, Dst) ->
+ [hipe_x86:mk_fp_binop(Op, Src, Dst)].
+
+in_map(X, Map) ->
+ lists:member(X, Map).
+
+push_list(L, Map) ->
+ push_list(L, Map, []).
+push_list([H|T], Map, Acc) ->
+ {Insn, NewMap} = push(H,Map),
+ push_list(T, NewMap, Acc++Insn);
+push_list([], Map, Acc) ->
+ {Acc, Map}.
+
+push(X, Map0) ->
+ {PopInsn, Map} =
+ if length(Map0) > 7 -> pop_a_temp(Map0);
+ true -> {[], Map0}
+ end,
+ NewX = get_new_opnd(X,Map),
+ NewMap = [X | Map],
+ PushOp = [hipe_x86:mk_fp_unop(fld, NewX)],
+ {PopInsn ++ PushOp, NewMap}.
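+
+%% The hardware x87 stack has only 8 slots, so if the simulated map
+%% already holds 8 entries (length(Map0) > 7), pop_a_temp first stores
+%% one stack-resident temp out with fstp, freeing a slot so that the
+%% following fld cannot overflow the hardware stack.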
+
+pop([_|Map]) ->
+ Map.
+
+pop_insn() ->
+ [hipe_x86:mk_fp_unop('fstp',mk_st(0))].
+
+pop_dead(Dead, Map) ->
+ Dead0 = [X || X <- Map, lists:member(X,Dead)],
+ pop_dead(Dead0, Map, []).
+
+pop_dead([D|Dead], Map, Code) ->
+ {I, NewMap0} = switch_first(D, Map),
+ NewMap = pop(NewMap0),
+ Store = case D of
+ #x86_temp{} -> [hipe_x86:mk_fp_unop('fstp', D)];
+ _ -> pop_insn()
+ end,
+ pop_dead(Dead, NewMap, Code++I++Store);
+pop_dead([], Map, Code) ->
+ {Code,Map}.
+
+pop_all(Map) ->
+ {Code, _} = pop_dead(Map, Map),
+ Code.
+
+pop_member(Member, Map) ->
+ {Head,[_|T]} = lists:splitwith(fun(X)-> X =/= Member end, Map),
+ {[hipe_x86:mk_fp_unop('fstp', mk_st(get_pos(Member, Map, 0)))],
+ Head++T}.
+
+pop_a_temp(Map) ->
+ Temp = find_a_temp(Map),
+ {SwitchInsn, NewMap0} = switch_first(Temp, Map),
+ NewMap = pop(NewMap0),
+ {SwitchInsn ++ [hipe_x86:mk_fp_unop('fstp', Temp)], NewMap}.
+
+find_a_temp([H = #x86_temp{}|_]) ->
+ H;
+find_a_temp([_|T]) ->
+ find_a_temp(T);
+find_a_temp([]) ->
+ ?EXIT({noTempOnFPStack,{}}).
+
+switch_first(X, Map = [H|_]) ->
+ Pos = get_pos(X, Map, 0),
+ case Pos of
+ 0 ->
+ {[], Map};
+ notFound ->
+ push(X, Map);
+ _ ->
+ {[_|Head], [_|Tail]} = lists:splitwith(fun(Y)-> Y =/= X end, Map),
+ NewMap = [X|Head] ++ [H|Tail],
+ Ins = hipe_x86:mk_fp_unop(fxch, mk_st(Pos)),
+ {[Ins], NewMap}
+ end;
+switch_first(X, Map) ->
+ push(X, Map).
+
+get_pos(X, [H|T], Pos) ->
+ if X =:= H -> Pos;
+ true -> get_pos(X, T, Pos+1)
+ end;
+get_pos(_, [], _) ->
+ notFound.
+
+get_new_opnd(X, Map) ->
+ I = get_pos(X, Map, 0),
+ case I of
+ notFound ->
+ %% The operand is probably a spilled float.
+ X;
+ _ ->
+ mk_st(I)
+ end.
+
+is_fp(#x86_fpreg{}) ->
+ true;
+is_fp(#x86_mem{type = Type}) ->
+ Type =:= 'double';
+is_fp(#x86_temp{type = Type}) ->
+ Type =:= 'double'.
+
+handle_insn(I) ->
+ case I of
+ #fmove{} -> true;
+ #fp_unop{} -> true;
+ #fp_binop{} -> true;
+ #pseudo_call{} ->true;
+ %% #ret{} -> true;
+ _ -> false
+ end.
+
+is_liveOut(X, LiveOut) ->
+ ordsets:is_element(X, LiveOut).
+
+mk_st(X) ->
+ hipe_x86:mk_fpreg(X, false).
+
+reverse_op(Op) ->
+ case Op of
+ 'fsub' -> 'fsubr';
+ 'fdiv' -> 'fdivr';
+ 'fsubr'-> 'fsub';
+ 'fdivr' -> 'fdiv';
+ _ -> Op
+ end.
+
+mk_op_pop(Op) ->
+ case Op of
+ 'fadd'-> 'faddp';
+ 'fdiv' -> 'fdivp';
+ 'fdivr' -> 'fdivrp';
+ 'fmul' -> 'fmulp';
+ 'fsub' -> 'fsubp';
+ 'fsubr' -> 'fsubrp';
+ _ -> ?EXIT({operandHasNoPopVariant,{Op}})
+ end.
+
+butlast([X|Xs]) -> butlast(Xs,X).
+
+butlast([],_) -> [];
+butlast([X|Xs],Y) -> [Y|butlast(Xs,X)].
+
+%%pp_insn(Op, Src, Dst) ->
+%% pp([hipe_x86:mk_fp_binop(Op, Src, Dst)]).
+
+%%pp([I|Ins]) ->
+%% hipe_x86_pp:pp_insn(I),
+%% pp(Ins);
+%%pp([]) ->
+%% [].
+
+pseudo_pop(Map) when length(Map) > 0 ->
+ Dst = hipe_x86:mk_new_temp('double'),
+ pseudo_pop(Dst, length(Map), []);
+pseudo_pop(_) ->
+ [].
+
+pseudo_pop(Dst, St, Acc) when St > 1 ->
+  %% Store all members of the stack to a single temporary to force any
+  %% floating point overflow exceptions to occur, since values that would
+  %% overflow a 64-bit double do not overflow in the x87's extended
+  %% precision registers.
+ pseudo_pop(Dst, St-1,
+ [hipe_x86:mk_fp_unop('fxch', mk_st(St-1)),
+ hipe_x86:mk_fp_unop('fst', Dst),
+ hipe_x86:mk_fp_unop('fxch', mk_st(St-1))
+ |Acc]);
+pseudo_pop(Dst, _St, Acc) ->
+ [hipe_x86:mk_fp_unop('fst', Dst)|Acc].
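+
+%% Example: for a map of depth 3 the code above expands, in order, to
+%%   fst Dst,
+%%   fxch st(1), fst Dst, fxch st(1),
+%%   fxch st(2), fst Dst, fxch st(2)
+%% i.e. every stack slot is stored to the scratch temp in turn while the
+%% final stack layout is left unchanged.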