The R13B03 release.OTP_R13B03

author: Erlang/OTP <[email protected]> 2009-11-20 14:54:40 +0000
committer: Erlang/OTP <[email protected]> 2009-11-20 14:54:40 +0000
commit: 84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree: bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/hipe/amd64
download: otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
19 files changed, 2386 insertions, 0 deletions
diff --git a/lib/hipe/amd64/Makefile b/lib/hipe/amd64/Makefile
new file mode 100644
index 0000000000..93e5f086d9
--- /dev/null
+++ b/lib/hipe/amd64/Makefile
@@ -0,0 +1,127 @@
+#
+# %CopyrightBegin%
+# 
+# Copyright Ericsson AB 2004-2009. All Rights Reserved.
+# 
+# The contents of this file are subject to the Erlang Public License,
+# Version 1.1, (the "License"); you may not use this file except in
+# compliance with the License. You should have received a copy of the
+# Erlang Public License along with this software. If not, it can be
+# retrieved online at http://www.erlang.org/.
+# 
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+# the License for the specific language governing rights and limitations
+# under the License.
+# 
+# %CopyrightEnd%
+#
+
+ifndef EBIN
+EBIN = ../ebin
+endif
+
+ifndef DOCS
+DOCS = ../doc
+endif
+
+include $(ERL_TOP)/make/target.mk
+include $(ERL_TOP)/make/$(TARGET)/otp.mk
+
+# ----------------------------------------------------
+# Application version
+# ----------------------------------------------------
+include ../vsn.mk
+VSN=$(HIPE_VSN)
+
+# ----------------------------------------------------
+# Release directory specification
+# ----------------------------------------------------
+RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN)
+
+# ----------------------------------------------------
+# Target Specs
+# ----------------------------------------------------
+# Please keep this list sorted.
+MODULES=hipe_amd64_assemble \
+	hipe_amd64_defuse \
+	hipe_amd64_encode \
+	hipe_amd64_frame \
+	hipe_amd64_liveness \
+	hipe_amd64_main \
+	hipe_amd64_pp \
+	hipe_amd64_ra \
+	hipe_amd64_ra_finalise \
+	hipe_amd64_ra_ls \
+	hipe_amd64_ra_naive \
+	hipe_amd64_ra_postconditions \
+	hipe_amd64_ra_sse2_postconditions \
+	hipe_amd64_ra_x87_ls \
+	hipe_amd64_registers \
+	hipe_amd64_spill_restore \
+	hipe_amd64_x87 \
+	hipe_rtl_to_amd64
+
+ERL_FILES=$(MODULES:%=%.erl)
+TARGET_FILES=$(MODULES:%=$(EBIN)/%.$(EMULATOR))
+DOC_FILES= $(MODULES:%=$(DOCS)/%.html)
+
+# ----------------------------------------------------
+# FLAGS
+# ----------------------------------------------------
+
+include ../native.mk
+
+ERL_COMPILE_FLAGS += -DHIPE_AMD64 +warn_exported_vars
+
+# ----------------------------------------------------
+# Targets
+# ----------------------------------------------------
+
+debug opt: $(TARGET_FILES)
+
+docs: $(DOC_FILES)
+
+clean:
+	rm -f $(TARGET_FILES)
+	rm -f core
+
+$(DOCS)/%.html:%.erl
+	erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop
+
+# ----------------------------------------------------
+# Special Build Targets
+# ----------------------------------------------------
+
+# ----------------------------------------------------
+# Release Target
+# ----------------------------------------------------
+include $(ERL_TOP)/make/otp_release_targets.mk
+
+release_spec: opt
+	$(INSTALL_DIR) $(RELSYSDIR)/ebin
+	$(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin
+
+release_docs_spec:
+
+# Please keep this list sorted.
+$(EBIN)/hipe_amd64_assemble.beam: ../main/hipe.hrl ../rtl/hipe_literals.hrl ../x86/hipe_x86.hrl ../../kernel/src/hipe_ext_format.hrl ../misc/hipe_sdi.hrl ../x86/hipe_x86_assemble.erl
+$(EBIN)/hipe_amd64_defuse.beam: ../x86/hipe_x86.hrl ../x86/hipe_x86_defuse.erl
+$(EBIN)/hipe_amd64_frame.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_frame.erl ../rtl/hipe_literals.hrl
+$(EBIN)/hipe_amd64_liveness.beam: ../flow/liveness.inc ../x86/hipe_x86_liveness.erl
+$(EBIN)/hipe_amd64_main.beam: ../main/hipe.hrl ../x86/hipe_x86_main.erl
+$(EBIN)/hipe_amd64_pp.beam: ../x86/hipe_x86.hrl ../x86/hipe_x86_pp.erl
+$(EBIN)/hipe_amd64_ra.beam: ../main/hipe.hrl ../x86/hipe_x86_ra.erl
+$(EBIN)/hipe_amd64_ra_dummy.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl
+$(EBIN)/hipe_amd64_ra_finalise.beam: ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_finalise.erl
+$(EBIN)/hipe_amd64_ra_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_ls.erl
+$(EBIN)/hipe_amd64_ra_naive.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_naive.erl
+$(EBIN)/hipe_amd64_ra_postconditions.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../x86/hipe_x86_ra_postconditions.erl
+$(EBIN)/hipe_amd64_ra_sse2_postconditions.beam: ../main/hipe.hrl
+$(EBIN)/hipe_amd64_ra_x87_ls.beam: ../main/hipe.hrl ../x86/hipe_x86_ra_x87_ls.erl
+$(EBIN)/hipe_amd64_registers.beam: ../rtl/hipe_literals.hrl
+$(EBIN)/hipe_amd64_spill_restore.beam: ../main/hipe.hrl ../x86/hipe_x86.hrl ../flow/cfg.hrl ../x86/hipe_x86_spill_restore.erl
+$(EBIN)/hipe_amd64_x87.beam: ../x86/hipe_x86_x87.erl
+$(EBIN)/hipe_rtl_to_amd64.beam: ../x86/hipe_rtl_to_x86.erl ../rtl/hipe_rtl.hrl
+
+$(TARGET_FILES): ../x86/hipe_x86.hrl ../misc/hipe_consttab.hrl
diff --git a/lib/hipe/amd64/hipe_amd64_assemble.erl b/lib/hipe/amd64/hipe_amd64_assemble.erl
new file mode 100644
index 0000000000..db3cfcc6bd
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_assemble.erl
@@ -0,0 +1,19 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+-include("../x86/hipe_x86_assemble.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_defuse.erl b/lib/hipe/amd64/hipe_amd64_defuse.erl
new file mode 100644
index 0000000000..c48e80f3f1
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_defuse.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_x86_defuse.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_encode.erl b/lib/hipe/amd64/hipe_amd64_encode.erl
new file mode 100644
index 0000000000..ee68dfb3b8
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_encode.erl
@@ -0,0 +1,1484 @@
+%%%
+%%% %CopyrightBegin%
+%%% 
+%%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%%% 
+%%% The contents of this file are subject to the Erlang Public License,
+%%% Version 1.1, (the "License"); you may not use this file except in
+%%% compliance with the License. You should have received a copy of the
+%%% Erlang Public License along with this software. If not, it can be
+%%% retrieved online at http://www.erlang.org/.
+%%% 
+%%% Software distributed under the License is distributed on an "AS IS"
+%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%%% the License for the specific language governing rights and limitations
+%%% under the License.
+%%% 
+%%% %CopyrightEnd%
+%%%
+%%% Copyright (C) 2000-2004 Mikael Pettersson
+%%% Copyright (C) 2004 Daniel Luna
+%%%
+%%% This is the syntax of amd64 r/m operands:
+%%%
+%%% opnd  ::= reg			mod == 11
+%%%	    | MEM[ea]			mod != 11
+%%%
+%%% ea    ::= disp32(reg)		mod == 10, r/m != ESP
+%%%  	    | disp32 sib12		mod == 10, r/m == 100
+%%%	    | disp8(reg)		mod == 01, r/m != ESP
+%%%	    | disp8 sib12		mod == 01, r/m == 100
+%%%	    | (reg)			mod == 00, r/m != ESP and EBP
+%%%	    | sib0			mod == 00, r/m == 100
+%%%	    | disp32(%rip)		mod == 00, r/m == 101
+%%%
+%%% // sib0: mod == 00
+%%% sib0  ::= disp32(,index,scale)	base == EBP, index != ESP
+%%%	    | disp32			base == EBP, index == 100
+%%%	    | (base,index,scale)	base != EBP, index != ESP
+%%%	    | (base)			base != EBP, index == 100
+%%%
+%%% // sib12: mod == 01 or 10
+%%% sib12  ::= (base,index,scale)	index != ESP
+%%%	    | (base)			index == 100
+%%%
+%%% scale ::= 00 | 01 | 10 | 11		index << scale
+%%%
+%%% Notes:
+%%%
+%%% 1. ESP cannot be used as index register.
+%%% 2. Use of ESP as base register requires a SIB byte.
+%%% 3. disp(reg), when reg != ESP, can be represented without
+%%%    [r/m == reg] or with [r/m == 100, base == reg] a SIB byte.
+%%% 4. disp32 can be represented without [mod == 00, r/m == 101]
+%%%    or with [mod == 00, r/m == 100, base == 101, index == 100]
+%%%    a SIB byte.
+%%% 5. AMD64 and x86 interpret mod==00b r/m==101b EAs differently:
+%%%    on x86 the disp32 is an absolute address, but on AMD64 the
+%%%    disp32 is relative to the %rip of the next instruction.
+
+-module(hipe_amd64_encode).
+
+-export([% condition codes
+	 cc/1,
+	 % 8-bit registers
+	 %% al/0, cl/0, dl/0, bl/0, ah/0, ch/0, dh/0, bh/0,
+	 % 32-bit registers
+	 %% eax/0, ecx/0, edx/0, ebx/0, esp/0, ebp/0, esi/0, edi/0,
+	 % operands
+	 sindex/2, sib/1, sib/2,
+	 ea_disp32_base/2, ea_disp32_sib/2,
+	 ea_disp8_base/2, ea_disp8_sib/2,
+	 ea_base/1,
+	 ea_disp32_sindex/1, %%ea_disp32_sindex/2,
+	 ea_sib/1, %ea_disp32_rip/1,
+	 rm_reg/1, rm_mem/1,
+	 % instructions
+	 insn_encode/3, insn_sizeof/2]).
+
+%%-define(DO_HIPE_AMD64_ENCODE_TEST,true).
+-ifdef(DO_HIPE_AMD64_ENCODE_TEST).
+-export([dotest/0, dotest/1]).	% for testing, don't use
+-endif.
+
+-define(ASSERT(F,G), if G -> [] ; true -> exit({?MODULE,F}) end).
+%-define(ASSERT(F,G), []).
+
+%%% condition codes
+
+-define(CC_O,  2#0000).	% overflow
+-define(CC_NO, 2#0001).	% no overflow
+-define(CC_B,  2#0010).	% below, <u
+-define(CC_AE, 2#0011).	% above or equal, >=u
+-define(CC_E,  2#0100).	% equal
+-define(CC_NE, 2#0101).	% not equal
+-define(CC_BE, 2#0110).	% below or equal, <=u
+-define(CC_A,  2#0111).	% above, >u
+-define(CC_S,  2#1000).	% sign, +
+-define(CC_NS, 2#1001).	% not sign, -
+-define(CC_PE, 2#1010).	% parity even
+-define(CC_PO, 2#1011).	% parity odd
+-define(CC_L,  2#1100).	% less than, <s
+-define(CC_GE, 2#1101).	% greater or equal, >=s
+-define(CC_LE, 2#1110).	% less or equal, <=s
+-define(CC_G,  2#1111).	% greater than, >s
+
+cc(o) -> ?CC_O;
+cc(no) -> ?CC_NO;
+cc(b) -> ?CC_B;
+cc(ae) -> ?CC_AE;
+cc(e) -> ?CC_E;
+cc(ne) -> ?CC_NE;
+cc(be) -> ?CC_BE;
+cc(a) -> ?CC_A;
+cc(s) -> ?CC_S;
+cc(ns) -> ?CC_NS;
+cc(pe) -> ?CC_PE;
+cc(po) -> ?CC_PO;
+cc(l) -> ?CC_L;
+cc(ge) -> ?CC_GE;
+cc(le) -> ?CC_LE;
+cc(g) -> ?CC_G.
+
+%%% 8-bit registers
+
+-define(AL, 2#000).
+-define(CL, 2#001).
+-define(DL, 2#010).
+-define(BL, 2#011).
+-define(AH, 2#100).
+-define(CH, 2#101).
+-define(DH, 2#110).
+-define(BH, 2#111).
+
+%% al() -> ?AL.
+%% cl() -> ?CL.
+%% dl() -> ?DL.
+%% bl() -> ?BL.
+%% ah() -> ?AH.
+%% ch() -> ?CH.
+%% dh() -> ?DH.
+%% bh() -> ?BH.
+
+%%% 32-bit registers
+
+-define(EAX, 2#000).
+-define(ECX, 2#001).
+-define(EDX, 2#010).
+-define(EBX, 2#011).
+-define(ESP, 2#100).
+-define(EBP, 2#101).
+-define(ESI, 2#110).
+-define(EDI, 2#111).
+
+%% eax() -> ?EAX.
+%% ecx() -> ?ECX.
+%% edx() -> ?EDX.
+%% ebx() -> ?EBX.
+%% esp() -> ?ESP.
+%% ebp() -> ?EBP.
+%% esi() -> ?ESI.
+%% edi() -> ?EDI.
+
+%%% r/m operands
+
+sindex(Scale, Index) when is_integer(Scale), is_integer(Index) ->
+    ?ASSERT(sindex, Scale >= 0),
+    ?ASSERT(sindex, Scale =< 3),
+    ?ASSERT(sindex, Index =/= ?ESP),
+    {sindex, Scale, Index}.
+
+-record(sib, {sindex_opt, base :: integer()}).
+sib(Base) when is_integer(Base) -> #sib{sindex_opt=none, base=Base}.
+sib(Base, Sindex) when is_integer(Base) -> #sib{sindex_opt=Sindex, base=Base}.
+
+ea_disp32_base(Disp32, Base) when is_integer(Base) ->
+    ?ASSERT(ea_disp32_base, Base =/= ?ESP),
+    {ea_disp32_base, Disp32, Base}.
+ea_disp32_sib(Disp32, SIB) -> {ea_disp32_sib, Disp32, SIB}.
+ea_disp8_base(Disp8, Base) when is_integer(Base) ->
+    ?ASSERT(ea_disp8_base, Base =/= ?ESP),
+    {ea_disp8_base, Disp8, Base}.
+ea_disp8_sib(Disp8, SIB) -> {ea_disp8_sib, Disp8, SIB}.
+ea_base(Base) when is_integer(Base) ->
+    ?ASSERT(ea_base, Base =/= ?ESP),
+    ?ASSERT(ea_base, Base =/= ?EBP),
+    {ea_base, Base}.
+ea_disp32_sindex(Disp32) -> {ea_disp32_sindex, Disp32, none}.
+%% ea_disp32_sindex(Disp32, Sindex) -> {ea_disp32_sindex, Disp32, Sindex}.
+ea_sib(SIB) ->
+    ?ASSERT(ea_sib, SIB#sib.base =/= ?EBP),
+    {ea_sib, SIB}.
+%ea_disp32_rip(Disp32) -> {ea_disp32_rip, Disp32}.
+
+rm_reg(Reg) -> {rm_reg, Reg}.
+rm_mem(EA) -> {rm_mem, EA}.
+
+mk_modrm(Mod, RO, RM) ->
+    {rex([{r,RO}, {b,RM}]),
+     (Mod bsl 6) bor ((RO band 2#111) bsl 3) bor (RM band 2#111)}.
+
+mk_sib(Scale, Index, Base) ->
+    {rex([{x,Index}, {b,Base}]),
+     (Scale bsl 6) bor ((Index band 2#111) bsl 3) bor (Base band 2#111)}.
+
+rex(REXs) -> {rex, rex_(REXs)}.
+rex_([]) -> 0;
+rex_([{r8, Reg8}| Rest]) ->             % 8 bit registers
+    case Reg8 of
+	{rm_mem, _} -> rex_(Rest);
+	4 -> (1 bsl 8) bor rex_(Rest);
+	5 -> (1 bsl 8) bor rex_(Rest);
+	6 -> (1 bsl 8) bor rex_(Rest);
+	7 -> (1 bsl 8) bor rex_(Rest);
+	X when is_integer(X) -> rex_(Rest)
+    end;
+rex_([{w, REXW}| Rest]) ->              % 64-bit mode
+    (REXW bsl 3) bor rex_(Rest);
+rex_([{r, ModRM_regRegister}| Rest]) when is_integer(ModRM_regRegister) ->
+    REXR = if (ModRM_regRegister > 7) -> 1;
+              true -> 0
+           end,
+    (REXR bsl 2) bor rex_(Rest);
+rex_([{x, SIB_indexRegister}| Rest]) when is_integer(SIB_indexRegister) ->
+    REXX = if (SIB_indexRegister > 7) -> 1;
+              true -> 0
+           end,
+    (REXX bsl 1) bor rex_(Rest);
+rex_([{b, OtherRegister}| Rest]) when is_integer(OtherRegister) ->
+    %% ModRM r/m, SIB base or opcode reg
+    REXB = if (OtherRegister > 7) -> 1;
+              true -> 0
+           end,
+    REXB bor rex_(Rest).
+
+le16(Word, Tail) ->
+    [Word band 16#FF, (Word bsr 8) band 16#FF | Tail].
+
+le32(Word, Tail) when is_integer(Word) ->
+    [Word band 16#FF, (Word bsr 8) band 16#FF,
+     (Word bsr 16) band 16#FF, (Word bsr 24) band 16#FF | Tail];
+le32({Tag,Val}, Tail) ->	% a relocatable datum
+    [{le32,Tag,Val} | Tail].
+
+le64(Word, Tail) when is_integer(Word) ->
+     [ Word         band 16#FF, (Word bsr  8) band 16#FF,
+      (Word bsr 16) band 16#FF, (Word bsr 24) band 16#FF,
+      (Word bsr 32) band 16#FF, (Word bsr 40) band 16#FF,
+      (Word bsr 48) band 16#FF, (Word bsr 56) band 16#FF | Tail];
+le64({Tag,Val}, Tail) ->
+    [{le64,Tag,Val} | Tail].
+
+enc_sindex_opt({sindex,Scale,Index}) -> {Scale, Index};
+enc_sindex_opt(none) -> {2#00, 2#100}.
+
+enc_sib(#sib{sindex_opt=SindexOpt, base=Base}) ->
+    {Scale, Index} = enc_sindex_opt(SindexOpt),
+    mk_sib(Scale, Index, Base).
+
+enc_ea(EA, RO, Tail) ->
+    case EA of
+	{ea_disp32_base, Disp32, Base} ->
+	    [mk_modrm(2#10, RO, Base) | le32(Disp32, Tail)];
+	{ea_disp32_sib, Disp32, SIB} ->
+	    [mk_modrm(2#10, RO, 2#100), enc_sib(SIB) | le32(Disp32, Tail)];
+	{ea_disp8_base, Disp8, Base} ->
+	    [mk_modrm(2#01, RO, Base), Disp8 | Tail];
+	{ea_disp8_sib, Disp8, SIB} ->
+	    [mk_modrm(2#01, RO, 2#100), enc_sib(SIB), Disp8 | Tail];
+	{ea_base, Base} ->
+	    [mk_modrm(2#00, RO, Base) | Tail];
+	{ea_disp32_sindex, Disp32, SindexOpt} ->
+	    {Scale, Index} = enc_sindex_opt(SindexOpt),
+	    SIB = mk_sib(Scale, Index, 2#101),
+	    MODRM = mk_modrm(2#00, RO, 2#100),
+	    [MODRM, SIB | le32(Disp32, Tail)];
+	{ea_sib, SIB} ->
+	    [mk_modrm(2#00, RO, 2#100), enc_sib(SIB) | Tail];
+	{ea_disp32_rip, Disp32} ->
+	    [mk_modrm(2#00, RO, 2#101) | le32(Disp32, Tail)]
+    end.
+
+encode_rm(RM, RO, Tail) ->
+    case RM of
+	{rm_reg, Reg} -> [mk_modrm(2#11, RO, Reg) | Tail];
+	{rm_mem, EA} -> enc_ea(EA, RO, Tail)
+    end.
+
+%% sizeof_ea(EA) ->
+%%     case element(1, EA) of
+%% 	ea_disp32_base -> 5;
+%% 	ea_disp32_sib -> 6;
+%% 	ea_disp8_base -> 2;
+%% 	ea_disp8_sib -> 3;
+%% 	ea_base -> 1;
+%% 	ea_disp32_sindex -> 6;
+%% 	ea_sib -> 2;
+%% 	ea_disp32_rip -> 5
+%%     end.
+
+%% sizeof_rm(RM) ->
+%%    case RM of
+%%	{rm_reg, _} -> 1;
+%%	{rm_mem, EA} -> sizeof_ea(EA)
+%%    end.
+
+%%% x87 stack postitions
+
+-define(ST0, 2#000).
+-define(ST1, 2#001).
+-define(ST2, 2#010).
+-define(ST3, 2#011).
+-define(ST4, 2#100).
+-define(ST5, 2#101).
+-define(ST6, 2#110).
+-define(ST7, 2#111).
+
+st(0) -> ?ST0;
+st(1) -> ?ST1;
+st(2) -> ?ST2;
+st(3) -> ?ST3;
+st(4) -> ?ST4;
+st(5) -> ?ST5;
+st(6) -> ?ST6;
+st(7) -> ?ST7.
+
+
+%%% Instructions
+%%%
+%%% Insn	::= {Op,Opnds}
+%%% Opnds	::= {Opnd1,...,Opndn}	(n >= 0)
+%%% Opnd	::= eax | ax | al | 1 | cl
+%%%		  | {imm32,Imm32} | {imm16,Imm16} | {imm8,Imm8}
+%%%		  | {rm32,RM32} | {rm16,RM16} | {rm8,RM8}
+%%%		  | {rel32,Rel32} | {rel8,Rel8}
+%%%		  | {moffs32,Moffs32} | {moffs16,Moffs16} | {moffs8,Moffs8}
+%%%		  | {cc,CC}
+%%%		  | {reg32,Reg32} | {reg16,Reg16} | {reg8,Reg8}
+%%%		  | {ea,EA}
+
+-define(PFX_OPND_16BITS, 16#66).
+
+arith_binop_encode(SubOpcode, Opnds) ->
+    %% add, or, adc, sbb, and, sub, xor, cmp
+     case Opnds of
+         {eax, {imm32,Imm32}} ->
+             [16#05 bor (SubOpcode bsl 3) | le32(Imm32, [])];
+         {{rm32,RM32}, {imm32,Imm32}} ->
+             [16#81 | encode_rm(RM32, SubOpcode, le32(Imm32, []))];
+         {{rm32,RM32}, {imm8,Imm8}} ->
+             [16#83 | encode_rm(RM32, SubOpcode, [Imm8])];
+         {{rm32,RM32}, {reg32,Reg32}} ->
+             [16#01 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])];
+         {{reg32,Reg32}, {rm32,RM32}} ->
+             [16#03 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])];
+         %% Below starts amd64 stuff with rex prefix
+         {rax, {imm32,Imm32}} ->
+             [rex([{w,1}]), 16#05 bor (SubOpcode bsl 3) | le32(Imm32, [])];
+         {{rm64,RM64}, {imm32,Imm32}} ->
+             [rex([{w,1}]), 16#81 
+              | encode_rm(RM64, SubOpcode, le32(Imm32, []))];
+         {{rm64,RM64}, {imm8,Imm8}} ->
+             [rex([{w,1}]), 16#83 | encode_rm(RM64, SubOpcode, [Imm8])];
+         {{rm64,RM64}, {reg64,Reg64}} ->
+             [rex([{w,1}]), 16#01 bor (SubOpcode bsl 3) 
+              | encode_rm(RM64, Reg64, [])];
+         {{reg64,Reg64}, {rm64,RM64}} ->
+             [rex([{w,1}]), 16#03 bor (SubOpcode bsl 3)
+              | encode_rm(RM64, Reg64, [])]
+    end.
+
+sse2_arith_binop_encode(Prefix, Opcode, {{xmm, XMM64}, {rm64fp, RM64}}) ->
+    %% addpd, cmpsd, divsd, maxsd, minsd, mulsd, sqrtsd, subsd
+    [Prefix, 16#0F, Opcode | encode_rm(RM64, XMM64, [])].
+
+sse2_cvtsi2sd_encode({{xmm,XMM64}, {rm64,RM64}}) ->
+    [rex([{w, 1}]), 16#F2, 16#0F, 16#2A�| encode_rm(RM64, XMM64, [])].
+
+sse2_mov_encode(Opnds) ->
+    case Opnds of
+        {{xmm, XMM64}, {rm64fp, RM64}} -> % movsd
+            [16#F2, 16#0F, 16#10�| encode_rm(RM64, XMM64, [])];
+        {{rm64fp, RM64}, {xmm, XMM64}} -> % movsd
+            [16#F2, 16#0F, 16#11�| encode_rm(RM64, XMM64, [])]
+%        {{xmm, XMM64}, {rm64, RM64}} -> % cvtsi2sd
+%            [rex([{w, 1}]), 16#F2, 16#0F, 16#2A�| encode_rm(RM64, XMM64, [])]
+    end.
+
+%% arith_binop_sizeof(Opnds) ->
+%%     %% add, or, adc, sbb, and, sub, xor, cmp
+%%    case Opnds of
+%%	{eax, {imm32,_}} ->
+%%	    1 + 4;
+%%	{{rm32,RM32}, {imm32,_}} ->
+%%	    1 + sizeof_rm(RM32) + 4;
+%%	{{rm32,RM32}, {imm8,_}} ->
+%%	    1 + sizeof_rm(RM32) + 1;
+%%	{{rm32,RM32}, {reg32,_}} ->
+%%	    1 + sizeof_rm(RM32);
+%%	{{reg32,_}, {rm32,RM32}} ->
+%%	    1 + sizeof_rm(RM32)
+%%    end.
+
+bs_op_encode(Opcode, {{reg32,Reg32}, {rm32,RM32}}) ->	% bsf, bsr
+    [16#0F, Opcode | encode_rm(RM32, Reg32, [])].
+
+%% bs_op_sizeof({{reg32,_}, {rm32,RM32}}) ->		% bsf, bsr
+%%    2 + sizeof_rm(RM32).
+
+bswap_encode(Opnds) ->
+    case Opnds of
+	{{reg32,Reg32}} ->
+	    [rex([{b, Reg32}]), 16#0F, 16#C8 bor (Reg32 band 2#111)];
+	{{reg64,Reg64}} ->
+	    [rex([{w, 1}, {b, Reg64}]), 16#0F, 16#C8 bor (Reg64 band 2#111)]
+    end.	
+
+%% bswap_sizeof({{reg32,_}}) ->
+%%    2.
+
+bt_op_encode(SubOpcode, Opnds) ->	% bt, btc, btr, bts
+    case Opnds of
+	{{rm32,RM32}, {reg32,Reg32}} ->
+	    [16#0F, 16#A3 bor (SubOpcode bsl 3) | encode_rm(RM32, Reg32, [])];
+	{{rm32,RM32}, {imm8,Imm8}} ->
+	    [16#0F, 16#BA | encode_rm(RM32, SubOpcode, [Imm8])]
+    end.
+
+%% bt_op_sizeof(Opnds) ->			% bt, btc, btr, bts
+%%    case Opnds of
+%%	{{rm32,RM32}, {reg32,_}} ->
+%%	    2 + sizeof_rm(RM32);
+%%	{{rm32,RM32}, {imm8,_}} ->
+%%	    2 + sizeof_rm(RM32) + 1
+%%    end.
+
+call_encode(Opnds) ->
+    case Opnds of
+	{{rel32,Rel32}} ->
+	    [16#E8 | le32(Rel32, [])];
+%%% 	{{rm32,RM32}} ->
+%%% 	    [16#FF | encode_rm(RM32, 2#010, [])];
+	{{rm64,RM64}} -> % Defaults to 64 bits on amd64
+	    [16#FF | encode_rm(RM64, 2#010, [])]
+    end.
+
+%% call_sizeof(Opnds) ->
+%%    case Opnds of
+%%	{{rel32,_}} ->
+%%	    1 + 4;
+%%	{{rm32,RM32}} ->
+%%	    1 + sizeof_rm(RM32)
+%%    end.
+
+cbw_encode({}) ->
+    [?PFX_OPND_16BITS, 16#98].
+
+cbw_sizeof({}) ->
+    2.
+
+nullary_op_encode(Opcode, {}) ->
+    %% cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std
+    [Opcode].
+
+nullary_op_sizeof({}) ->
+    %% cdq, clc, cld, cmc, cwde, into, leave, nop, prefix_fs, stc, std
+    1.
+
+cmovcc_encode({{cc,CC}, {reg32,Reg32}, {rm32,RM32}}) ->
+    [16#0F, 16#40 bor CC | encode_rm(RM32, Reg32, [])].
+
+%% cmovcc_sizeof({{cc,_}, {reg32,_}, {rm32,RM32}}) ->
+%%    2 + sizeof_rm(RM32).
+
+incdec_encode(SubOpcode, Opnds) ->	% SubOpcode is either 0 or 1
+    case Opnds of
+	{{rm32,RM32}} ->
+	    [16#FF | encode_rm(RM32, SubOpcode, [])];
+	{{rm64,RM64}} ->
+	    [rex([{w, 1}]), 16#FF | encode_rm(RM64, SubOpcode, [])]
+    end.
+
+%% incdec_sizeof(Opnds) ->
+%%    case Opnds of
+%%	{{rm32,RM32}} ->
+%%	    1 + sizeof_rm(RM32);
+%%	{{reg32,_}} ->
+%%	    1
+%%    end.
+
+arith_unop_encode(Opcode, Opnds) ->  % div, idiv, mul, neg, not
+    case Opnds of
+	{{rm32,RM32}} ->
+	    [16#F7 | encode_rm(RM32, Opcode, [])];
+	{{rm64,RM64}} ->
+	    [rex([{w,1}]), 16#F7 | encode_rm(RM64, Opcode, [])]
+    end.
+
+%% arith_unop_sizeof({{rm32,RM32}}) ->	% div, idiv, mul, neg, not
+%%    1 + sizeof_rm(RM32).
+
+enter_encode({{imm16,Imm16}, {imm8,Imm8}}) ->
+    [16#C8 | le16(Imm16, [Imm8])].
+
+enter_sizeof({{imm16,_}, {imm8,_}}) ->
+    1 + 2 + 1.
+
+imul_encode(Opnds) ->
+    case Opnds of
+	{{rm32,RM32}} ->				% <edx,eax> *= rm32
+	    [16#F7 | encode_rm(RM32, 2#101, [])];
+	{{rm64,RM64}} ->
+	    [rex([{w,1}]), 16#F7 | encode_rm(RM64, 2#101, [])];
+	{{reg32,Reg32}, {rm32,RM32}} ->			% reg *= rm32
+	    [16#0F, 16#AF | encode_rm(RM32, Reg32, [])];
+	{{reg64,Reg64}, {rm64,RM64}} ->
+	    [rex([{w,1}]), 16#0F, 16#AF | encode_rm(RM64, Reg64, [])];
+	{{reg32,Reg32}, {rm32,RM32}, {imm8,Imm8}} ->	% reg := rm32 * sext(imm8)
+	    [16#6B | encode_rm(RM32, Reg32, [Imm8])];
+	{{reg64,Reg64}, {rm64,RM64}, {imm8,Imm8}} ->
+	    [rex([{w,1}]), 16#6B | encode_rm(RM64, Reg64, [Imm8])];
+	{{reg32,Reg32}, {rm32,RM32}, {imm32,Imm32}} ->	% reg := rm32 * imm32
+	    [16#69 | encode_rm(RM32, Reg32, le32(Imm32, []))];
+	{{reg64,Reg64}, {rm64,RM64}, {imm32,Imm32}} ->
+	    [rex([{w,1}]), 16#69 | encode_rm(RM64, Reg64, le32(Imm32, []))]
+    end.
+
+%% imul_sizeof(Opnds) ->
+%%    case Opnds of
+%%	{{rm32,RM32}} ->
+%%	    1 + sizeof_rm(RM32);
+%%	{{reg32,_}, {rm32,RM32}} ->
+%%	    2 + sizeof_rm(RM32);
+%%	{{reg32,_}, {rm32,RM32}, {imm8,_}} ->
+%%	    1 + sizeof_rm(RM32) + 1;
+%%	{{reg32,_}, {rm32,RM32}, {imm32,_}} ->
+%%	    1 + sizeof_rm(RM32) + 4
+%%    end.
+
+jcc_encode(Opnds) ->
+    case Opnds of
+	{{cc,CC}, {rel8,Rel8}} ->
+	    [16#70 bor CC, Rel8];
+	{{cc,CC}, {rel32,Rel32}} ->
+	    [16#0F, 16#80 bor CC | le32(Rel32, [])]
+    end.
+
+jcc_sizeof(Opnds) ->
+    case Opnds of
+       {{cc,_}, {rel8,_}} ->
+	   2;
+       {{cc,_}, {rel32,_}} ->
+	   2 + 4
+   end.
+
+jmp8_op_encode(Opcode, {{rel8,Rel8}}) ->	% jecxz, loop, loope, loopne
+    [Opcode, Rel8].
+
+jmp8_op_sizeof({{rel8,_}}) ->			% jecxz, loop, loope, loopne
+    2.
+
+jmp_encode(Opnds) ->
+    case Opnds of
+	{{rel8,Rel8}} ->
+	    [16#EB, Rel8];
+	{{rel32,Rel32}} ->
+	    [16#E9 | le32(Rel32, [])];
+%%% 	{{rm32,RM32}} ->
+%%% 	    [16#FF | encode_rm(RM32, 2#100, [])]
+	{{rm64,RM64}} ->
+	    [16#FF | encode_rm(RM64, 2#100, [])]
+    end.
+
+%% jmp_sizeof(Opnds) ->
+%%    case Opnds of
+%%	{{rel8,_}} ->
+%%	    2;
+%%	{{rel32,_}} ->
+%%	    1 + 4;
+%%	{{rm32,RM32}} ->
+%%	    1 + sizeof_rm(RM32)
+%%    end.
+
+lea_encode({{reg32,Reg32}, {ea,EA}}) ->
+    [16#8D | enc_ea(EA, Reg32, [])];
+lea_encode({{reg64,Reg64}, {ea,EA}}) ->
+    [rex([{w, 1}]), 16#8D | enc_ea(EA, Reg64, [])].
+
+%% lea_sizeof({{reg32,_}, {ea,EA}}) ->
+%%    1 + sizeof_ea(EA).
+
+mov_encode(Opnds) ->
+    case Opnds of
+	{{rm8,RM8}, {reg8,Reg8}} ->
+	    [rex([{r8, RM8}, {r8, Reg8}]), 16#88 | encode_rm(RM8, Reg8, [])];
+	{{rm16,RM16}, {reg16,Reg16}} ->
+	    [?PFX_OPND_16BITS, 16#89 | encode_rm(RM16, Reg16, [])];
+	{{rm32,RM32}, {reg32,Reg32}} ->
+	    [16#89 | encode_rm(RM32, Reg32, [])];
+	{{rm64,RM64}, {reg64,Reg64}} ->
+	    [rex([{w, 1}]), 16#89 | encode_rm(RM64, Reg64, [])];
+	{{reg8,Reg8}, {rm8,RM8}} ->
+	    [rex([{r8, RM8}, {r8, Reg8}]), 16#8A |
+	     encode_rm(RM8, Reg8, [])];
+	{{reg16,Reg16}, {rm16,RM16}} ->
+	    [?PFX_OPND_16BITS, 16#8B | encode_rm(RM16, Reg16, [])];
+	{{reg32,Reg32}, {rm32,RM32}} ->
+	    [16#8B | encode_rm(RM32, Reg32, [])];
+	{{reg64,Reg64}, {rm64,RM64}} ->
+	    [rex([{w, 1}]), 16#8B | encode_rm(RM64, Reg64, [])];
+	{al, {moffs8,Moffs8}} ->
+	    [16#A0 | le32(Moffs8, [])];
+	{ax, {moffs16,Moffs16}} ->
+	    [?PFX_OPND_16BITS, 16#A1 | le32(Moffs16, [])];
+	{eax, {moffs32,Moffs32}} ->
+	    [16#A1 | le32(Moffs32, [])];
+	{rax, {moffs32,Moffs32}} ->
+	    [rex([{w, 1}]), 16#A1 | le32(Moffs32, [])];
+	{{moffs8,Moffs8}, al} ->
+	    [16#A2 | le32(Moffs8, [])];
+	{{moffs16,Moffs16}, ax} ->
+	    [?PFX_OPND_16BITS, 16#A3 | le32(Moffs16, [])];
+	{{moffs32,Moffs32}, eax} ->
+	    [16#A3 | le32(Moffs32, [])];
+	{{moffs32,Moffs32}, rax} ->
+	    [rex([{w, 1}]), 16#A3 | le32(Moffs32, [])];
+	{{reg8,Reg8}, {imm8,Imm8}} ->
+	    [rex([{b, Reg8}, {r8, Reg8}]), 16#B0 bor (Reg8 band 2#111), Imm8];
+	{{reg16,Reg16}, {imm16,Imm16}} ->
+	    [?PFX_OPND_16BITS, rex([{b, Reg16}]), 16#B8 bor (Reg16 band 2#111) 
+             | le16(Imm16, [])];
+	{{reg32,Reg32}, {imm32,Imm32}} ->
+	    [rex([{b, Reg32}]), 16#B8 bor (Reg32 band 2#111)
+             | le32(Imm32, [])];
+	{{reg64,Reg64}, {imm64,Imm64}} ->
+	    [rex([{w, 1}, {b, Reg64}]), 16#B8 bor (Reg64 band 2#111)
+             | le64(Imm64, [])];
+	{{rm8,RM8}, {imm8,Imm8}} ->
+	    [rex([{r8, RM8}]), 16#C6 | encode_rm(RM8, 2#000, [Imm8])];
+	{{rm16,RM16}, {imm16,Imm16}} ->
+	    [?PFX_OPND_16BITS, 16#C7 |
+             encode_rm(RM16, 2#000, le16(Imm16, []))];
+	{{rm32,RM32}, {imm32,Imm32}} ->
+	    [16#C7 | encode_rm(RM32, 2#000, le32(Imm32, []))];
+	{{rm64,RM64}, {imm32,Imm32}} ->
+	    [rex([{w, 1}]), 16#C7 | encode_rm(RM64, 2#000, le32(Imm32, []))]
+    end.
+
+%% mov_sizeof(Opnds) ->
+%%     case Opnds of
+%% 	{{rm8,RM8}, {reg8,_}} ->
+%% 	    1 + sizeof_rm(RM8);
+%% 	{{rm16,RM16}, {reg16,_}} ->
+%% 	    2 + sizeof_rm(RM16);
+%% 	{{rm32,RM32}, {reg32,_}} ->
+%% 	    1 + sizeof_rm(RM32);
+%% 	{{reg8,_}, {rm8,RM8}} ->
+%% 	    1 + sizeof_rm(RM8);
+%% 	{{reg16,_}, {rm16,RM16}} ->
+%% 	    2 + sizeof_rm(RM16);
+%% 	{{reg32,_}, {rm32,RM32}} ->
+%% 	    1 + sizeof_rm(RM32);
+%% 	{al, {moffs8,_}} ->
+%% 	    1 + 4;
+%% 	{ax, {moffs16,_}} ->
+%% 	    2 + 4;
+%% 	{eax, {moffs32,_}} ->
+%% 	    1 + 4;
+%% 	{{moffs8,_}, al} ->
+%% 	    1 + 4;
+%% 	{{moffs16,_}, ax} ->
+%% 	    2 + 4;
+%% 	{{moffs32,_}, eax} ->
+%% 	    1 + 4;
+%% 	{{reg8,_}, {imm8,_}} ->
+%% 	    2;
+%% 	{{reg16,_}, {imm16,_}} ->
+%% 	    2 + 2;
+%% 	{{reg32,_}, {imm32,_}} ->
+%% 	    1 + 4;
+%% 	{{rm8,RM8}, {imm8,_}} ->
+%% 	    1 + sizeof_rm(RM8) + 1;
+%% 	{{rm16,RM16}, {imm16,_}} ->
+%% 	    2 + sizeof_rm(RM16) + 2;
+%% 	{{rm32,RM32}, {imm32,_}} ->
+%% 	    1 + sizeof_rm(RM32) + 4
+%%     end.
+
+movx_op_encode(Opcode, Opnds) ->	% movsx, movzx
+    case Opnds of
+	{{reg16,Reg16}, {rm8,RM8}} ->
+	    [?PFX_OPND_16BITS, rex([{r8, RM8}]), 16#0F, Opcode |
+	     encode_rm(RM8, Reg16, [])];
+	{{reg32,Reg32}, {rm8,RM8}} ->
+	    [rex([{r8, RM8}]), 16#0F, Opcode | encode_rm(RM8, Reg32, [])];
+	{{reg32,Reg32}, {rm16,RM16}} ->
+	    [16#0F, Opcode bor 1 | encode_rm(RM16, Reg32, [])];
+	{{reg64,Reg64}, {rm8,RM8}} ->
+	    [rex([{w,1}]), 16#0F, Opcode | encode_rm(RM8, Reg64, [])];
+	{{reg64,Reg64}, {rm16,RM16}} ->
+	    [rex([{w,1}]), 16#0F, Opcode bor 1 | encode_rm(RM16, Reg64, [])];
+	{{reg64,Reg64}, {rm32,RM32}} ->
+            %% This is magic... /Luna
+	    [rex([{w,(1 band (Opcode bsr 3))}]), 16#63 |
+             encode_rm(RM32, Reg64, [])]
+    end.
+
+%% movx_op_sizeof(Opnds) ->
+%%    case Opnds of
+%%	{{reg16,_}, {rm8,RM8}} ->
+%%	    3 + sizeof_rm(RM8);
+%%	{{reg32,_}, {rm8,RM8}} ->
+%%	    1 + 2 + sizeof_rm(RM8);
+%%	{{reg32,_}, {rm16,RM16}} ->
+%%	    1 + 2 + sizeof_rm(RM16)
+%%    end.
+
+pop_encode(Opnds) ->
+    case Opnds of
+	{{rm64,RM64}} ->
+	    [16#8F | encode_rm(RM64, 2#000, [])];
+	{{reg64,Reg64}} ->
+	    [rex([{b,Reg64}]),16#58 bor (Reg64 band 2#111)]
+    end.
+
+%% pop_sizeof(Opnds) ->
+%%    case Opnds of
+%%	{{rm32,RM32}} ->
+%%	    1 + sizeof_rm(RM32);
+%%	{{reg32,_}} ->
+%%	    1
+%%    end.
+
+push_encode(Opnds) ->
+    case Opnds of
+%%% 	{{rm32,RM32}} ->
+%%% 	    [16#FF | encode_rm(RM32, 2#110, [])];
+	{{rm64,RM64}} ->
+	    [16#FF | encode_rm(RM64, 2#110, [])];
+%%% 	{{reg32,Reg32}} ->
+%%% 	    [rex([{b, 1}]), 16#50 bor (Reg32 band 2#111)];
+	{{reg64,Reg64}} ->
+	    [rex([{b, Reg64}]), 16#50 bor (Reg64 band 2#111)];        
+	{{imm8,Imm8}} ->	% sign-extended
+	    [16#6A, Imm8];
+	{{imm32,Imm32}} -> % Sign extended to 64 bits
+	    [16#68 | le32(Imm32, [])]
+    end.
+
+%% push_sizeof(Opnds) ->
+%%    case Opnds of
+%%	{{rm32,RM32}} ->
+%%	    1 + sizeof_rm(RM32);
+%%	{{reg32,_}} ->
+%%	    1;
+%%	{{imm8,_}} ->
+%%	    2;
+%%	{{imm32,_}} ->
+%%	    1 + 4
+%%    end.
+
+shift_op_encode(SubOpcode, Opnds) ->	% rol, ror, rcl, rcr, shl, shr, sar
+     case Opnds of
+         {{rm32,RM32}, 1} ->
+             [16#D1 | encode_rm(RM32, SubOpcode, [])];
+         {{rm32,RM32}, cl} ->
+             [16#D3 | encode_rm(RM32, SubOpcode, [])];
+         {{rm32,RM32}, {imm8,Imm8}} ->
+             [16#C1 | encode_rm(RM32, SubOpcode, [Imm8])];
+         {{rm64,RM64}, 1} ->
+             [rex([{w,1}]), 16#D1 | encode_rm(RM64, SubOpcode, [])];
+         {{rm64,RM64}, cl} ->
+             [rex([{w,1}]), 16#D3 | encode_rm(RM64, SubOpcode, [])];
+         {{rm64,RM64}, {imm8,Imm8}} ->
+             [rex([{w,1}]), 16#C1 | encode_rm(RM64, SubOpcode, [Imm8])]
+     end.
+
+%% shift_op_sizeof(Opnds) ->		% rcl, rcr, rol, ror, sar, shl, shr
+%%     case Opnds of
+%% 	{{rm32,RM32}, 1} ->
+%% 	    1 + sizeof_rm(RM32);
+%% 	{{rm32,RM32}, cl} ->
+%% 	    1 + sizeof_rm(RM32);
+%% 	{{rm32,RM32}, {imm8,_Imm8}} ->
+%% 	    1 + sizeof_rm(RM32) + 1
+%%     end.
+
+ret_encode(Opnds) ->
+    case Opnds of
+	{} ->
+	    [16#C3];
+	{{imm16,Imm16}} ->
+	    [16#C2 | le16(Imm16, [])]
+    end.
+
+ret_sizeof(Opnds) ->
+    case Opnds of
+	{} ->
+	    1;
+	{{imm16,_}} ->
+	    1 + 2
+    end.
+
+setcc_encode({{cc,CC}, {rm8,RM8}}) ->
+    [rex([{r8, RM8}]), 16#0F, 16#90 bor CC | encode_rm(RM8, 2#000, [])].
+
+%% setcc_sizeof({{cc,_}, {rm8,RM8}}) ->
+%%    2 + sizeof_rm(RM8).
+
+shd_op_encode(Opcode, Opnds) ->
+    case Opnds of
+	{{rm32,RM32}, {reg32,Reg32}, {imm8,Imm8}} ->
+	    [16#0F, Opcode | encode_rm(RM32, Reg32, [Imm8])];
+	{{rm32,RM32}, {reg32,Reg32}, cl} ->
+	    [16#0F, Opcode bor 1 | encode_rm(RM32, Reg32, [])]
+    end.
+
+%% shd_op_sizeof(Opnds) ->
+%%    case Opnds of
+%%	{{rm32,RM32}, {reg32,_}, {imm8,_}} ->
+%%	    2 + sizeof_rm(RM32) + 1;
+%%	{{rm32,RM32}, {reg32,_}, cl} ->
+%%	    2 + sizeof_rm(RM32)
+%%    end.
+
+test_encode(Opnds) ->
+    case Opnds of
+	{eax, {imm32,Imm32}} ->
+	    [16#A9 | le32(Imm32, [])];
+	{{rm32,RM32}, {imm32,Imm32}} ->
+	    [16#F7 | encode_rm(RM32, 2#000, le32(Imm32, []))];
+	{{rm32,RM32}, {reg32,Reg32}} ->
+	    [16#85 | encode_rm(RM32, Reg32, [])]
+    end.
+
+%% test_sizeof(Opnds) ->
+%%     case Opnds of
+%% 	{eax, {imm32,_}} ->
+%% 	    1 + 4;
+%% 	{{rm32,RM32}, {imm32,_}} ->
+%% 	    1 + sizeof_rm(RM32) + 4;
+%% 	{{rm32,RM32}, {reg32,_}} ->
+%% 	    1 + sizeof_rm(RM32)
+%%     end.
+
+fild_encode(Opnds) ->
+    %% The operand cannot be a register!
+    {{rm64, RM64}} = Opnds,
+    [16#DB | encode_rm(RM64, 2#000, [])].
+
+%% fild_sizeof(Opnds) ->
+%%    {{rm32, RM32}} = Opnds,
+%%    1 + sizeof_rm(RM32).
+
+fld_encode(Opnds) ->
+    case Opnds of
+	{{rm64fp, RM64fp}} ->
+	    [16#DD | encode_rm(RM64fp, 2#000, [])];
+	{{fpst, St}} ->
+	    [16#D9, 16#C0 bor st(St)]
+    end.
+
+%% fld_sizeof(Opnds) ->
+%%    case Opnds of
+%%	{{rm64fp, RM64fp}} ->
+%%	    1 + sizeof_rm(RM64fp);
+%%	{{fpst, _}} ->
+%%	    2
+%%    end.
+
+x87_comm_arith_encode(OpCode, Opnds) ->
+    %% fadd, fmul
+    case Opnds of
+	{{rm64fp, RM64fp}} ->
+	    [16#DC | encode_rm(RM64fp, OpCode, [])];
+	{{fpst,0}, {fpst,St}} ->
+	    [16#D8, (16#C0 bor (OpCode bsl 3)) bor st(St)];
+	{{fpst,St}, {fpst,0}} ->	
+	    [16#DC, (16#C0 bor (OpCode bsl 3)) bor st(St)]
+    end.
+	    
+x87_comm_arith_pop_encode(OpCode, Opnds) ->
+    %% faddp, fmulp
+    case Opnds of
+	[] ->
+	    [16#DE, 16#C0 bor (OpCode bsl 3) bor st(1)];
+	{{fpst,St},{fpst,0}} ->
+	    [16#DE, 16#C0 bor (OpCode bsl 3) bor st(St)]
+    end.
+
+x87_arith_encode(OpCode, Opnds) ->
+    %% fdiv, fsub
+    case Opnds of
+	{{rm64fp, RM64fp}} ->
+	    [16#DC | encode_rm(RM64fp, OpCode, [])];
+	{{fpst,0}, {fpst,St}} ->
+	    OpCode0 = OpCode band 2#110,
+	    [16#D8, 16#C0 bor (OpCode0 bsl 3) bor st(St)];
+	{{fpst,St}, {fpst,0}} ->
+	    OpCode0 = OpCode bor 1,
+	    [16#DC, 16#C0 bor (OpCode0 bsl 3) bor st(St)]
+    end.
+	    
+x87_arith_pop_encode(OpCode, Opnds) ->
+    %% fdivp, fsubp
+    OpCode0 = OpCode bor 1,
+    case Opnds of
+	[] ->
+	    [16#DE, 16#C8 bor (OpCode0 bsl 3) bor st(1)];
+	{{fpst,St}, {fpst,0}} ->
+	    [16#DE, 16#C8 bor (OpCode0 bsl 3) bor st(St)]
+    end.
+
+x87_arith_rev_encode(OpCode, Opnds) ->
+    %% fdivr, fsubr
+    case Opnds of
+	{{rm64fp, RM64fp}} ->
+	    [16#DC | encode_rm(RM64fp, OpCode, [])];
+	{{fpst,0}, {fpst,St}} ->
+	    OpCode0 = OpCode bor 1,
+	    [16#D8, 16#C0 bor (OpCode0 bsl 3) bor st(St)];
+	{{fpst,St}, {fpst,0}} ->
+	    OpCode0 = OpCode band 2#110,
+	    [16#DC, 16#C0 bor (OpCode0 bsl 3) bor st(St)]
+    end.
+	    
+x87_arith_rev_pop_encode(OpCode, Opnds) ->
+    %% fdivrp, fsubrp
+    OpCode0 = OpCode band 2#110,
+    case Opnds of
+	[] ->
+	    [16#DE, 16#C0 bor (OpCode0 bsl 3) bor st(1)];
+	{{fpst,St}, {fpst, 0}} ->
+	    [16#DE, 16#C0 bor (OpCode0 bsl 3) bor st(St)]
+    end.
+
+%% x87_arith_sizeof(Opnds) ->
+%%    case Opnds of
+%%	{{rm64fp, RM64fp}} ->
+%%	    1 + sizeof_rm(RM64fp);
+%%	{{fpst,0}, {fpst,_}} ->
+%%	    2;
+%%	{{fpst,_}, {fpst,0}} ->
+%%	    2
+%%    end.
+
+fst_encode(OpCode, Opnds) ->
+    case Opnds of
+	{{rm64fp, RM64fp}} ->
+	    [16#DD | encode_rm(RM64fp, OpCode, [])];
+	{{fpst, St}} ->
+	    [16#DD, 16#C0 bor (OpCode bsl 3) bor st(St)]
+    end.
+
+%% fst_sizeof(Opnds) ->
+%%     case Opnds of
+%% 	{{rm64fp, RM64fp}} ->
+%% 	    1 + sizeof_rm(RM64fp);
+%% 	{{fpst, _}} ->
+%% 	    2
+%%     end.
+    
+fchs_encode() ->
+    [16#D9, 16#E0].
+
+fchs_sizeof() ->
+    2.
+
+ffree_encode({{fpst, St}})->
+    [16#DD, 16#C0 bor st(St)].
+
+ffree_sizeof() ->
+    2.
+
+fwait_encode() ->
+    [16#9B].
+
+fwait_sizeof() ->
+    1.
+
+fxch_encode(Opnds) ->
+    case Opnds of
+	[] ->
+	    [16#D9, 16#C8 bor st(1)];
+	{{fpst, St}} ->
+	    [16#D9, 16#C8 bor st(St)]
+    end.
+
+fxch_sizeof() ->
+    2.
+
+insn_encode(Op, Opnds, Offset) ->
+    Bytes_and_REX = insn_encode_internal(Op, Opnds),
+    Bytes         = fix_rex(Bytes_and_REX),
+    case has_relocs(Bytes) of
+	false ->	% the common case
+	    {Bytes, []};
+	_ ->
+	    fix_relocs(Bytes, Offset, [], [])
+    end.
+
+fix_rex(Bytes) ->
+    fix_rex(Bytes, 2#0100 bsl 4, []).
+
+fix_rex([{rex, REX} | Rest], REXAcc, Bytes) ->
+    fix_rex(Rest, REXAcc bor REX, Bytes);
+fix_rex([{{rex, REX}, Byte} | Rest], REXAcc, Bytes) ->
+    fix_rex(Rest, REXAcc bor REX, [Byte | Bytes]);
+fix_rex([Byte | Rest], REXAcc, Bytes) ->
+    fix_rex(Rest, REXAcc, [Byte | Bytes]);
+fix_rex([], 2#01000000, Bytes) ->              % no rex prefix
+    lists:reverse(Bytes);
+fix_rex([], REX0, Bytes) ->                    % rex prefix...
+    REX = REX0 band 16#FF, % for 8 bit registers
+    [Head|Tail] = lists:reverse(Bytes),
+    case Head of
+        16#66 ->                               % ...and 16 bit/sse2 prefix
+            [16#66, REX | Tail];
+	16#F2 ->                               % ...and sse2 prefix
+	    [16#F2, REX | Tail];
+        _ ->                                   % ...only
+           [REX, Head | Tail]
+    end.
+
+has_relocs([{le32,_,_}|_]) -> true;
+has_relocs([{le64,_,_}|_]) -> true;
+has_relocs([_|Bytes]) -> has_relocs(Bytes);
+has_relocs([]) -> false.
+
+fix_relocs([{le32,Tag,Val}|Bytes], Offset, Code, Relocs) ->
+    fix_relocs(Bytes, Offset+4,
+	       [16#00, 16#00, 16#00, 16#00 | Code],
+	       [{Tag,Offset,Val}|Relocs]);
+fix_relocs([{le64,Tag,Val}|Bytes], Offset, Code, Relocs) ->
+    fix_relocs(Bytes, Offset+8,
+	       [16#00, 16#00, 16#00, 16#00,
+                16#00, 16#00, 16#00, 16#00 | Code],
+	       [{Tag,Offset,Val}|Relocs]);
+fix_relocs([Byte|Bytes], Offset, Code, Relocs) ->
+    fix_relocs(Bytes, Offset+1, [Byte|Code], Relocs);
+fix_relocs([], _Offset, Code, Relocs) ->
+    {lists:reverse(Code), lists:reverse(Relocs)}.
+
+insn_encode_internal(Op, Opnds) ->
+    case Op of
+	'adc' -> arith_binop_encode(2#010, Opnds);
+	'add' -> arith_binop_encode(2#000, Opnds);
+	'and' -> arith_binop_encode(2#100, Opnds);
+	'bsf' -> bs_op_encode(16#BC, Opnds);
+	'bsr' -> bs_op_encode(16#BD, Opnds);
+	'bswap' -> bswap_encode(Opnds);
+	'bt' -> bt_op_encode(2#100, Opnds);
+	'btc' -> bt_op_encode(2#111, Opnds);
+	'btr' -> bt_op_encode(2#110, Opnds);
+	'bts' -> bt_op_encode(2#101, Opnds);
+	'call' -> call_encode(Opnds);
+	'cbw' -> cbw_encode(Opnds);
+	'cdq' -> nullary_op_encode(16#99, Opnds);
+	'clc' -> nullary_op_encode(16#F8, Opnds);
+	'cld' -> nullary_op_encode(16#FC, Opnds);
+	'cmc' -> nullary_op_encode(16#F5, Opnds);
+	'cmovcc' -> cmovcc_encode(Opnds);
+	'cmp' -> arith_binop_encode(2#111, Opnds);
+	'cwde' -> nullary_op_encode(16#98, Opnds);
+	'dec' -> incdec_encode(2#001, Opnds);
+	'div' -> arith_unop_encode(2#110, Opnds);
+	'enter' -> enter_encode(Opnds);
+	'idiv' -> arith_unop_encode(2#111, Opnds);
+	'imul' -> imul_encode(Opnds);
+	'inc' -> incdec_encode(2#000, Opnds);
+	'into' -> case get(hipe_target_arch) of
+                      x86   -> nullary_op_encode(16#CE, Opnds);
+                      amd64 -> exit({invalid_amd64_opcode,
+                                     hipe_amd64_encode__erl})
+                  end;
+	'jcc' -> jcc_encode(Opnds);
+	'jecxz' -> jmp8_op_encode(16#E3, Opnds);
+	'jmp' -> jmp_encode(Opnds);
+	'lea' -> lea_encode(Opnds);
+	'leave' -> nullary_op_encode(16#C9, Opnds);
+	'loop' -> jmp8_op_encode(16#E2, Opnds);
+	'loope' -> jmp8_op_encode(16#E1, Opnds);
+	'loopne' -> jmp8_op_encode(16#E0, Opnds);
+	'mov' -> mov_encode(Opnds);
+	'movsx' -> movx_op_encode(16#BE, Opnds);
+	'movzx' -> movx_op_encode(16#B6, Opnds);
+	'mul' -> arith_unop_encode(2#100, Opnds);
+	'neg' -> arith_unop_encode(2#011, Opnds);
+	'nop' -> nullary_op_encode(16#90, Opnds);
+	'not' -> arith_unop_encode(2#010, Opnds);
+	'or' -> arith_binop_encode(2#001, Opnds);
+	'pop' -> pop_encode(Opnds);
+	'prefix_fs' -> nullary_op_encode(16#64, Opnds);
+	'push' -> push_encode(Opnds);
+	'rcl' -> shift_op_encode(2#010, Opnds);
+	'rcr' -> shift_op_encode(2#011, Opnds);
+	'ret' -> ret_encode(Opnds);
+	'rol' -> shift_op_encode(2#000, Opnds);
+	'ror' -> shift_op_encode(2#001, Opnds);
+	'sar' -> shift_op_encode(2#111, Opnds);
+	'sbb' -> arith_binop_encode(2#011, Opnds);
+	'setcc' -> setcc_encode(Opnds);
+	'shl' -> shift_op_encode(2#100, Opnds);
+	'shld' -> shd_op_encode(16#A4, Opnds);
+	'shr' -> shift_op_encode(2#101, Opnds);
+	'shrd' -> shd_op_encode(16#AC, Opnds);
+	'stc' -> nullary_op_encode(16#F9, Opnds);
+	'std' -> nullary_op_encode(16#FD, Opnds);
+	'sub' -> arith_binop_encode(2#101, Opnds);
+	'test' -> test_encode(Opnds);
+	'xor' -> arith_binop_encode(2#110, Opnds);
+
+        %% sse2
+        'addsd'   -> sse2_arith_binop_encode(16#F2, 16#58, Opnds);
+        'cmpsd'   -> sse2_arith_binop_encode(16#F2, 16#C2, Opnds);
+        'comisd'  -> sse2_arith_binop_encode(16#66, 16#2F, Opnds);
+	'cvtsi2sd' -> sse2_cvtsi2sd_encode(Opnds);
+        'divsd'   -> sse2_arith_binop_encode(16#F2, 16#5E, Opnds);
+        'maxsd'   -> sse2_arith_binop_encode(16#F2, 16#5F, Opnds);
+        'minsd'   -> sse2_arith_binop_encode(16#F2, 16#5D, Opnds);
+        'movsd'   -> sse2_mov_encode(Opnds);
+        'mulsd'   -> sse2_arith_binop_encode(16#F2, 16#59, Opnds);
+        'sqrtsd'  -> sse2_arith_binop_encode(16#F2, 16#51, Opnds);
+        'subsd'   -> sse2_arith_binop_encode(16#F2, 16#5C, Opnds);
+        'ucomisd' -> sse2_arith_binop_encode(16#66, 16#2E, Opnds);
+	'xorpd'   -> sse2_arith_binop_encode(16#66, 16#57, Opnds);
+        %% End of sse2
+
+	%% x87
+	'fadd'   -> x87_comm_arith_encode(2#000, Opnds);
+	'faddp'  -> x87_comm_arith_pop_encode(2#000, Opnds);
+	'fchs'   -> fchs_encode();
+	'fdiv'   -> x87_arith_encode(2#110, Opnds);
+	'fdivp'  -> x87_arith_pop_encode(2#110, Opnds);
+	'fdivr'  -> x87_arith_rev_encode(2#111, Opnds);
+	'fdivrp' -> x87_arith_rev_pop_encode(2#111, Opnds);
+	'ffree'  -> ffree_encode(Opnds);
+	'fild'   -> fild_encode(Opnds);
+	'fld'    -> fld_encode(Opnds);
+	'fmul'   -> x87_comm_arith_encode(2#001, Opnds);
+	'fmulp'  -> x87_comm_arith_pop_encode(2#001, Opnds);
+	'fst'    -> fst_encode(2#010, Opnds);
+	'fstp'   -> fst_encode(2#011, Opnds);
+	'fsub'   -> x87_arith_encode(2#100, Opnds);
+	'fsubp'  -> x87_arith_pop_encode(2#100, Opnds);
+	'fsubr'  -> x87_arith_rev_encode(2#101, Opnds);
+	'fsubrp' -> x87_arith_rev_pop_encode(2#101, Opnds);
+	'fwait'  -> fwait_encode();
+	'fxch'   -> fxch_encode(Opnds);
+	%% End of x87
+
+	_ -> exit({?MODULE,insn_encode,Op})
+    end.
+
+insn_sizeof(Op, Opnds) ->
+    case Op of
+	'cbw' -> cbw_sizeof(Opnds);
+  	'cdq' -> nullary_op_sizeof(Opnds);
+  	'clc' -> nullary_op_sizeof(Opnds);
+  	'cld' -> nullary_op_sizeof(Opnds);
+  	'cmc' -> nullary_op_sizeof(Opnds);
+  	'cwde' -> nullary_op_sizeof(Opnds);
+  	'enter' -> enter_sizeof(Opnds);
+  	'into' -> nullary_op_sizeof(Opnds);
+  	'jcc' -> jcc_sizeof(Opnds);
+  	'jecxz' -> jmp8_op_sizeof(Opnds);
+  	'leave' -> nullary_op_sizeof(Opnds);
+  	'loop' -> jmp8_op_sizeof(Opnds);
+  	'loope' -> jmp8_op_sizeof(Opnds);
+  	'loopne' -> jmp8_op_sizeof(Opnds);
+  	'nop' -> nullary_op_sizeof(Opnds);
+  	'prefix_fs' -> nullary_op_sizeof(Opnds);
+  	'ret' -> ret_sizeof(Opnds);
+  	'stc' -> nullary_op_sizeof(Opnds);
+  	'std' -> nullary_op_sizeof(Opnds);
+
+%% 	%% x87
+%% 	'fadd'   -> x87_arith_sizeof(Opnds);
+%% 	'faddp'  -> x87_arith_sizeof(Opnds);
+ 	'fchs'   -> fchs_sizeof();
+%% 	'fdiv'   -> x87_arith_sizeof(Opnds);
+%% 	'fdivp'  -> x87_arith_sizeof(Opnds);
+%% 	'fdivr'  -> x87_arith_sizeof(Opnds);
+%% 	'fdivrp' -> x87_arith_sizeof(Opnds);
+ 	'ffree'  -> ffree_sizeof();
+%% 	'fild'   -> fild_sizeof(Opnds);
+%% 	'fld'    -> fld_sizeof(Opnds);
+%% 	'fmul'   -> x87_arith_sizeof(Opnds);
+%% 	'fmulp'  -> x87_arith_sizeof(Opnds);
+%% 	'fst'    -> fst_sizeof(Opnds);
+%% 	'fstp'   -> fst_sizeof(Opnds);
+%% 	'fsub'   -> x87_arith_sizeof(Opnds);
+%% 	'fsubp'  -> x87_arith_sizeof(Opnds);
+%% 	'fsubr'  -> x87_arith_sizeof(Opnds);
+%% 	'fsubrp' -> x87_arith_sizeof(Opnds);
+ 	'fwait'  -> fwait_sizeof();
+ 	'fxch'   -> fxch_sizeof();	
+%% 	%% End of x87
+	_ -> %% Hack that is to be removed some day... Maybe...
+            {Bytes, _} = insn_encode(Op, Opnds, 0),
+            length(Bytes)
+%%	'adc' -> arith_binop_sizeof(Opnds);
+%%  	'add' -> arith_binop_sizeof(Opnds);
+%%  	'and' -> arith_binop_sizeof(Opnds);
+%%  	'bsf' -> bs_op_sizeof(Opnds);
+%%  	'bsr' -> bs_op_sizeof(Opnds);
+%%  	'bswap' -> bswap_sizeof(Opnds);
+%%  	'bt' -> bt_op_sizeof(Opnds);
+%%  	'btc' -> bt_op_sizeof(Opnds);
+%%  	'btr' -> bt_op_sizeof(Opnds);
+%%  	'bts' -> bt_op_sizeof(Opnds);
+%%  	'call' -> call_sizeof(Opnds);
+%%  	'cmovcc' -> cmovcc_sizeof(Opnds);
+%%  	'cmp' -> arith_binop_sizeof(Opnds);
+%%  	'dec' -> incdec_sizeof(Opnds);
+%%  	'div' -> arith_unop_sizeof(Opnds);
+%%  	'idiv' -> arith_unop_sizeof(Opnds);
+%%  	'imul' -> imul_sizeof(Opnds);
+%%  	'inc' -> incdec_sizeof(Opnds);
+%%  	'jmp' -> jmp_sizeof(Opnds);
+%%  	'lea' -> lea_sizeof(Opnds);
+%%  	'mov' -> mov_sizeof(Opnds);
+%%  	'movsx' -> movx_op_sizeof(Opnds);
+%%  	'movzx' -> movx_op_sizeof(Opnds);
+%%  	'mul' -> arith_unop_sizeof(Opnds);
+%%  	'neg' -> arith_unop_sizeof(Opnds);
+%%  	'not' -> arith_unop_sizeof(Opnds);
+%%  	'or' -> arith_binop_sizeof(Opnds);
+%%  	'pop' -> pop_sizeof(Opnds);
+%%  	'push' -> push_sizeof(Opnds);
+%%  	'rcl' -> shift_op_sizeof(Opnds);
+%%  	'rcr' -> shift_op_sizeof(Opnds);
+%%  	'rol' -> shift_op_sizeof(Opnds);
+%%  	'ror' -> shift_op_sizeof(Opnds);
+%%  	'sar' -> shift_op_sizeof(Opnds);
+%%  	'sbb' -> arith_binop_sizeof(Opnds);
+%%  	'setcc' -> setcc_sizeof(Opnds);
+%%  	'shl' -> shift_op_sizeof(Opnds);
+%%  	'shld' -> shd_op_sizeof(Opnds);
+%%  	'shr' -> shift_op_sizeof(Opnds);
+%%  	'shrd' -> shd_op_sizeof(Opnds);
+%% 	'sub' -> arith_binop_sizeof(Opnds);
+%%  	'test' -> test_sizeof(Opnds);
+%%  	'xor' -> arith_binop_sizeof(Opnds);
+%%	_ -> exit({?MODULE,insn_sizeof,Op})
+    end.
+
+%%=====================================================================
+%% testing interface
+%%=====================================================================
+
+-ifdef(DO_HIPE_AMD64_ENCODE_TEST).
+
+say(OS, Str) ->
+    file:write(OS, Str).
+
+digit16(Dig0) ->
+    Dig = Dig0 band 16#F,
+    if Dig >= 16#A -> $A + (Dig - 16#A);
+       true -> $0 + Dig
+    end.
+
+say_byte(OS, Byte) ->
+    say(OS, "0x"),
+    say(OS, [digit16(Byte bsr 4)]),
+    say(OS, [digit16(Byte)]).
+
+init(OS) ->
+    say(OS, "\t.text\n").
+
+say_bytes(OS, Byte0, Bytes0) ->
+    say_byte(OS, Byte0),
+    case Bytes0 of
+	[] ->
+	    say(OS, "\n");
+	[Byte1|Bytes1] ->
+	    say(OS, ","),
+	    say_bytes(OS, Byte1, Bytes1)
+    end.
+
+t(OS, Op, Opnds) ->
+    insn_sizeof(Op, Opnds),
+    {[Byte|Bytes],[]} = insn_encode(Op, Opnds, 0),
+    say(OS, "\t.byte "),
+    say_bytes(OS, Byte, Bytes).
+
+dotest1(OS) ->
+    init(OS),
+    % exercise all rm32 types
+    t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_rip(16#87654321)}}),
+    t(OS,lea,{{reg32,?EAX},{ea,ea_sib(sib(?ECX))}}),
+    t(OS,lea,{{reg32,?EAX},{ea,ea_sib(sib(?ECX,sindex(2#10,?EDI)))}}),
+    t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sindex(16#87654321)}}),
+    t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sindex(16#87654321,sindex(2#10,?EDI))}}),
+    t(OS,lea,{{reg32,?EAX},{ea,ea_base(?ECX)}}),
+    t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_sib(16#03,sib(?ECX))}}),
+    t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_sib(16#03,sib(?ECX,sindex(2#10,?EDI)))}}),
+    t(OS,lea,{{reg32,?EAX},{ea,ea_disp8_base(16#3,?ECX)}}),
+    t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sib(16#87654321,sib(?ECX))}}),
+    t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_sib(16#87654321,sib(?ECX,sindex(2#10,?EDI)))}}),
+    t(OS,lea,{{reg32,?EAX},{ea,ea_disp32_base(16#87654321,?EBP)}}),
+    t(OS,call,{{rm32,rm_reg(?EAX)}}),
+    t(OS,call,{{rm32,rm_mem(ea_disp32_sindex(16#87654321,sindex(2#10,?EDI)))}}),
+    t(OS,call,{{rel32,-5}}),
+    % default parameters for the tests below
+    Word32 = 16#87654321,
+    Word16 = 16#F00F,
+    Word8 = 16#80,
+    Imm32 = {imm32,Word32},
+    Imm16 = {imm16,Word16},
+    Imm8 = {imm8,Word8},
+    RM32 = {rm32,rm_reg(?EDX)},
+    RM16 = {rm16,rm_reg(?EDX)},
+    RM8 = {rm8,rm_reg(?EDX)},
+    Rel32 = {rel32,Word32},
+    Rel8 = {rel8,Word8},
+    Moffs32 = {moffs32,Word32},
+    Moffs16 = {moffs16,Word32},
+    Moffs8 = {moffs8,Word32},
+    CC = {cc,?CC_G},
+    Reg32 = {reg32,?EAX},
+    Reg16 = {reg16,?EAX},
+    Reg8 = {reg8,?AH},
+    EA = {ea,ea_base(?ECX)},
+    % exercise each instruction definition
+    t(OS,'adc',{eax,Imm32}),
+    t(OS,'adc',{RM32,Imm32}),
+    t(OS,'adc',{RM32,Imm8}),
+    t(OS,'adc',{RM32,Reg32}),
+    t(OS,'adc',{Reg32,RM32}),
+    t(OS,'add',{eax,Imm32}),
+    t(OS,'add',{RM32,Imm32}),
+    t(OS,'add',{RM32,Imm8}),
+    t(OS,'add',{RM32,Reg32}),
+    t(OS,'add',{Reg32,RM32}),
+    t(OS,'and',{eax,Imm32}),
+    t(OS,'and',{RM32,Imm32}),
+    t(OS,'and',{RM32,Imm8}),
+    t(OS,'and',{RM32,Reg32}),
+    t(OS,'and',{Reg32,RM32}),
+    t(OS,'bsf',{Reg32,RM32}),
+    t(OS,'bsr',{Reg32,RM32}),
+    t(OS,'bswap',{Reg32}),
+    t(OS,'bt',{RM32,Reg32}),
+    t(OS,'bt',{RM32,Imm8}),
+    t(OS,'btc',{RM32,Reg32}),
+    t(OS,'btc',{RM32,Imm8}),
+    t(OS,'btr',{RM32,Reg32}),
+    t(OS,'btr',{RM32,Imm8}),
+    t(OS,'bts',{RM32,Reg32}),
+    t(OS,'bts',{RM32,Imm8}),
+    t(OS,'call',{Rel32}),
+    t(OS,'call',{RM32}),
+    t(OS,'cbw',{}),
+    t(OS,'cdq',{}),
+    t(OS,'clc',{}),
+    t(OS,'cld',{}),
+    t(OS,'cmc',{}),
+    t(OS,'cmovcc',{CC,Reg32,RM32}),
+    t(OS,'cmp',{eax,Imm32}),
+    t(OS,'cmp',{RM32,Imm32}),
+    t(OS,'cmp',{RM32,Imm8}),
+    t(OS,'cmp',{RM32,Reg32}),
+    t(OS,'cmp',{Reg32,RM32}),
+    t(OS,'cwde',{}),
+    t(OS,'dec',{RM32}),
+    t(OS,'dec',{Reg32}),
+    t(OS,'div',{RM32}),
+    t(OS,'enter',{Imm16,{imm8,3}}),
+    t(OS,'idiv',{RM32}),
+    t(OS,'imul',{RM32}),
+    t(OS,'imul',{Reg32,RM32}),
+    t(OS,'imul',{Reg32,RM32,Imm8}),
+    t(OS,'imul',{Reg32,RM32,Imm32}),
+    t(OS,'inc',{RM32}),
+    t(OS,'inc',{Reg32}),
+    t(OS,'into',{}),
+    t(OS,'jcc',{CC,Rel8}),
+    t(OS,'jcc',{CC,Rel32}),
+    t(OS,'jecxz',{Rel8}),
+    t(OS,'jmp',{Rel8}),
+    t(OS,'jmp',{Rel32}),
+    t(OS,'jmp',{RM32}),
+    t(OS,'lea',{Reg32,EA}),
+    t(OS,'leave',{}),
+    t(OS,'loop',{Rel8}),
+    t(OS,'loope',{Rel8}),
+    t(OS,'loopne',{Rel8}),
+    t(OS,'mov',{RM8,Reg8}),
+    t(OS,'mov',{RM16,Reg16}),
+    t(OS,'mov',{RM32,Reg32}),
+    t(OS,'mov',{Reg8,RM8}),
+    t(OS,'mov',{Reg16,RM16}),
+    t(OS,'mov',{Reg32,RM32}),
+    t(OS,'mov',{al,Moffs8}),
+    t(OS,'mov',{ax,Moffs16}),
+    t(OS,'mov',{eax,Moffs32}),
+    t(OS,'mov',{Moffs8,al}),
+    t(OS,'mov',{Moffs16,ax}),
+    t(OS,'mov',{Moffs32,eax}),
+    t(OS,'mov',{Reg8,Imm8}),
+    t(OS,'mov',{Reg16,Imm16}),
+    t(OS,'mov',{Reg32,Imm32}),
+    t(OS,'mov',{RM8,Imm8}),
+    t(OS,'mov',{RM16,Imm16}),
+    t(OS,'mov',{RM32,Imm32}),
+    t(OS,'movsx',{Reg16,RM8}),
+    t(OS,'movsx',{Reg32,RM8}),
+    t(OS,'movsx',{Reg32,RM16}),
+    t(OS,'movzx',{Reg16,RM8}),
+    t(OS,'movzx',{Reg32,RM8}),
+    t(OS,'movzx',{Reg32,RM16}),
+    t(OS,'mul',{RM32}),
+    t(OS,'neg',{RM32}),
+    t(OS,'nop',{}),
+    t(OS,'not',{RM32}),
+    t(OS,'or',{eax,Imm32}),
+    t(OS,'or',{RM32,Imm32}),
+    t(OS,'or',{RM32,Imm8}),
+    t(OS,'or',{RM32,Reg32}),
+    t(OS,'or',{Reg32,RM32}),
+    t(OS,'pop',{RM32}),
+    t(OS,'pop',{Reg32}),
+    t(OS,'push',{RM32}),
+    t(OS,'push',{Reg32}),
+    t(OS,'push',{Imm8}),
+    t(OS,'push',{Imm32}),
+    t(OS,'rcl',{RM32,1}),
+    t(OS,'rcl',{RM32,cl}),
+    t(OS,'rcl',{RM32,Imm8}),
+    t(OS,'rcr',{RM32,1}),
+    t(OS,'rcr',{RM32,cl}),
+    t(OS,'rcr',{RM32,Imm8}),
+    t(OS,'ret',{}),
+    t(OS,'ret',{Imm16}),
+    t(OS,'rol',{RM32,1}),
+    t(OS,'rol',{RM32,cl}),
+    t(OS,'rol',{RM32,Imm8}),
+    t(OS,'ror',{RM32,1}),
+    t(OS,'ror',{RM32,cl}),
+    t(OS,'ror',{RM32,Imm8}),
+    t(OS,'sar',{RM32,1}),
+    t(OS,'sar',{RM32,cl}),
+    t(OS,'sar',{RM32,Imm8}),
+    t(OS,'sbb',{eax,Imm32}),
+    t(OS,'sbb',{RM32,Imm32}),
+    t(OS,'sbb',{RM32,Imm8}),
+    t(OS,'sbb',{RM32,Reg32}),
+    t(OS,'sbb',{Reg32,RM32}),
+    t(OS,'setcc',{CC,RM8}),
+    t(OS,'shl',{RM32,1}),
+    t(OS,'shl',{RM32,cl}),
+    t(OS,'shl',{RM32,Imm8}),
+    t(OS,'shld',{RM32,Reg32,Imm8}),
+    t(OS,'shld',{RM32,Reg32,cl}),
+    t(OS,'shr',{RM32,1}),
+    t(OS,'shr',{RM32,cl}),
+    t(OS,'shr',{RM32,Imm8}),
+    t(OS,'shrd',{RM32,Reg32,Imm8}),
+    t(OS,'shrd',{RM32,Reg32,cl}),
+    t(OS,'stc',{}),
+    t(OS,'std',{}),
+    t(OS,'sub',{eax,Imm32}),
+    t(OS,'sub',{RM32,Imm32}),
+    t(OS,'sub',{RM32,Imm8}),
+    t(OS,'sub',{RM32,Reg32}),
+    t(OS,'sub',{Reg32,RM32}),
+    t(OS,'test',{eax,Imm32}),
+    t(OS,'test',{RM32,Imm32}),
+    t(OS,'test',{RM32,Reg32}),
+    t(OS,'xor',{eax,Imm32}),
+    t(OS,'xor',{RM32,Imm32}),
+    t(OS,'xor',{RM32,Imm8}),
+    t(OS,'xor',{RM32,Reg32}),
+    t(OS,'xor',{Reg32,RM32}),
+    t(OS,'prefix_fs',{}), t(OS,'add',{{reg32,?EAX},{rm32,rm_mem(ea_disp32_rip(16#20))}}),
+    [].
+
+dotest() -> dotest1(group_leader()).	% stdout == group_leader
+
+dotest(File) ->
+    {ok,OS} = file:open(File, [write]),
+    dotest1(OS),
+    file:close(OS).
+-endif.
diff --git a/lib/hipe/amd64/hipe_amd64_frame.erl b/lib/hipe/amd64/hipe_amd64_frame.erl
new file mode 100644
index 0000000000..2bc319f40f
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_frame.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_x86_frame.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_liveness.erl b/lib/hipe/amd64/hipe_amd64_liveness.erl
new file mode 100644
index 0000000000..be878773cb
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_liveness.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_x86_liveness.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_main.erl b/lib/hipe/amd64/hipe_amd64_main.erl
new file mode 100644
index 0000000000..4de7364170
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_main.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_x86_main.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_pp.erl b/lib/hipe/amd64/hipe_amd64_pp.erl
new file mode 100644
index 0000000000..93ed7b9073
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_pp.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_x86_pp.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_ra.erl b/lib/hipe/amd64/hipe_amd64_ra.erl
new file mode 100644
index 0000000000..b9ac0338f0
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_ra.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_x86_ra.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_ra_finalise.erl b/lib/hipe/amd64/hipe_amd64_ra_finalise.erl
new file mode 100644
index 0000000000..a6a787c340
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_ra_finalise.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_x86_ra_finalise.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_ra_ls.erl b/lib/hipe/amd64/hipe_amd64_ra_ls.erl
new file mode 100644
index 0000000000..7ff2a7c082
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_ra_ls.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_x86_ra_ls.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_ra_naive.erl b/lib/hipe/amd64/hipe_amd64_ra_naive.erl
new file mode 100644
index 0000000000..194ea8b597
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_ra_naive.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2005-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_x86_ra_naive.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_ra_postconditions.erl b/lib/hipe/amd64/hipe_amd64_ra_postconditions.erl
new file mode 100644
index 0000000000..ef3c284c45
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_ra_postconditions.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_x86_ra_postconditions.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl
new file mode 100644
index 0000000000..9ed3d01a56
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_ra_sse2_postconditions.erl
@@ -0,0 +1,188 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-module(hipe_amd64_ra_sse2_postconditions).
+
+-export([check_and_rewrite/2]).
+
+-include("../x86/hipe_x86.hrl").
+-define(HIPE_INSTRUMENT_COMPILER, true).
+-include("../main/hipe.hrl").
+-define(count_temp(T), ?cons_counter(counter_mfa_mem_temps, T)).
+
+
+check_and_rewrite(AMD64Defun, Coloring) ->
+  %%io:format("Converting\n"),
+  TempMap = hipe_temp_map:cols2tuple(Coloring,hipe_amd64_specific_sse2),
+  %%io:format("Rewriting\n"),
+  #defun{code=Code0} = AMD64Defun,
+  {Code1, DidSpill} = do_insns(Code0, TempMap, [], false),
+  {AMD64Defun#defun{code=Code1, var_range={0, hipe_gensym:get_var(x86)}}, 
+   DidSpill}.
+
+do_insns([I|Insns], TempMap, Accum, DidSpill0) ->
+  {NewIs, DidSpill1} = do_insn(I, TempMap),
+  do_insns(Insns, TempMap, lists:reverse(NewIs, Accum), DidSpill0 or DidSpill1);
+do_insns([], _TempMap, Accum, DidSpill) ->
+  {lists:reverse(Accum), DidSpill}.
+
+do_insn(I, TempMap) ->	% Insn -> {Insn list, DidSpill}
+  case I of
+    #fmove{} ->
+      do_fmove(I, TempMap);
+    #fp_unop{} ->
+      do_fp_unop(I, TempMap);
+    #fp_binop{} ->
+      do_fp_binop(I, TempMap);
+    _ ->
+      %% All non sse2 ops
+      {[I], false}
+  end.
+
+%%% Fix an fp_binop.
+do_fp_binop(I, TempMap) ->
+  #fp_binop{src=Src,dst=Dst} = I,
+  case is_mem_opnd(Dst, TempMap) of
+    true ->
+      Tmp = clone(Dst),
+      {[#fmove{src=Dst, dst=Tmp},
+	I#fp_binop{src=Src,dst=Tmp},
+	#fmove{src=Tmp,dst=Dst}],
+       true};
+    false ->
+      {[I], false}
+  end.
+
+do_fp_unop(I, TempMap) ->
+  #fp_unop{arg=Arg} = I,
+  case is_mem_opnd(Arg, TempMap) of
+    true ->
+      Tmp = clone(Arg),
+      {[#fmove{src=Arg, dst=Tmp},
+	I#fp_unop{arg=Tmp},
+	#fmove{src=Tmp,dst=Arg}],
+       true};
+    false ->
+      {[I], false}
+  end.
+
+%%% Fix an fmove op.
+do_fmove(I, TempMap) ->
+  #fmove{src=Src,dst=Dst} = I,
+  case is_mem_opnd(Dst, TempMap) and is_mem_opnd(Src, TempMap) of
+    true ->
+      Tmp = clone(Src),
+      {[#fmove{src=Src, dst=Tmp},I#fmove{src=Tmp,dst=Dst}],
+       true};
+    false ->
+      {[I], false}
+  end.
+
+%%% Check if an operand denotes a memory cell (mem or pseudo).
+
+is_mem_opnd(Opnd, TempMap) ->
+  R =
+    case Opnd of
+      #x86_mem{} -> true;
+      #x86_temp{} -> 
+	Reg = hipe_x86:temp_reg(Opnd),
+	case hipe_x86:temp_is_allocatable(Opnd) of
+	  true -> 
+	    case tuple_size(TempMap) > Reg of 
+	      true ->
+		case 
+		  hipe_temp_map:is_spilled(Reg, TempMap) of
+		  true ->
+		    ?count_temp(Reg),
+		    true;
+		  false -> false
+		end;
+	      _ -> false
+	    end;
+	  false -> true
+	end;
+      _ -> false
+    end,
+  %% io:format("Op ~w mem: ~w\n",[Opnd,R]),
+  R.
+
+%%% Check if an operand is a spilled Temp.
+
+%%src_is_spilled(Src, TempMap) ->
+%%  case hipe_x86:is_temp(Src) of
+%%    true ->
+%%      Reg = hipe_x86:temp_reg(Src),
+%%      case hipe_x86:temp_is_allocatable(Src) of
+%%	true -> 
+%%	  case tuple_size(TempMap) > Reg of 
+%%	    true ->
+%%	      case hipe_temp_map:is_spilled(Reg, TempMap) of
+%%		true ->
+%%		  ?count_temp(Reg),
+%%		  true;
+%%		false ->
+%%		  false
+%%	      end;
+%%	    false ->
+%%	      false
+%%	  end;
+%%	false -> true
+%%      end;
+%%    false -> false
+%%  end.
+
+%% is_spilled(Temp, TempMap) ->
+%%   case hipe_x86:temp_is_allocatable(Temp) of
+%%     true ->
+%%       Reg = hipe_x86:temp_reg(Temp),
+%%       case tuple_size(TempMap) > Reg of 
+%%  	true ->
+%%  	  case hipe_temp_map:is_spilled(Reg, TempMap) of
+%%  	    true ->
+%%  	      ?count_temp(Reg),
+%%  	      true;
+%%  	    false ->
+%%  	      false
+%%  	  end;
+%%  	false ->
+%%  	  false
+%%       end;
+%%     false -> true
+%%   end.
+
+%%% Make Reg a clone of Dst (attach Dst's type to Reg).
+
+clone(Dst) ->
+  Type =
+    case Dst of
+      #x86_mem{} -> hipe_x86:mem_type(Dst);
+      #x86_temp{} -> hipe_x86:temp_type(Dst)
+    end,
+  hipe_x86:mk_new_temp(Type).
+
+%%% Make a certain reg into a clone of Dst
+
+%% clone2(Dst, Reg) ->
+%%   Type =
+%%     case Dst of
+%%       #x86_mem{} -> hipe_x86:mem_type(Dst);
+%%       #x86_temp{} -> hipe_x86:temp_type(Dst)
+%%     end,
+%%   hipe_x86:mk_temp(Reg,Type).
diff --git a/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl b/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl
new file mode 100644
index 0000000000..267f3335aa
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_ra_x87_ls.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_x86_ra_x87_ls.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_registers.erl b/lib/hipe/amd64/hipe_amd64_registers.erl
new file mode 100644
index 0000000000..4c49eeb00a
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_registers.erl
@@ -0,0 +1,288 @@
+%% -*- erlang-indent-level: 2 -*-
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-module(hipe_amd64_registers).
+
+-export([
+ 	 all_precoloured/0,
+ 	 allocatable/0,
+	 allocatable_sse2/0,
+	 allocatable_x87/0,
+ 	 arg/1,
+         args/1,
+         call_clobbered/0,
+	 fcalls/0,
+	 float_size/0,
+	 first_virtual/0,
+ 	 heap_limit/0,
+ 	 is_arg/1,
+ 	 is_fixed/1,
+ 	 is_precoloured/1,
+ 	 is_precoloured_sse2/1,
+	 is_precoloured_x87/1,
+ 	 live_at_return/0,
+	 nr_args/0,
+ 	 proc_offset/1,
+ 	 proc_pointer/0,
+ 	 rax/0,
+         rcx/0,
+	 ret/1,
+         sp/0,
+         sp_limit_offset/0,
+	 reg_name/1,
+         alignment/0,
+ 	 tailcall_clobbered/0,
+ 	 temp0/0,
+	 temp1/0,
+	 %% fixed/0,
+	 wordsize/0
+	]).
+
+-include("../rtl/hipe_literals.hrl").
+
+-ifdef(AMD64_HP_IN_REGISTER).
+-export([heap_pointer/0]).
+-endif.
+
+-ifdef(AMD64_FCALLS_IN_REGISTER).
+fcalls_offset() -> false.
+-else.
+fcalls_offset() -> ?P_FCALLS.
+-define(AMD64_FCALLS_REGISTER,16).
+-endif.
+
+-ifdef(AMD64_HEAP_LIMIT_IN_REGISTER).
+heap_limit_offset() -> false.
+-else.
+-define(AMD64_HEAP_LIMIT_REGISTER, 17).
+heap_limit_offset() -> ?P_HP_LIMIT.
+-endif.
+
+
+-define(RAX,  0).
+-define(RCX,  1).
+-define(RDX,  2).
+-define(RBX,  3).
+-define(RSP,  4).
+-define(RBP,  5).
+-define(RSI,  6).
+-define(RDI,  7).
+-define(R8 ,  8).
+-define(R9 ,  9).
+-define(R10, 10).
+-define(R11, 11).
+-define(R12, 12).
+-define(R13, 13).
+-define(R14, 14).
+-define(R15, 15).
+-define(FCALLS,           ?AMD64_FCALLS_REGISTER).
+-define(HEAP_LIMIT,       ?AMD64_HEAP_LIMIT_REGISTER).
+-define(LAST_PRECOLOURED, 17).
+
+-define(ARG0, ?RSI).
+-define(ARG1, ?RDX).
+-define(ARG2, ?RCX).
+-define(ARG3, ?R8).
+-define(ARG4, ?R9).
+-define(ARG5, ?RDI).
+
+-define(TEMP0, ?R14).
+-define(TEMP1, ?R13).
+
+-define(PROC_POINTER, ?RBP).
+
+reg_name(R) ->
+  case R of
+    ?RAX -> "%rax";
+    ?RCX -> "%rcx";
+    ?RDX -> "%rdx";
+    ?RBX -> "%rbx";
+    ?RSP -> "%rsp";
+    ?RBP -> "%rbp";
+    ?RSI -> "%rsi";
+    ?RDI -> "%rdi";
+    ?FCALLS -> "%fcalls";
+    ?HEAP_LIMIT -> "%hplim";
+    Other -> "%r" ++ integer_to_list(Other)
+  end.
+
+alignment() -> 8.  
+
+float_size() -> 8.  
+
+first_virtual() -> ?LAST_PRECOLOURED + 1.
+
+is_precoloured(X) -> X =< ?LAST_PRECOLOURED.
+
+is_precoloured_sse2(X) -> X =< 15.
+
+is_precoloured_x87(X) -> X =< 6.
+
+all_precoloured() ->
+  [?RAX,
+   ?RCX,
+   ?RDX,
+   ?RBX,
+   ?RSP,
+   ?RBP,
+   ?RSI,
+   ?RDI,
+   ?R8 ,
+   ?R9 ,
+   ?R10,
+   ?R11,
+   ?R12,
+   ?R13,
+   ?R14,
+   ?R15,
+   ?FCALLS,
+   ?HEAP_LIMIT].
+
+rax() -> ?RAX.
+rcx() -> ?RCX.
+temp0() -> ?TEMP0.
+temp1() -> ?TEMP1.
+sp() -> ?RSP.
+proc_pointer() -> ?PROC_POINTER.
+fcalls() -> ?FCALLS.
+heap_limit() -> ?HEAP_LIMIT.
+
+
+-ifdef(AMD64_HP_IN_REGISTER).
+-define(HEAP_POINTER, ?AMD64_HEAP_POINTER).
+heap_pointer() -> ?HEAP_POINTER.
+-define(LIST_HP_LIVE_AT_RETURN,[{?HEAP_POINTER,untagged}]).
+is_heap_pointer(?HEAP_POINTER) -> true;
+is_heap_pointer(_) -> false.
+%% -define(LIST_HP_FIXED,[?HEAP_POINTER]).
+
+-else.
+-define(HEAP_POINTER, -1).
+is_heap_pointer(_) -> false.
+%% -define(LIST_HP_FIXED,[]).
+-define(LIST_HP_LIVE_AT_RETURN,[]).
+-endif.
+
+proc_offset(?FCALLS) -> fcalls_offset();
+proc_offset(?HEAP_LIMIT) -> heap_limit_offset();
+proc_offset(_) -> false.
+
+sp_limit_offset() -> ?P_NSP_LIMIT.
+
+is_fixed(?RSP) -> true;
+is_fixed(?PROC_POINTER) -> true;
+is_fixed(?FCALLS) -> true;
+is_fixed(?HEAP_LIMIT) -> true;
+is_fixed(R) -> is_heap_pointer(R).
+
+%% fixed() ->
+%%     [?ESP, ?PROC_POINTER, ?FCALLS, ?HEAP_LIMIT | ?LIST_HP_FIXED].
+
+allocatable() ->
+  [?RDX, ?RCX, ?RBX, ?RAX, ?RSI, ?RDI,
+   ?R8 , ?R9 , ?R10, ?R11, ?R12, ?R13, ?R14, ?R15]
+    -- [?FCALLS, ?HEAP_POINTER, ?HEAP_LIMIT].
+
+allocatable_sse2() ->
+  [00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15]. %% xmm0 - xmm15
+
+allocatable_x87() ->
+  [0,1,2,3,4,5,6].
+
+nr_args() -> ?AMD64_NR_ARG_REGS.
+
+arg(N) ->
+  if N < ?AMD64_NR_ARG_REGS ->
+      case N of
+	0 -> ?ARG0;
+	1 -> ?ARG1;
+	2 -> ?ARG2;
+	3 -> ?ARG3;
+	4 -> ?ARG4;
+	5 -> ?ARG5;
+	_ -> exit({?MODULE, arg, N})
+      end;
+     true ->
+      exit({?MODULE, arg, N})
+  end.
+
+is_arg(R) ->
+  case R of
+    ?ARG0 -> ?AMD64_NR_ARG_REGS > 0;
+    ?ARG1 -> ?AMD64_NR_ARG_REGS > 1;
+    ?ARG2 -> ?AMD64_NR_ARG_REGS > 2;
+    ?ARG3 -> ?AMD64_NR_ARG_REGS > 3;
+    ?ARG4 -> ?AMD64_NR_ARG_REGS > 4;
+    ?ARG5 -> ?AMD64_NR_ARG_REGS > 5;
+    _ -> false
+  end.
+
+args(Arity) when is_integer(Arity), Arity >= 0 ->
+  N = erlang:min(Arity, ?AMD64_NR_ARG_REGS),
+  args(N-1, []).
+
+args(I, Rest) when I < 0 -> Rest;
+args(I, Rest) -> args(I-1, [arg(I) | Rest]).
+
+ret(N) ->
+  case N of
+    0 -> ?RAX;
+    _ -> exit({?MODULE, ret, N})
+  end.
+
+call_clobbered() ->
+  [{?RAX,tagged},{?RAX,untagged},	% does the RA strip the type or not?
+   {?RDX,tagged},{?RDX,untagged},
+   {?RCX,tagged},{?RCX,untagged},
+   {?RBX,tagged},{?RBX,untagged},
+   {?RDI,tagged},{?RDI,untagged},
+   {?RSI,tagged},{?RSI,untagged},
+   {?R8 ,tagged},{?R8 ,untagged},
+   {?R9 ,tagged},{?R9 ,untagged},
+   {?R10,tagged},{?R10,untagged},
+   {?R11,tagged},{?R11,untagged},
+   {?R12,tagged},{?R12,untagged},
+   {?R13,tagged},{?R13,untagged},
+   {?R14,tagged},{?R14,untagged},
+   {?R15,tagged},{?R15,untagged}
+   | fp_call_clobbered()]
+    --
+    [{?FCALLS,tagged},{?FCALLS,untagged},
+     {?HEAP_POINTER,tagged},{?HEAP_POINTER,untagged},
+     {?HEAP_LIMIT,tagged},{?HEAP_LIMIT,untagged}
+    ].
+
+fp_call_clobbered() -> %% sse2 since it has more registers than x87
+  [{Reg,double} || Reg <- allocatable_sse2()].
+
+tailcall_clobbered() ->		% tailcall crapola needs two temps
+  [{?TEMP0,tagged},{?TEMP0,untagged},
+   {?TEMP1,tagged},{?TEMP1,untagged}
+  | fp_call_clobbered()].
+
+live_at_return() ->
+    [{?RSP,untagged}
+     ,{?PROC_POINTER,untagged}
+     ,{?FCALLS,untagged}
+     ,{?HEAP_LIMIT,untagged}
+     | ?LIST_HP_LIVE_AT_RETURN
+    ].
+
+wordsize() -> 8.
diff --git a/lib/hipe/amd64/hipe_amd64_spill_restore.erl b/lib/hipe/amd64/hipe_amd64_spill_restore.erl
new file mode 100644
index 0000000000..56e3ffd24d
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_spill_restore.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_x86_spill_restore.erl").
diff --git a/lib/hipe/amd64/hipe_amd64_x87.erl b/lib/hipe/amd64/hipe_amd64_x87.erl
new file mode 100644
index 0000000000..e7bf1c1866
--- /dev/null
+++ b/lib/hipe/amd64/hipe_amd64_x87.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_x86_x87.erl").
diff --git a/lib/hipe/amd64/hipe_rtl_to_amd64.erl b/lib/hipe/amd64/hipe_rtl_to_amd64.erl
new file mode 100644
index 0000000000..17aef0eeac
--- /dev/null
+++ b/lib/hipe/amd64/hipe_rtl_to_amd64.erl
@@ -0,0 +1,20 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-include("../x86/hipe_rtl_to_x86.erl").
author	Erlang/OTP <[email protected]>	2009-11-20 14:54:40 +0000
committer	Erlang/OTP <[email protected]>	2009-11-20 14:54:40 +0000
commit	84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree	bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/hipe/amd64
download	otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2 otp-84adefa331c4159d432d22840663c38f155cd4c1.zip