From 84adefa331c4159d432d22840663c38f155cd4c1 Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Fri, 20 Nov 2009 14:54:40 +0000 Subject: The R13B03 release. --- lib/hipe/ppc/Makefile | 120 ++ lib/hipe/ppc/hipe_ppc.erl | 415 +++++++ lib/hipe/ppc/hipe_ppc.hrl | 118 ++ lib/hipe/ppc/hipe_ppc_assemble.erl | 603 +++++++++ lib/hipe/ppc/hipe_ppc_cfg.erl | 131 ++ lib/hipe/ppc/hipe_ppc_defuse.erl | 145 +++ lib/hipe/ppc/hipe_ppc_encode.erl | 1558 ++++++++++++++++++++++++ lib/hipe/ppc/hipe_ppc_finalise.erl | 65 + lib/hipe/ppc/hipe_ppc_frame.erl | 657 ++++++++++ lib/hipe/ppc/hipe_ppc_liveness_all.erl | 38 + lib/hipe/ppc/hipe_ppc_liveness_fpr.erl | 34 + lib/hipe/ppc/hipe_ppc_liveness_gpr.erl | 38 + lib/hipe/ppc/hipe_ppc_main.erl | 51 + lib/hipe/ppc/hipe_ppc_pp.erl | 350 ++++++ lib/hipe/ppc/hipe_ppc_ra.erl | 56 + lib/hipe/ppc/hipe_ppc_ra_finalise.erl | 271 +++++ lib/hipe/ppc/hipe_ppc_ra_ls.erl | 56 + lib/hipe/ppc/hipe_ppc_ra_naive.erl | 29 + lib/hipe/ppc/hipe_ppc_ra_postconditions.erl | 243 ++++ lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl | 130 ++ lib/hipe/ppc/hipe_ppc_registers.erl | 246 ++++ lib/hipe/ppc/hipe_rtl_to_ppc.erl | 1249 +++++++++++++++++++ 22 files changed, 6603 insertions(+) create mode 100644 lib/hipe/ppc/Makefile create mode 100644 lib/hipe/ppc/hipe_ppc.erl create mode 100644 lib/hipe/ppc/hipe_ppc.hrl create mode 100644 lib/hipe/ppc/hipe_ppc_assemble.erl create mode 100644 lib/hipe/ppc/hipe_ppc_cfg.erl create mode 100644 lib/hipe/ppc/hipe_ppc_defuse.erl create mode 100644 lib/hipe/ppc/hipe_ppc_encode.erl create mode 100644 lib/hipe/ppc/hipe_ppc_finalise.erl create mode 100644 lib/hipe/ppc/hipe_ppc_frame.erl create mode 100644 lib/hipe/ppc/hipe_ppc_liveness_all.erl create mode 100644 lib/hipe/ppc/hipe_ppc_liveness_fpr.erl create mode 100644 lib/hipe/ppc/hipe_ppc_liveness_gpr.erl create mode 100644 lib/hipe/ppc/hipe_ppc_main.erl create mode 100644 lib/hipe/ppc/hipe_ppc_pp.erl create mode 100644 lib/hipe/ppc/hipe_ppc_ra.erl create mode 100644 
lib/hipe/ppc/hipe_ppc_ra_finalise.erl create mode 100644 lib/hipe/ppc/hipe_ppc_ra_ls.erl create mode 100644 lib/hipe/ppc/hipe_ppc_ra_naive.erl create mode 100644 lib/hipe/ppc/hipe_ppc_ra_postconditions.erl create mode 100644 lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl create mode 100644 lib/hipe/ppc/hipe_ppc_registers.erl create mode 100644 lib/hipe/ppc/hipe_rtl_to_ppc.erl (limited to 'lib/hipe/ppc') diff --git a/lib/hipe/ppc/Makefile b/lib/hipe/ppc/Makefile new file mode 100644 index 0000000000..0857043527 --- /dev/null +++ b/lib/hipe/ppc/Makefile @@ -0,0 +1,120 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2004-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +ifndef DOCS +DOCS = ../doc +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +# Please keep this list sorted. 
+MODULES=hipe_ppc \ + hipe_ppc_assemble \ + hipe_ppc_cfg \ + hipe_ppc_defuse \ + hipe_ppc_encode \ + hipe_ppc_finalise \ + hipe_ppc_frame \ + hipe_ppc_liveness_all \ + hipe_ppc_liveness_fpr \ + hipe_ppc_liveness_gpr \ + hipe_ppc_main \ + hipe_ppc_pp \ + hipe_ppc_ra \ + hipe_ppc_ra_finalise \ + hipe_ppc_ra_ls \ + hipe_ppc_ra_naive \ + hipe_ppc_ra_postconditions \ + hipe_ppc_ra_postconditions_fp \ + hipe_ppc_registers \ + hipe_rtl_to_ppc + +HRL_FILES=hipe_ppc.hrl +ERL_FILES=$(MODULES:%=%.erl) +TARGET_FILES=$(MODULES:%=$(EBIN)/%.$(EMULATOR)) +DOC_FILES= $(MODULES:%=$(DOCS)/%.html) + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +warn_exported_vars + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: $(DOC_FILES) + +clean: + rm -f $(TARGET_FILES) + rm -f core + +$(DOCS)/%.html:%.erl + erl -noshell -run edoc_run file '"$<"' '[{dir, "$(DOCS)"}]' -s init stop + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + +# Please keep this list sorted. 
+$(EBIN)/hipe_ppc_assemble.beam: ../main/hipe.hrl ../../kernel/src/hipe_ext_format.hrl ../rtl/hipe_literals.hrl ../misc/hipe_sdi.hrl +$(EBIN)/hipe_ppc_cfg.beam: ../flow/cfg.hrl ../flow/cfg.inc +$(EBIN)/hipe_ppc_frame.beam: ../rtl/hipe_literals.hrl +$(EBIN)/hipe_ppc_liveness_all.beam: ../flow/liveness.inc +$(EBIN)/hipe_ppc_liveness_fpr.beam: ../flow/liveness.inc +$(EBIN)/hipe_ppc_liveness_gpr.beam: ../flow/liveness.inc +$(EBIN)/hipe_ppc_registers.beam: ../rtl/hipe_literals.hrl +$(EBIN)/hipe_rtl_to_ppc.beam: ../rtl/hipe_rtl.hrl + +$(TARGET_FILES): hipe_ppc.hrl ../misc/hipe_consttab.hrl diff --git a/lib/hipe/ppc/hipe_ppc.erl b/lib/hipe/ppc/hipe_ppc.erl new file mode 100644 index 0000000000..047e86c45b --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc.erl @@ -0,0 +1,415 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + + +-module(hipe_ppc). 
%% Exported interface: constructors, predicates and accessors for the
%% PowerPC back-end instruction and operand records.
-export([
         %% temporaries
         mk_temp/2,
         mk_new_temp/1,
         mk_new_nonallocatable_temp/1,
         is_temp/1,
         temp_reg/1,
         temp_type/1,
         temp_is_allocatable/1,
         temp_is_precoloured/1,

         %% immediates
         mk_simm16/1,
         mk_uimm16/1,

         %% call targets and stack descriptors
         mk_mfa/3,

         mk_prim/1,
         is_prim/1,
         prim_prim/1,

         mk_sdesc/4,

         %% real instructions
         mk_alu/4,

         mk_b_fun/2,

         mk_b_label/1,

         mk_bc/3,

         mk_bctr/1,

         mk_bctrl/1,

         mk_bl/3,

         mk_blr/0,

         mk_cmp/3,

         mk_comment/1,

         mk_label/1,
         is_label/1,
         label_label/1,

         mk_li/2,
         mk_li/3,
         mk_addi/4,

         mk_load/4,
         mk_loadx/4,
         mk_load/6,
         ldop_to_ldxop/1,

         mk_mfspr/2,

         mk_mtcr/1,

         mk_mtspr/2,

         %% pseudo instructions
         mk_pseudo_bc/4,
         negate_bcond/1,

         mk_pseudo_call/4,
         pseudo_call_contlab/1,
         pseudo_call_func/1,
         pseudo_call_sdesc/1,
         pseudo_call_linkage/1,

         mk_pseudo_call_prepare/1,
         pseudo_call_prepare_nrstkargs/1,

         mk_pseudo_li/2,

         mk_pseudo_move/2,
         is_pseudo_move/1,
         pseudo_move_dst/1,
         pseudo_move_src/1,

         mk_pseudo_tailcall/4,
         pseudo_tailcall_func/1,
         pseudo_tailcall_stkargs/1,
         pseudo_tailcall_linkage/1,

         mk_pseudo_tailcall_prepare/0,

         mk_store/4,
         mk_storex/4,
         mk_store/6,
         stop_to_stxop/1,

         mk_unary/3,

         %% floating point
         mk_lfd/3,
         mk_lfdx/3,
         mk_fload/4,

         %% mk_stfd/3,
         mk_stfdx/3,
         mk_fstore/4,

         mk_fp_binary/4,

         mk_fp_unary/3,

         mk_pseudo_fmove/2,
         is_pseudo_fmove/1,
         pseudo_fmove_dst/1,
         pseudo_fmove_src/1,

         %% function definitions
         mk_defun/8,
         defun_mfa/1,
         defun_formals/1,
         defun_is_closure/1,
         defun_is_leaf/1,
         defun_code/1,
         defun_data/1,
         defun_var_range/1]).

-include("hipe_ppc.hrl").

%% mk_temp(Reg, Type, Allocatable) -> #ppc_temp{}
%% Wraps register number Reg of the given Type in a temp record.
mk_temp(Reg, Type, Allocatable) ->
  #ppc_temp{reg=Reg, type=Type, allocatable=Allocatable}.

%% mk_temp(Reg, Type): as mk_temp/3, with the allocatable flag set to true.
mk_temp(Reg, Type) ->
  mk_temp(Reg, Type, true).

%% mk_new_temp(Type, Allocatable) -> #ppc_temp{}
%% Like mk_temp/3, but the register number is drawn from
%% hipe_gensym:get_next_var(ppc).
mk_new_temp(Type, Allocatable) ->
  FreshReg = hipe_gensym:get_next_var(ppc),
  mk_temp(FreshReg, Type, Allocatable).

%% Fresh temp with the allocatable flag set to true.
mk_new_temp(Type) ->
  mk_new_temp(Type, true).

%% Fresh temp with the allocatable flag set to false.
mk_new_nonallocatable_temp(Type) ->
  mk_new_temp(Type, false).

%% is_temp(X) -> boolean(): true iff X is a #ppc_temp{} record.
is_temp(#ppc_temp{}) -> true;
is_temp(_) -> false.
%% Field accessors for #ppc_temp{}.
temp_reg(#ppc_temp{reg=R}) -> R.
temp_type(#ppc_temp{type=T}) -> T.
temp_is_allocatable(#ppc_temp{allocatable=Flag}) -> Flag.

%% temp_is_precoloured(Temp) -> term()
%% Delegates to hipe_ppc_registers: temps of type 'double' are checked
%% with is_precoloured_fpr/1, every other type with is_precoloured_gpr/1.
temp_is_precoloured(#ppc_temp{reg=R, type='double'}) ->
  hipe_ppc_registers:is_precoloured_fpr(R);
temp_is_precoloured(#ppc_temp{reg=R}) ->
  hipe_ppc_registers:is_precoloured_gpr(R).

%% Immediate operand wrappers (the value is stored as given, unchecked).
mk_simm16(Value) -> #ppc_simm16{value=Value}.
mk_uimm16(Value) -> #ppc_uimm16{value=Value}.

%% Call target naming a module/function/arity.
mk_mfa(M, F, A) -> #ppc_mfa{m=M, f=F, a=A}.

%% Call target naming a primitive, plus predicate and accessor.
mk_prim(Prim) -> #ppc_prim{prim=Prim}.
is_prim(#ppc_prim{}) -> true;
is_prim(_) -> false.
prim_prim(#ppc_prim{prim=Prim}) -> Prim.

%% Stack descriptor attached to call instructions.
mk_sdesc(ExnLab, FSize, Arity, Live) ->
  #ppc_sdesc{exnlab=ExnLab, fsize=FSize, arity=Arity, live=Live}.

%% Plain record constructors for the real (non-pseudo) instructions.
mk_alu(AluOp, Dst, Src1, Src2) ->
  #alu{aluop=AluOp, dst=Dst, src1=Src1, src2=Src2}.

mk_b_fun(Fun, Linkage) ->
  #b_fun{'fun'=Fun, linkage=Linkage}.

mk_b_label(Label) ->
  #b_label{label=Label}.

mk_bc(BCond, Label, Pred) ->
  #bc{bcond=BCond, label=Label, pred=Pred}.

mk_bctr(Labels) ->
  #bctr{labels=Labels}.

mk_bctrl(SDesc) ->
  #bctrl{sdesc=SDesc}.

mk_bl(Fun, SDesc, Linkage) ->
  #bl{'fun'=Fun, sdesc=SDesc, linkage=Linkage}.

mk_blr() ->
  #blr{}.

mk_cmp(CmpOp, Src1, Src2) ->
  #cmp{cmpop=CmpOp, src1=Src1, src2=Src2}.

mk_comment(Term) ->
  #comment{term=Term}.

%% Label pseudo-instruction, with predicate and accessor.
mk_label(Label) -> #label{label=Label}.
is_label(#label{}) -> true;
is_label(_) -> false.
label_label(#label{label=Label}) -> Label.

%%% Load an integer constant into a register.
%%% Returns a list of instructions; mk_li/3 prepends them to Tail.
mk_li(Dst, Value) ->
  mk_li(Dst, Value, []).

mk_li(Dst, Value, Tail) ->
  %% The source operand is the temp for register number 0.
  mk_addi(Dst, mk_temp(0, 'untagged'), Value, Tail).

%% mk_addi(Dst, R0, Value, Tail) -> [#alu{} | Tail]
%% Emits Dst := R0 + Value using addi for the sign-extended low half
%% (at_l/1) and addis for the carry-adjusted high half (at_ha/1).
%% A half that is zero is skipped, except that a lone 'addi' is always
%% emitted when the high half is zero.
mk_addi(Dst, R0, Value, Tail) ->
  Lo16 = at_l(Value),
  Hi16 = at_ha(Value),
  case {Hi16, Lo16} of
    {0, _} ->
      [mk_alu('addi', Dst, R0, mk_simm16(Lo16)) | Tail];
    {_, 0} ->
      [mk_alu('addis', Dst, R0, mk_simm16(Hi16)) | Tail];
    {_, _} ->
      [mk_alu('addi', Dst, R0, mk_simm16(Lo16)),
       mk_alu('addis', Dst, Dst, mk_simm16(Hi16)) |
       Tail]
  end.
%% at_l(Value): low 16 bits of Value, sign-extended.
at_l(Value) ->
  Low16 = Value band 16#FFFF,
  simm16sext(Low16).

%% at_ha(Value): bits 31..16 of Value + 16#8000, sign-extended.  Adding
%% 16#8000 first compensates for the sign extension of the low half, so
%% (at_ha(V) bsl 16) + at_l(V) reproduces V's low 32 bits.
at_ha(Value) ->
  Adjusted16 = ((Value + 16#8000) bsr 16) band 16#FFFF,
  simm16sext(Adjusted16).

%% simm16sext(Value): reinterpret a 16-bit pattern as a signed integer.
simm16sext(Value) when Value >= 32768 -> (-1 bsl 16) bor Value;
simm16sext(Value) -> Value.

%% mk_li_new(Dst, Value, Tail) -> [#alu{} | Tail]
%% Loads the constant Value into Dst with addi, addis, or addis+ori.
mk_li_new(Dst, Value, Tail) ->	% Dst may be R0
  R0 = mk_temp(0, 'untagged'),
  case at_ha(Value) of
    0 ->
      %% Value[31:16] are the sign-extension of Value[15].
      %% Use a single addi to load and sign-extend 16 bits.
      [mk_alu('addi', Dst, R0, mk_simm16(at_l(Value))) | Tail];
    _ ->
      %% Use addis to load the high 16 bits, followed by an
      %% optional ori to load non sign-extended low 16 bits.
      Hi = simm16sext((Value bsr 16) band 16#FFFF),
      LowPart =
	case Value band 16#FFFF of
	  0 -> Tail;
	  Lo -> [mk_alu('ori', Dst, Dst, mk_uimm16(Lo)) | Tail]
	end,
      [mk_alu('addis', Dst, R0, mk_simm16(Hi)) | LowPart]
  end.

%% Displacement-form load record.
mk_load(LDop, Dst, Disp, Base) ->
  #load{ldop=LDop, dst=Dst, disp=Disp, base=Base}.

%% Indexed-form load record.
mk_loadx(LdxOp, Dst, Base1, Base2) ->
  #loadx{ldxop=LdxOp, dst=Dst, base1=Base1, base2=Base2}.

%% mk_load(LdOp, Dst, Offset, Base, Scratch, Rest) -> [Insn | Rest]
%% Load from Base+Offset for an arbitrary integer Offset.  Offsets that
%% fit in a signed 16-bit displacement use the plain form; otherwise the
%% offset is materialised in an index register and the indexed form is
%% used.  Dst itself serves as the index register unless it is the same
%% register as Base, in which case mk_scratch(Scratch) is used.
mk_load(LdOp, Dst, Offset, Base, _Scratch, Rest)
  when is_integer(Offset), Offset >= -32768, Offset =< 32767 ->
  [mk_load(LdOp, Dst, Offset, Base) | Rest];
mk_load(LdOp, Dst, Offset, Base, Scratch, Rest) when is_integer(Offset) ->
  LdxOp = ldop_to_ldxop(LdOp),
  Index =
    case temp_reg(Dst) =:= temp_reg(Base) of
      false -> Dst;
      true -> mk_scratch(Scratch)
    end,
  mk_li_new(Index, Offset, [mk_loadx(LdxOp, Dst, Base, Index) | Rest]).

%% Map a displacement-form load opcode to its indexed-form counterpart.
ldop_to_ldxop(LdOp) ->
  case LdOp of
    'lbz' -> 'lbzx';
    'lha' -> 'lhax';
    'lhz' -> 'lhzx';
    'lwz' -> 'lwzx'
  end.

%% mk_scratch(Scratch): 0 selects the temp for register number 0,
%% 'new' allocates a fresh untagged temp.
mk_scratch(Scratch) ->
  case Scratch of
    0 -> mk_temp(0, 'untagged');
    'new' -> mk_new_temp('untagged')
  end.

%% Special-purpose and condition register moves.
mk_mfspr(Dst, Spr) ->
  #mfspr{dst=Dst, spr=Spr}.

mk_mtcr(Src) ->
  #mtcr{src=Src}.

mk_mtspr(Spr, Src) ->
  #mtspr{spr=Spr, src=Src}.
%% mk_pseudo_bc(BCond, TrueLab, FalseLab, Pred) -> #pseudo_bc{}
%% Two-way conditional branch.  The result is normalised so that the
%% stored prediction is at most 0.5: when Pred >= 0.5 the condition is
%% negated, the two labels are swapped and the prediction becomes
%% 1.0-Pred.
mk_pseudo_bc(BCond, TrueLab, FalseLab, Pred) when Pred >= 0.5 ->
  mk_pseudo_bc_simple(negate_bcond(BCond), FalseLab, TrueLab, 1.0-Pred);
mk_pseudo_bc(BCond, TrueLab, FalseLab, Pred) ->
  mk_pseudo_bc_simple(BCond, TrueLab, FalseLab, Pred).

%% Raw constructor; only accepts an already normalised prediction.
mk_pseudo_bc_simple(BCond, TrueLab, FalseLab, Pred) when Pred =< 0.5 ->
  #pseudo_bc{bcond=BCond,
	     true_label=TrueLab,
	     false_label=FalseLab,
	     pred=Pred}.

%% negate_bcond(BCond): the branch condition with the opposite outcome.
negate_bcond(BCond) ->
  case BCond of
    'lt' -> 'ge';
    'ge' -> 'lt';
    'gt' -> 'le';
    'le' -> 'gt';
    'eq' -> 'ne';
    'ne' -> 'eq';
    'so' -> 'ns';
    'ns' -> 'so'
  end.

%% pseudo_call: constructor and field accessors.
mk_pseudo_call(FunC, SDesc, ContLab, Linkage) ->
  #pseudo_call{func=FunC, sdesc=SDesc, contlab=ContLab, linkage=Linkage}.

pseudo_call_func(#pseudo_call{func=F}) -> F.
pseudo_call_sdesc(#pseudo_call{sdesc=S}) -> S.
pseudo_call_contlab(#pseudo_call{contlab=C}) -> C.
pseudo_call_linkage(#pseudo_call{linkage=L}) -> L.

%% pseudo_call_prepare: constructor and field accessor.
mk_pseudo_call_prepare(NrStkArgs) ->
  #pseudo_call_prepare{nrstkargs=NrStkArgs}.

pseudo_call_prepare_nrstkargs(#pseudo_call_prepare{nrstkargs=N}) -> N.

%% pseudo_li: load of a symbolic immediate into Dst.
mk_pseudo_li(Dst, Imm) ->
  #pseudo_li{dst=Dst, imm=Imm}.

%% pseudo_move: constructor, predicate and field accessors.
mk_pseudo_move(Dst, Src) ->
  #pseudo_move{dst=Dst, src=Src}.

is_pseudo_move(#pseudo_move{}) -> true;
is_pseudo_move(_) -> false.

pseudo_move_dst(#pseudo_move{dst=D}) -> D.
pseudo_move_src(#pseudo_move{src=S}) -> S.

%% pseudo_tailcall: constructor and field accessors.
mk_pseudo_tailcall(FunC, Arity, StkArgs, Linkage) ->
  #pseudo_tailcall{func=FunC, arity=Arity, stkargs=StkArgs, linkage=Linkage}.

pseudo_tailcall_func(#pseudo_tailcall{func=F}) -> F.
pseudo_tailcall_stkargs(#pseudo_tailcall{stkargs=Args}) -> Args.
pseudo_tailcall_linkage(#pseudo_tailcall{linkage=L}) -> L.

mk_pseudo_tailcall_prepare() ->
  #pseudo_tailcall_prepare{}.

%% Displacement-form store record.
mk_store(STop, Src, Disp, Base) ->
  #store{stop=STop, src=Src, disp=Disp, base=Base}.

%% Indexed-form store record.
mk_storex(StxOp, Src, Base1, Base2) ->
  #storex{stxop=StxOp, src=Src, base1=Base1, base2=Base2}.
%% mk_store(StOp, Src, Offset, Base, Scratch, Rest) -> [Insn | Rest]
%% Store Src to Base+Offset for an arbitrary integer Offset.  Offsets
%% that fit in a signed 16-bit displacement use the plain form;
%% otherwise the offset is loaded into mk_scratch(Scratch) and the
%% indexed form is used.
mk_store(StOp, Src, Offset, Base, _Scratch, Rest)
  when is_integer(Offset), Offset >= -32768, Offset =< 32767 ->
  [mk_store(StOp, Src, Offset, Base) | Rest];
mk_store(StOp, Src, Offset, Base, Scratch, Rest) when is_integer(Offset) ->
  StxOp = stop_to_stxop(StOp),
  Index = mk_scratch(Scratch),
  mk_li_new(Index, Offset, [mk_storex(StxOp, Src, Base, Index) | Rest]).

%% Map a displacement-form store opcode to its indexed-form counterpart.
stop_to_stxop(StOp) ->
  case StOp of
    'stb' -> 'stbx';
    'sth' -> 'sthx';
    'stw' -> 'stwx'
  end.

mk_unary(UnOp, Dst, Src) ->
  #unary{unop=UnOp, dst=Dst, src=Src}.

%% Floating-point loads: displacement form, indexed form, and a
%% list-returning front end that picks between them the same way
%% mk_store/6 does.
mk_lfd(Dst, Disp, Base) ->
  #lfd{dst=Dst, disp=Disp, base=Base}.

mk_lfdx(Dst, Base1, Base2) ->
  #lfdx{dst=Dst, base1=Base1, base2=Base2}.

mk_fload(Dst, Offset, Base, _Scratch)
  when is_integer(Offset), Offset >= -32768, Offset =< 32767 ->
  [mk_lfd(Dst, Offset, Base)];
mk_fload(Dst, Offset, Base, Scratch) when is_integer(Offset) ->
  Index = mk_scratch(Scratch),
  mk_li_new(Index, Offset, [mk_lfdx(Dst, Base, Index)]).

%% Floating-point stores, mirroring the loads above.
mk_stfd(Src, Disp, Base) ->
  #stfd{src=Src, disp=Disp, base=Base}.

mk_stfdx(Src, Base1, Base2) ->
  #stfdx{src=Src, base1=Base1, base2=Base2}.

mk_fstore(Src, Offset, Base, _Scratch)
  when is_integer(Offset), Offset >= -32768, Offset =< 32767 ->
  [mk_stfd(Src, Offset, Base)];
mk_fstore(Src, Offset, Base, Scratch) when is_integer(Offset) ->
  Index = mk_scratch(Scratch),
  mk_li_new(Index, Offset, [mk_stfdx(Src, Base, Index)]).

%% Floating-point arithmetic records.
mk_fp_binary(FpBinOp, Dst, Src1, Src2) ->
  #fp_binary{fp_binop=FpBinOp, dst=Dst, src1=Src1, src2=Src2}.

mk_fp_unary(FpUnOp, Dst, Src) ->
  #fp_unary{fp_unop=FpUnOp, dst=Dst, src=Src}.

%% pseudo_fmove: constructor, predicate and field accessors.
mk_pseudo_fmove(Dst, Src) ->
  #pseudo_fmove{dst=Dst, src=Src}.

is_pseudo_fmove(#pseudo_fmove{}) -> true;
is_pseudo_fmove(_) -> false.

pseudo_fmove_dst(#pseudo_fmove{dst=D}) -> D.
pseudo_fmove_src(#pseudo_fmove{src=S}) -> S.

%% mk_defun/8: bundle a compiled function into a #defun{} record.
mk_defun(MFA, Formals, IsClosure, IsLeaf, Code, Data, VarRange, LabelRange) ->
  #defun{mfa=MFA,
	 formals=Formals,
	 isclosure=IsClosure,
	 isleaf=IsLeaf,
	 code=Code,
	 data=Data,
	 var_range=VarRange,
	 label_range=LabelRange}.

defun_mfa(#defun{mfa=MFA}) -> MFA.
%% Remaining field accessors for #defun{}.
defun_formals(#defun{formals=Fs}) -> Fs.
defun_is_closure(#defun{isclosure=Flag}) -> Flag.
defun_is_leaf(#defun{isleaf=Flag}) -> Flag.
defun_code(#defun{code=Insns}) -> Insns.
defun_data(#defun{data=ConstTab}) -> ConstTab.
defun_var_range(#defun{var_range=Range}) -> Range.
diff --git a/lib/hipe/ppc/hipe_ppc.hrl b/lib/hipe/ppc/hipe_ppc.hrl
new file mode 100644
index 0000000000..25e7ae0b5f
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc.hrl
@@ -0,0 +1,118 @@
%%% -*- erlang-indent-level: 2 -*-
%%%
%%% %CopyrightBegin%
%%%
%%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%%
%%% The contents of this file are subject to the Erlang Public License,
%%% Version 1.1, (the "License"); you may not use this file except in
%%% compliance with the License. You should have received a copy of the
%%% Erlang Public License along with this software. If not, it can be
%%% retrieved online at http://www.erlang.org/.
%%%
%%% Software distributed under the License is distributed on an "AS IS"
%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%%% the License for the specific language governing rights and limitations
%%% under the License.
%%%
%%% %CopyrightEnd%
%%%


%%%--------------------------------------------------------------------
%%% Basic Values:
%%%
%%% temp	::= {ppc_temp, reg, type, allocatable}
%%% reg		::= <token from hipe_ppc_registers>
%%% type	::= tagged | untagged
%%% allocatable	::= true | false
%%%
%%% sdesc	::= {ppc_sdesc, exnlab, fsize, arity, live}
%%% exnlab	::= [] | label
%%% fsize	::= int32		(frame size in words)
%%% live	::= <tuple of int32>	(word offsets)
%%% arity	::= uint8
%%%
%%% mfa		::= {ppc_mfa, atom, atom, arity}
%%% prim	::= {ppc_prim, atom}
%%%
%%% NOTE(review): the <...> placeholders on the reg and live lines were
%%% missing from this copy (apparently stripped as markup) and have been
%%% restored by analogy with the other HiPE back ends -- confirm.

-record(ppc_mfa, {m::atom(), f::atom(), a::arity()}).
-record(ppc_prim, {prim}).
-record(ppc_sdesc, {exnlab, fsize, arity::arity(), live}).
-record(ppc_simm16, {value}).
-record(ppc_temp, {reg, type, allocatable}).
-record(ppc_uimm16, {value}).

%%% Instruction Operands:
%%%
%%% aluop	::= add | add. | addi | addic. | addis | addo. | subf | subf. | subfo.
%%%		  | and | and. | andi. | or | or. | ori | xor | xor. | xori
%%%		  | slw | slw. | slwi | slwi. | srw | srw. | srwi | srwi.
%%%		  | sraw | sraw. | srawi | srawi. | mulli | mullw | mullw. | mullwo.
%%% bcond	::= eq | ne | gt | ge | lt | le | so | ns
%%% cmpop	::= cmp | cmpi | cmpl | cmpli
%%% ldop	::= lbz | lha | lhz | lwz
%%% ldxop	::= lbzx | lhax | lhzx | lwzx | lhbrx | lwbrx
%%% stop	::= stb | stw	(HW has sth, but we don't use it)
%%% stxop	::= stbx | stwx	(HW has sthx/sthbrx/stwbrx, but we don't use them)
%%% unop	::= extsb | extsh | {rlwinm,SH,MB,ME} | {rlwinm.,SH,MB,ME}
%%%
%%% immediate	::= int32 | atom | {label, label_type}
%%% label_type	::= constant | closure | c_const
%%%
%%% dst		::= temp
%%% src		::= temp
%%%		  | simm16 | uimm16	(only in alu.src2, cmp.src2)
%%% base	::= temp
%%% disp	::= sint16		(untagged simm16)
%%%
%%% fun		::= mfa | prim
%%% func	::= mfa | prim | 'ctr'
%%%
%%% spr		::= ctr | lr | xer

%%% Instructions:
%%% One record per instruction kind; field names follow the operand
%%% grammar above.

-record(alu, {aluop, dst, src1, src2}).
-record(b_fun, {'fun', linkage}).	% known tailcall
-record(b_label, {label}).		% local jump, unconditional
-record(bc, {bcond, label, pred}).	% local jump, conditional
-record(bctr, {labels}).		% computed tailcall or switch
-record(bctrl, {sdesc}).		% computed recursive call
-record(bl, {'fun', sdesc, linkage}).	% known recursive call
-record(blr, {}).			% unconditional bclr (return)
-record(cmp, {cmpop, src1, src2}).
-record(comment, {term}).
-record(label, {label}).
-record(load, {ldop, dst, disp, base}).	% non-indexed, non-update form
-record(loadx, {ldxop, dst, base1, base2}).	% indexed, non-update form
-record(mfspr, {dst, spr}).		% for reading LR and XER
-record(mtcr, {src}).			% for copying XER[CA] to CR0[EQ] via a temp
-record(mtspr, {spr, src}).		% for writing LR, CTR, and XER
-record(pseudo_bc, {bcond, true_label, false_label, pred}).
%%% Pseudo instructions, stores, unary operations and floating point.
-record(pseudo_call, {func, sdesc, contlab, linkage}).
-record(pseudo_call_prepare, {nrstkargs}).
-record(pseudo_li, {dst, imm}).
-record(pseudo_move, {dst, src}).
-record(pseudo_tailcall, {func, arity, stkargs, linkage}).
-record(pseudo_tailcall_prepare, {}).
-record(store, {stop, src, disp, base}).	% non-indexed, non-update form
-record(storex, {stxop, src, base1, base2}).	% indexed, non-update form
-record(unary, {unop, dst, src}).
-record(lfd, {dst, disp, base}).
-record(lfdx, {dst, base1, base2}).
-record(stfd, {src, disp, base}).
-record(stfdx, {src, base1, base2}).
-record(fp_binary, {fp_binop, dst, src1, src2}).
-record(fp_unary, {fp_unop, dst, src}).
-record(pseudo_fmove, {dst, src}).

%%% Function definitions.

-include("../misc/hipe_consttab.hrl").

-record(defun, {mfa :: mfa(),
		formals,
		code,
		data :: hipe_consttab(),
		isclosure :: boolean(),
		isleaf :: boolean(),
		var_range,
		label_range}).
diff --git a/lib/hipe/ppc/hipe_ppc_assemble.erl b/lib/hipe/ppc/hipe_ppc_assemble.erl
new file mode 100644
index 0000000000..6f06f8b841
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_assemble.erl
@@ -0,0 +1,603 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%


-module(hipe_ppc_assemble).
-export([assemble/4]).

-include("../main/hipe.hrl").
% for VERSION_STRING, when_option (refers to the hipe.hrl include directly above)
-include("hipe_ppc.hrl").
-include("../../kernel/src/hipe_ext_format.hrl").
-include("../rtl/hipe_literals.hrl").
-include("../misc/hipe_sdi.hrl").
-undef(ASSERT).
-define(ASSERT(G), if G -> [] ; true -> exit({assertion_failed,?MODULE,?LINE,??G}) end).

%% assemble(CompiledCode, Closures, Exports, Options) -> binary()
%% CompiledCode is a list of {MFA, Defun} pairs.  Their code and constant
%% data are packed, translated and encoded, and the result is returned
%% as one term_to_binary/1 blob.
assemble(CompiledCode, Closures, Exports, Options) ->
  print("****************** Assembling *******************\n", [], Options),
  %%
  Code = [{MFA, hipe_ppc:defun_code(Defun), hipe_ppc:defun_data(Defun)}
	  || {MFA, Defun} <- CompiledCode],
  %%
  {ConstAlign,ConstSize,ConstMap,RefsFromConsts} =
    hipe_pack_constants:pack_constants(Code, 4),
  %%
  Translated = translate(Code, ConstMap),
  {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} =
    encode(Translated, Options),
  print("Total num bytes=~w\n", [CodeSize], Options),
  %%
  SC = hipe_pack_constants:slim_constmap(ConstMap),
  DataRelocs = mk_data_relocs(RefsFromConsts, LabelMap),
  SSE = slim_sorted_exportmap(ExportMap,Closures,Exports),
  SlimRefs = hipe_pack_constants:slim_refs(AccRefs),
  term_to_binary([{?VERSION_STRING(),?HIPE_SYSTEM_CRC},
		  ConstAlign, ConstSize,
		  SC,
		  DataRelocs, % nee LM, LabelMap
		  SSE,
		  CodeSize,CodeBinary,SlimRefs,
		  0,[] % ColdCodeSize, SlimColdRefs
		 ]).

%%%
%%% Assembly Pass 1.
%%% Process initial {MFA,Code,Data} list.
%%% Translate each MFA's body, choosing operand & instruction kinds.
%%%
%%% Assembly Pass 2.
%%% Perform short/long form optimisation for jumps.
%%%
%%% Result is {MFA,NewCode,CodeSize,LabelMap} list.
%%%

translate(Code, ConstMap) ->
  translate_mfas(Code, ConstMap, []).

%% Walks the {MFA,Insns,Data} list, accumulating the translated entries
%% in reverse and restoring the input order at the end.
translate_mfas([], _ConstMap, Done) ->
  lists:reverse(Done);
translate_mfas([{MFA,Insns,_Data}|Rest], ConstMap, Done) ->
  {NewInsns,CodeSize,LabelMap} =
    translate_insns(Insns, MFA, ConstMap, hipe_sdi:pass1_init(), 0, []),
  Entry = {MFA,NewInsns,CodeSize,LabelMap},
  translate_mfas(Rest, ConstMap, [Entry|Done]).
%% translate_insns/6 and add_insns/7 are mutually recursive.
%% translate_insns/6 expands one source instruction at a time with
%% translate_insn/3; add_insns/7 pushes the expansion onto the reversed
%% output while tracking the byte address and feeding labels and
%% conditional branches to hipe_sdi pass 1.  When the input is
%% exhausted, hipe_sdi:pass2/1 supplies the label map and the code size
%% increment that is added to the final address.
translate_insns([], _MFA, _ConstMap, SdiPass1, Address, NewInsns) ->
  {LabelMap,CodeSizeIncr} = hipe_sdi:pass2(SdiPass1),
  {lists:reverse(NewInsns), Address+CodeSizeIncr, LabelMap};
translate_insns([I|Insns], MFA, ConstMap, SdiPass1, Address, NewInsns) ->
  Expansion = translate_insn(I, MFA, ConstMap),
  add_insns(Expansion, Insns, MFA, ConstMap, SdiPass1, Address, NewInsns).

add_insns([], Insns, MFA, ConstMap, SdiPass1, Address, NewInsns) ->
  translate_insns(Insns, MFA, ConstMap, SdiPass1, Address, NewInsns);
add_insns([I|Is], Insns, MFA, ConstMap, SdiPass1, Address, NewInsns) ->
  NewSdiPass1 = sdi_pass1_note(I, Address, SdiPass1),
  NextAddress = Address + insn_size(I),
  add_insns(Is, Insns, MFA, ConstMap, NewSdiPass1, NextAddress, [I|NewInsns]).

%% Registers a label or a bc_sdi with the SDI pass-1 state; any other
%% instruction leaves the state unchanged.
sdi_pass1_note({'.label',L,_}, Address, SdiPass1) ->
  hipe_sdi:pass1_add_label(SdiPass1, Address, L);
sdi_pass1_note({bc_sdi,{_,{label,L},_},_}, Address, SdiPass1) ->
  %% incr is written as 8-4 (insn_size/1 counts a bc_sdi as 4 bytes);
  %% lb/ub bound the displacement to -16#2000*4 .. 16#1FFF*4 bytes.
  SdiInfo = #sdi_info{incr=(8-4),lb=-16#2000*4,ub=16#1FFF*4},
  hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, SdiInfo);
sdi_pass1_note(_, _Address, SdiPass1) ->
  SdiPass1.

%% Size in bytes that a translated instruction contributes.
insn_size({'.label',_,_}) -> 0;
insn_size({'.reloc',_,_}) -> 0;
insn_size(_) -> 4.	% bc_sdi included in this case
%% translate_insn(I, MFA, ConstMap) -> [{Op,Opnd,OrigI}]
%% Dispatches on the instruction record and returns its expansion as a
%% list of {Op,Operands,OriginalInsn} triples.  Comments expand to
%% nothing; unknown instructions terminate the process with
%% exit({?MODULE,translate_insn,I}).
%%
%% #lfdx{} and #stfdx{} are handled here because hipe_ppc:mk_fload/4 and
%% hipe_ppc:mk_fstore/4 emit them for offsets outside the signed 16-bit
%% range; without these clauses such code fell through to the exit.
%% NOTE(review): this relies on hipe_ppc_encode:insn_encode/2 accepting
%% 'lfdx' and 'stfdx' with {FpReg,Base1,Base2} operands -- confirm.
translate_insn(I, MFA, ConstMap) ->	% -> [{Op,Opnd,OrigI}]
  case I of
    #alu{} -> do_alu(I);
    #b_fun{} -> do_b_fun(I);
    #b_label{} -> do_b_label(I);
    #bc{} -> do_bc(I);
    #bctr{} -> do_bctr(I);
    #bctrl{} -> do_bctrl(I);
    #bl{} -> do_bl(I);
    #blr{} -> do_blr(I);
    #comment{} -> [];
    #cmp{} -> do_cmp(I);
    #label{} -> do_label(I);
    #load{} -> do_load(I);
    #loadx{} -> do_loadx(I);
    #mfspr{} -> do_mfspr(I);
    #mtcr{} -> do_mtcr(I);
    #mtspr{} -> do_mtspr(I);
    %% pseudo_bc: eliminated before assembly
    %% pseudo_call: eliminated before assembly
    %% pseudo_call_prepare: eliminated before assembly
    #pseudo_li{} -> do_pseudo_li(I, MFA, ConstMap);
    %% pseudo_move: eliminated before assembly
    %% pseudo_tailcall: eliminated before assembly
    %% pseudo_tailcall_prepare: eliminated before assembly
    #store{} -> do_store(I);
    #storex{} -> do_storex(I);
    #unary{} -> do_unary(I);
    #lfd{} -> do_lfd(I);
    #lfdx{} -> do_lfdx(I);
    #stfd{} -> do_stfd(I);
    #stfdx{} -> do_stfdx(I);
    #fp_binary{} -> do_fp_binary(I);
    #fp_unary{} -> do_fp_unary(I);
    _ -> exit({?MODULE,translate_insn,I})
  end.

%% Indexed floating-point load: FP destination, two integer base registers.
do_lfdx(I) ->
  #lfdx{dst=Dst,base1=Base1,base2=Base2} = I,
  NewDst = do_fpreg(Dst),
  NewBase1 = do_reg(Base1),
  NewBase2 = do_reg(Base2),
  [{lfdx, {NewDst,NewBase1,NewBase2}, I}].

%% Indexed floating-point store: FP source, two integer base registers.
do_stfdx(I) ->
  #stfdx{src=Src,base1=Base1,base2=Base2} = I,
  NewSrc = do_fpreg(Src),
  NewBase1 = do_reg(Base1),
  NewBase2 = do_reg(Base2),
  [{stfdx, {NewSrc,NewBase1,NewBase2}, I}].

%% do_alu(I): translate the operands of an #alu{}.  The immediate shift
%% mnemonics slwi/srwi (and their dotted forms) are rewritten to
%% rlwinm/rlwinm. with explicit sh/mb/me operands; srawi keeps its name
%% but gets its shift count wrapped as {sh,N}.  All other opcodes pass
%% through unchanged.
do_alu(I) ->
  #alu{aluop=AluOp,dst=Dst,src1=Src1,src2=Src2} = I,
  NewDst = do_reg(Dst),
  NewSrc1 = do_reg(Src1),
  NewSrc2 = do_reg_or_imm(Src2),
  {NewI,NewOpnds} =
    case AluOp of
      'slwi' -> {'rlwinm', do_slwi_opnds(NewDst, NewSrc1, NewSrc2)};
      'slwi.' -> {'rlwinm.', do_slwi_opnds(NewDst, NewSrc1, NewSrc2)};
      'srwi' -> {'rlwinm', do_srwi_opnds(NewDst, NewSrc1, NewSrc2)};
      'srwi.' -> {'rlwinm.', do_srwi_opnds(NewDst, NewSrc1, NewSrc2)};
      'srawi' -> {'srawi', {NewDst,NewSrc1,do_srawi_src2(NewSrc2)}};
      'srawi.' -> {'srawi.', {NewDst,NewSrc1,do_srawi_src2(NewSrc2)}};
      _ -> {AluOp, {NewDst,NewSrc1,NewSrc2}}
    end,
  [{NewI, NewOpnds, I}].

%% Shift left by N as a rotate-and-mask: rotate left N, keep bits 0..31-N.
do_slwi_opnds(Dst, Src1, {uimm,N}) when is_integer(N), 0 =< N, N < 32 ->
  {Dst, Src1, {sh,N}, {mb,0}, {me,31-N}}.

%% Shift right by N as a rotate-and-mask: rotate left 32-N, keep bits N..31.
do_srwi_opnds(Dst, Src1, {uimm,N}) when is_integer(N), 0 =< N, N < 32 ->
  {Dst, Src1, {sh,32-N}, {mb,N}, {me,31}}.
%% Shift count of srawi: an unsigned immediate in 0..31, re-tagged as sh.
do_srawi_src2({uimm,N}) when is_integer(N), 0 =< N, N < 32 ->
  {sh,N}.

%% Known tailcall: a relocation entry followed by a 'b' with a zero
%% target field.
do_b_fun(I) ->
  #b_fun{'fun'=Fun,linkage=Linkage} = I,
  Reloc = {'.reloc', {b_fun,Fun,Linkage}, #comment{term='fun'}},
  [Reloc, {b, {{li,0}}, I}].

%% Unconditional local jump; the target stays symbolic.
do_b_label(I) ->
  #b_label{label=Label} = I,
  [{b, do_label_ref(Label), I}].

%% Conditional local jump, emitted as a bc_sdi whose form is chosen later.
do_bc(I) ->
  #bc{bcond=BCond,label=Label,pred=Pred} = I,
  Opnds = {{bcond,BCond}, do_label_ref(Label), {pred,Pred}},
  [{bc_sdi, Opnds, I}].

%% Branch via CTR, using the same bo/bi operands as do_blr/1.
do_bctr(I) ->
  [{bcctr, {{bo,2#10100},{bi,0}}, I}].

%% Call via CTR, followed by the stack descriptor relocation.
do_bctrl(I) ->
  #bctrl{sdesc=SDesc} = I,
  Call = {bcctrl, {{bo,2#10100},{bi,0}}, I},
  [Call, {'.reloc', {sdesc,SDesc}, #comment{term=sdesc}}].

%% Known call: target relocation, 'bl' with a zero target field, then
%% the stack descriptor relocation.
do_bl(I) ->
  #bl{'fun'=Fun,sdesc=SDesc,linkage=Linkage} = I,
  TargetReloc = {'.reloc', {b_fun,Fun,Linkage}, #comment{term='fun'}},
  SDescReloc = {'.reloc', {sdesc,SDesc}, #comment{term=sdesc}},
  [TargetReloc, {bl, {{li,0}}, I}, SDescReloc].

%% Return (#blr{} is described in hipe_ppc.hrl as an unconditional bclr).
do_blr(I) ->
  [{bclr, {{bo,2#10100},{bi,0}}, I}].

%% Compare; the result always goes to {crf,0}.  The literal 0 operand
%% after it is passed through as-is.
do_cmp(I) ->
  #cmp{cmpop=CmpOp,src1=Src1,src2=Src2} = I,
  Lhs = do_reg(Src1),
  Rhs = do_reg_or_imm(Src2),
  [{CmpOp, {{crf,0},0,Lhs,Rhs}, I}].

do_label(I) ->
  #label{label=Label} = I,
  [{'.label', Label, I}].

%% Displacement-form load.
do_load(I) ->
  #load{ldop=LdOp,dst=Dst,disp=Disp,base=Base} = I,
  [{LdOp, {do_reg(Dst), do_disp(Disp), do_reg(Base)}, I}].

%% Indexed-form load.
do_loadx(I) ->
  #loadx{ldxop=LdxOp,dst=Dst,base1=Base1,base2=Base2} = I,
  [{LdxOp, {do_reg(Dst), do_reg(Base1), do_reg(Base2)}, I}].

do_mfspr(I) ->
  #mfspr{dst=Dst,spr=SPR} = I,
  [{mfspr, {do_reg(Dst), do_spr(SPR)}, I}].

%% mtcr is emitted as mtcrf with the mask {crm,16#80}.
do_mtcr(I) ->
  #mtcr{src=Src} = I,
  [{mtcrf, {{crm,16#80}, do_reg(Src)}, I}].

do_mtspr(I) ->
  #mtspr{spr=SPR,src=Src} = I,
  [{mtspr, {do_spr(SPR), do_reg(Src)}, I}].
%% do_pseudo_li(I, MFA, ConstMap)
%% A symbolic immediate becomes a relocation entry followed by an
%% addi/addis pair whose immediate fields are zero.  The relocation
%% describes what is being loaded: an atom, a constant (looked up via
%% find_const/2), a closure label or a c_const label.
do_pseudo_li(I, MFA, ConstMap) ->
  #pseudo_li{dst=Dst,imm=Imm} = I,
  RelocData = pseudo_li_reloc(Imm, MFA, ConstMap),
  NewDst = do_reg(Dst),
  Zero = {simm,0},
  [{'.reloc', RelocData, #comment{term=reloc}},
   {addi, {NewDst,{r,0},Zero}, I},
   {addis, {NewDst,NewDst,Zero}, I}].

%% Relocation payload for the immediate of a pseudo_li.
pseudo_li_reloc(Imm, MFA, ConstMap) ->
  case Imm of
    Atom when is_atom(Atom) ->
      {load_atom, Atom};
    {Label,constant} ->
      ConstNo = find_const({MFA,Label}, ConstMap),
      {load_address, {constant,ConstNo}};
    {Label,closure} ->
      {load_address, {closure,Label}};
    {Label,c_const} ->
      {load_address, {c_const,Label}}
  end.

%% Displacement-form store.
do_store(I) ->
  #store{stop=StOp,src=Src,disp=Disp,base=Base} = I,
  [{StOp, {do_reg(Src), do_disp(Disp), do_reg(Base)}, I}].

%% Indexed-form store.
do_storex(I) ->
  #storex{stxop=StxOp,src=Src,base1=Base1,base2=Base2} = I,
  [{StxOp, {do_reg(Src), do_reg(Base1), do_reg(Base2)}, I}].

%% Unary operation.  A 4-tuple opcode {Op,SH,MB,ME} is unpacked into the
%% opcode plus explicit sh/mb/me operands; atoms pass through.
do_unary(I) ->
  #unary{unop=UnOp,dst=Dst,src=Src} = I,
  NewDst = do_reg(Dst),
  NewSrc = do_reg(Src),
  case UnOp of
    {RLWINM,SH,MB,ME} ->
      [{RLWINM, {NewDst,NewSrc,{sh,SH},{mb,MB},{me,ME}}, I}];
    _ ->
      [{UnOp, {NewDst,NewSrc}, I}]
  end.

%% Displacement-form floating-point load.
do_lfd(I) ->
  #lfd{dst=Dst,disp=Disp,base=Base} = I,
  [{lfd, {do_fpreg(Dst), do_disp(Disp), do_reg(Base)}, I}].

%% Displacement-form floating-point store.
do_stfd(I) ->
  #stfd{src=Src,disp=Disp,base=Base} = I,
  [{stfd, {do_fpreg(Src), do_disp(Disp), do_reg(Base)}, I}].

%% Two-operand floating-point arithmetic.
do_fp_binary(I) ->
  #fp_binary{fp_binop=FpBinOp,dst=Dst,src1=Src1,src2=Src2} = I,
  [{FpBinOp, {do_fpreg(Dst), do_fpreg(Src1), do_fpreg(Src2)}, I}].
%% One-operand floating-point arithmetic.
do_fp_unary(I) ->
  #fp_unary{fp_unop=FpUnOp,dst=Dst,src=Src} = I,
  [{FpUnOp, {do_fpreg(Dst), do_fpreg(Src)}, I}].

%% Operand translation.  Register operands must already be physical
%% (0..31); 'double' temps become {fr,N}, all other temps {r,N}.
do_fpreg(#ppc_temp{reg=N,type='double'}) when is_integer(N), 0 =< N, N < 32 ->
  {fr,N}.

do_reg(#ppc_temp{reg=N,type=Type})
  when is_integer(N), 0 =< N, N < 32, Type =/= 'double' ->
  {r,N}.

do_label_ref(Label) when is_integer(Label) ->
  {label,Label}.	% symbolic, since offset is not yet computable

%% A source operand that is either a register or a range-checked 16-bit
%% immediate.  Signed immediates are reduced to their 16-bit pattern.
do_reg_or_imm(Src) ->
  case Src of
    #ppc_temp{} ->
      do_reg(Src);
    #ppc_simm16{value=V} when is_integer(V), -32768 =< V, V =< 32767 ->
      {simm, V band 16#ffff};
    #ppc_uimm16{value=V} when is_integer(V), 0 =< V, V =< 65535 ->
      {uimm, V}
  end.

%% Signed 16-bit displacement, reduced to its 16-bit pattern.
do_disp(Disp) when is_integer(Disp), -32768 =< Disp, Disp =< 32767 ->
  {d, Disp band 16#ffff}.

%% Special-purpose register name to number.
do_spr(SPR) ->
  {spr, case SPR of
	  'xer' -> 1;
	  'lr' -> 8;
	  'ctr' -> 9
	end}.

%%%
%%% Assembly Pass 3.
%%% Process final {MFA,Code,CodeSize,LabelMap} list from pass 2.
%%% Translate to a single binary code segment.
%%% Collect relocation patches.
%%% Build ExportMap (MFA-to-address mapping).
%%% Combine LabelMaps to a single one (for mk_data_relocs/2 compatibility).
%%% Return {CombinedCodeSize,BinaryCode,Relocs,CombinedLabelMap,ExportMap}.
%%%

encode(Code, Options) ->
  TotalSize = compute_code_size(Code, 0),
  ExportMap = build_export_map(Code, 0, []),
  {RevSegments,Relocs} = encode_mfas(Code, 0, [], [], Options),
  %% Segments were accumulated newest-first.
  CodeBinary = list_to_binary(lists:reverse(RevSegments)),
  ?ASSERT(TotalSize =:= byte_size(CodeBinary)),
  CombinedLabelMap = combine_label_maps(Code, 0, gb_trees:empty()),
  {TotalSize,CodeBinary,Relocs,CombinedLabelMap,ExportMap}.

%% Sum of the per-function code sizes.
compute_code_size([], Total) ->
  Total;
compute_code_size([{_MFA,_Insns,CodeSize,_LabelMap}|Rest], Total) ->
  compute_code_size(Rest, Total+CodeSize).

%% Build a list of {Address,M,F,A}, one per function, where Address is the
%% function's start offset in the combined code segment.  The list comes
%% out in reverse order of Code.
build_export_map([], _Address, ExportMap) ->
  ExportMap;
build_export_map([{{M,F,A},_Insns,CodeSize,_LabelMap}|Rest], Address, ExportMap) ->
  build_export_map(Rest, Address+CodeSize, [{Address,M,F,A}|ExportMap]).

%% Merge the per-function label maps into one gb_tree keyed on {MFA,Label},
%% rebasing each label offset by the start address of its function.
combine_label_maps([], _Address, CLM) ->
  CLM;
combine_label_maps([{MFA,_Insns,CodeSize,LabelMap}|Rest], Address, CLM) ->
  Merged = merge_label_map(gb_trees:to_list(LabelMap), MFA, Address, CLM),
  combine_label_maps(Rest, Address+CodeSize, Merged).

%% Insert every {Label,Offset} of one function into the combined map.
%% gb_trees:insert/3 is used, so a duplicate {MFA,Label} key crashes.
merge_label_map(Labels, MFA, Address, CLM) ->
  lists:foldl(fun({Label,Offset}, Acc) ->
                  gb_trees:insert({MFA,Label}, Address+Offset, Acc)
              end, CLM, Labels).

%% Encode each function in turn, threading the running address, the
%% accumulated code segments and the relocation list.
encode_mfas([], _Address, AccCode, Relocs, _Options) ->
  {AccCode,Relocs};
encode_mfas([{MFA,Insns,CodeSize,LabelMap}|Rest], Address, AccCode, Relocs, Options) ->
  print("Generating code for: ~w\n", [MFA], Options),
  print("Offset | Opcode | Instruction\n", [], Options),
  %% the function starts at Address, so that is also its FunAddress
  {Address1,Relocs1,AccCode1} =
    encode_insns(Insns, Address, Address, LabelMap, Relocs, AccCode, Options),
  ExpectedAddress = Address + CodeSize,
  ?ASSERT(Address1 =:= ExpectedAddress),
  print("Finished.\n", [], Options),
  encode_mfas(Rest, Address1, AccCode1, Relocs1, Options).

%% Encode the instructions of one function.
%%   Address     - address of the next instruction to emit
%%   FunAddress  - start address of the function; label offsets in LabelMap
%%                 are relative to it
%% Returns {EndAddress, Relocs, AccCode}; AccCode holds one 4-byte binary
%% per real instruction, most recent first.
encode_insns([I|Insns], Address, FunAddress, LabelMap, Relocs, AccCode, Options) ->
  case I of
    {'.label',L,_} ->
      %% labels emit no code; just verify the address computed earlier
      LabelAddress = gb_trees:get(L, LabelMap) + FunAddress,
      ?ASSERT(Address =:= LabelAddress),        % sanity check
      print_insn(Address, [], I, Options),
      encode_insns(Insns, Address, FunAddress, LabelMap, Relocs, AccCode, Options);
    {'.reloc',Data,_} ->
      %% relocations emit no code; they refer to the current Address
      Reloc = encode_reloc(Data, Address, FunAddress, LabelMap),
      encode_insns(Insns, Address, FunAddress, LabelMap, [Reloc|Relocs], AccCode, Options);
    {bc_sdi,_,_} ->
      %% rewrite the span-dependent branch into real instruction(s) and
      %% encode those at the same Address
      encode_insns(fix_bc_sdi(I, Insns, Address, FunAddress, LabelMap),
                   Address, FunAddress, LabelMap, Relocs, AccCode, Options);
    _ ->
      {Op,Arg,_} = fix_jumps(I, Address, FunAddress, LabelMap),
      Word = hipe_ppc_encode:insn_encode(Op, Arg),
      print_insn(Address, Word, I, Options),
      %% every instruction is one 32-bit word, stored most significant
      %% byte first (same order as word_to_bytes/1 prints it)
      Segment = <<Word:32/integer-big>>,
      NewAccCode = [Segment|AccCode],
      encode_insns(Insns, Address+4, FunAddress, LabelMap, Relocs, NewAccCode, Options)
  end;
encode_insns([], Address, _FunAddress, _LabelMap, Relocs, AccCode, _Options) ->
  {Address,Relocs,AccCode}.

%% Turn the payload of a '.reloc' pseudo instruction into a relocation
%% tuple {PatchType, Address, Data}.
encode_reloc(Data, Address, FunAddress, LabelMap) ->
  case Data of
    {b_fun,MFAorPrim,Linkage} ->
      %% b and bl are patched the same, so no need to distinguish
      %% call from tailcall
      PatchTypeExt =
        case Linkage of
          remote -> ?CALL_REMOTE;
          not_remote -> ?CALL_LOCAL
        end,
      {PatchTypeExt, Address, untag_mfa_or_prim(MFAorPrim)};
    {load_atom,Atom} ->
      {?LOAD_ATOM, Address, Atom};
    {load_address,X} ->
      {?LOAD_ADDRESS, Address, X};
    {sdesc,SDesc} ->
      #ppc_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live} = SDesc,
      %% resolve the exception label (if any) to an absolute address
      ExnRA =
        case ExnLab of
          [] -> [];     % don't cons up a new one
          ExnLab -> gb_trees:get(ExnLab, LabelMap) + FunAddress
        end,
      {?SDESC, Address,
       ?STACK_DESC(ExnRA, FSize, Arity, Live)}
  end.

%% Strip the record wrapper from a call target.
untag_mfa_or_prim(#ppc_mfa{m=M,f=F,a=A}) -> {M,F,A};
untag_mfa_or_prim(#ppc_prim{prim=Prim}) -> Prim.

%% Resolve a span-dependent conditional branch (bc_sdi).
%% If the target is within reach of a 14-bit signed word displacement the
%% instruction becomes a plain bc.  Otherwise it becomes a bc with negated
%% condition that skips over ('.+8') an unconditional b to the target.
%% Returns the new instruction list (replacement followed by Insns).
fix_bc_sdi(I, Insns, InsnAddress, FunAddress, LabelMap) ->
  {bc_sdi,Opnds,OrigI} = I,
  {{bcond,BCond},Label,{pred,Pred}} = Opnds,
  {label,L} = Label,
  Target = gb_trees:get(L, LabelMap) + FunAddress,
  Distance = (Target - InsnAddress) div 4,
  case (Distance >= -(16#2000)) andalso (Distance =< 16#1FFF) of
    true ->
      [{bc, Opnds, OrigI} | Insns];
    false ->
      NegBCond = hipe_ppc:negate_bcond(BCond),
      NegPred = 1.0 - Pred,
      Skip = {bc,
              {{bcond,NegBCond},'.+8',{pred,NegPred}},
              #bc{bcond=NegBCond,label='.+8',pred=NegPred}}, %% pp will be ugly
      Jump = {b, Label, #b_label{label=L}},
      [Skip, Jump | Insns]
  end.

%% Replace symbolic branch targets by encoded displacement fields.
%% Instructions other than b/bc with a symbolic target pass through as is.
fix_jumps({b, {label,L}, OrigI}, InsnAddress, FunAddress, LabelMap) ->
  LabelAddress = gb_trees:get(L, LabelMap) + FunAddress,
  LI = (LabelAddress - InsnAddress) div 4,
  %% ensure LI fits in a 24 bit sign-extended field
  ?ASSERT(LI =< 16#7FFFFF),
  ?ASSERT(LI >= -(16#800000)),
  {b, {{li,LI band 16#FFFFFF}}, OrigI};
fix_jumps({bc, {{bcond,BCond},Target,{pred,Pred}}, OrigI},
          InsnAddress, FunAddress, LabelMap) ->
  LabelAddress =
    case Target of
      {label,L} -> gb_trees:get(L, LabelMap) + FunAddress;
      '.+8' -> InsnAddress + 8
    end,
  BD = (LabelAddress - InsnAddress) div 4,
  %% ensure BD fits in a 14 bit sign-extended field
  ?ASSERT(BD =< 16#1FFF),
  ?ASSERT(BD >= -(16#2000)),
  {BO1,BI} = split_bcond(BCond),
  BO = mk_bo(BO1, Pred, BD),
  {bc, {{bo,BO},{bi,BI},{bd,BD band 16#3FFF}}, OrigI};
fix_jumps(I, _InsnAddress, _FunAddress, _LabelMap) ->
  I.

%% Map a condition name to {BO[1], BI for CR0}.
%% Each pair of rows shares a BI value; the negated condition has BO[1] = 0.
split_bcond(BCond) ->
  case BCond of
    'lt' -> {1, 2#0000};
    'ge' -> {0, 2#0000};        % not lt
    'gt' -> {1, 2#0001};
    'le' -> {0, 2#0001};        % not gt
    'eq' -> {1, 2#0010};
    'ne' -> {0, 2#0010};        % not eq
    'so' -> {1, 2#0011};
    'ns' -> {0, 2#0011}         % not so
  end.

%% Assemble the 5-bit BO field: BO1 goes to bit value 8, bit value 4 is
%% always set, and the lowest bit is the prediction bit from mk_y/2.
mk_bo(BO1, Pred, BD) ->
  Y = mk_y(Pred, BD),
  (BO1 bsl 3) bor 2#00100 bor Y.

%% Prediction bit.  Yields 1 when the branch is predicted not taken
%% (Pred < 0.5) and goes backwards, or predicted taken and goes forwards
%% (BD >= 0); yields 0 in the two other cases.
mk_y(Pred, BD) ->
  case {Pred < 0.5, BD < 0} of
    {true, true} -> 1;          % not taken, backward
    {true, false} -> 0;         % not taken, forward
    {false, true} -> 0;         % taken, backward
    {false, false} -> 1         % taken, forward
  end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Resolve the labels referenced from constants to code offsets, using the
%% combined label map (keyed on {MFA,Label}).  The result is a flat list.
mk_data_relocs(RefsFromConsts, LabelMap) ->
  lists:flatten(mk_data_relocs(RefsFromConsts, LabelMap, [])).

mk_data_relocs([], _, Acc) ->
  Acc;
mk_data_relocs([{MFA,Labels} | Rest], LabelMap, Acc) ->
  Resolve = fun(Lab) -> find({MFA,Lab}, LabelMap) end,
  Map = [case Label of
           {L,Pos} ->
             {Pos, Resolve(L)};
           {sorted,Base,OrderedLabels} ->
             {sorted, Base,
              [{Order, Resolve(OL)} || {OL,Order} <- OrderedLabels]}
         end
         || Label <- Labels],
  %% msg("Map: ~w Map\n",[Map]),
  %% nesting is fine here: the /2 wrapper flattens the final result
  mk_data_relocs(Rest, LabelMap, [Map,Acc]).

%% Look up {MFA,Label}; crashes if it is not in the map.
find({_MFA,_L} = Key, LabelMap) ->
  gb_trees:get(Key, LabelMap).

%% Flatten an export map into a plain list of six elements per function:
%% Addr, M, F, A, IsClosure, IsExported.
slim_sorted_exportmap([], _, _) ->
  [];
slim_sorted_exportmap([{Addr,M,F,A}|Rest], Closures, Exports) ->
  IsClosure = lists:member({M,F,A}, Closures),
  IsExported = is_exported(F, A, Exports),
  Tail = slim_sorted_exportmap(Rest, Closures, Exports),
  [Addr,M,F,A,IsClosure,IsExported | Tail].

is_exported(F, A, Exports) ->
  lists:member({F,A}, Exports).

%%%
%%% Assembly listing support (pp_asm option).
%%%

print(String, Arglist, Options) ->
  ?when_option(pp_asm, Options, io:format(String, Arglist)).

print_insn(Address, Word, I, Options) ->
  ?when_option(pp_asm, Options, print_insn_2(Address, Word, I)).

%% One listing row: hex address, the encoded bytes (if any), then the
%% pretty-printed original instruction.
print_insn_2(Address, Word, {_,_,OrigI}) ->
  io:format("~8.16.0b | ", [Address]),
  print_code_list(word_to_bytes(Word), 0),
  hipe_ppc_pp:pp_insn(OrigI).

%% Split a 32-bit word into four bytes, most significant first.
%% [] stands for "no code" and is passed through.
word_to_bytes(W) ->
  case W of
    [] -> [];   % label or other pseudo instruction
    _ -> [(W bsr Shift) band 16#FF || Shift <- [24, 16, 8, 0]]
  end.

%% Print the bytes in hex, then pad the column to 8 characters.
print_code_list([], Len) ->
  fill_spaces(8-(Len*2)),
  io:format(" | ");
print_code_list([Byte|Rest], Len) ->
  print_byte(Byte),
  print_code_list(Rest, Len+1).

print_byte(Byte) ->
  io:format("~2.16.0b", [Byte band 16#FF]).

fill_spaces(0) ->
  [];
fill_spaces(N) when N > 0 ->
  io:format(" "),
  fill_spaces(N-1).

%%%
%%% Lookup a constant in a ConstMap.
%%%

%% Linear search for the pcm_entry whose MFA and label both match;
%% returns its constant number, or exits with constant_not_found.
find_const({MFA,Label}, [{pcm_entry,MFA,Label,ConstNo,_,_,_}|_]) ->
  ConstNo;
find_const(Key, [_|Rest]) ->
  find_const(Key, Rest);
find_const(Key, []) ->
  ?EXIT({constant_not_found,Key}).
diff --git a/lib/hipe/ppc/hipe_ppc_cfg.erl b/lib/hipe/ppc/hipe_ppc_cfg.erl
new file mode 100644
index 0000000000..13a7754831
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_cfg.erl
@@ -0,0 +1,131 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_cfg).

-export([init/1,
         labels/1, start_label/1,
         succ/2,
         bb/2, bb_add/3]).
-export([postorder/1]).
-export([linearise/1, params/1, reverse_postorder/1]).
-export([arity/1]).
%%%-export([redirect_jmp/3, arity/1]).

%%% these tell cfg.inc what to define (ugly as hell)
-define(BREADTH_ORDER,true).
-define(PARAMS_NEEDED,true).
-define(START_LABEL_UPDATE_NEEDED,true).

-include("hipe_ppc.hrl").
-include("../flow/cfg.hrl").
-include("../flow/cfg.inc").

%% Build a CFG from a defun.  The first instruction of the code must be a
%% label; it becomes the start label.
init(Defun) ->
  Code = hipe_ppc:defun_code(Defun),
  StartLab = hipe_ppc:label_label(hd(Code)),
  Data = hipe_ppc:defun_data(Defun),
  IsClosure = hipe_ppc:defun_is_closure(Defun),
  MFA = hipe_ppc:defun_mfa(Defun),
  IsLeaf = hipe_ppc:defun_is_leaf(Defun),
  Formals = hipe_ppc:defun_formals(Defun),
  EmptyCFG = mk_empty_cfg(MFA, StartLab, Data, IsClosure, IsLeaf, Formals),
  take_bbs(Code, EmptyCFG).

%% True for the instructions that end a basic block.
is_branch(#b_fun{}) -> true;
is_branch(#b_label{}) -> true;
%% not bc
is_branch(#bctr{}) -> true;
%% not bctrl
%% not bl
is_branch(#blr{}) -> true;
is_branch(#pseudo_bc{}) -> true;
is_branch(#pseudo_call{}) -> true;
is_branch(#pseudo_tailcall{}) -> true;
is_branch(_) -> false.

%% Labels a branch instruction may transfer control to.
branch_successors(Branch) ->
  case Branch of
    #b_fun{} -> [];
    #b_label{label=Label} -> [Label];
    #bctr{labels=Labels} -> Labels;
    #blr{} -> [];
    #pseudo_bc{true_label=TrueLab,false_label=FalseLab} -> [FalseLab,TrueLab];
    #pseudo_call{contlab=ContLab, sdesc=#ppc_sdesc{exnlab=ExnLab}} ->
      %% [] means the call has no exception label
      case ExnLab of
        [] -> [ContLab];
        _ -> [ContLab,ExnLab]
      end;
    #pseudo_tailcall{} -> []
  end.

-ifdef(REMOVE_TRIVIAL_BBS_NEEDED).
fails_to(_Instr) -> [].
-endif.

-ifdef(notdef).
%% Replace jump target Old by New in I (compiled out unless notdef is set).
redirect_jmp(I, Old, New) ->
  case I of
    #b_label{label=Label} ->
      if Old =:= Label -> I#b_label{label=New};
         true -> I
      end;
    #pseudo_bc{true_label=TrueLab, false_label=FalseLab} ->
      I1 = if Old =:= TrueLab -> I#pseudo_bc{true_label=New};
              true -> I
           end,
      if Old =:= FalseLab -> I1#pseudo_bc{false_label=New};
         true -> I1
      end;
    %% handle pseudo_call too?
    _ -> I
  end.
-endif.

%%% Thin wrappers around hipe_ppc.

mk_goto(Label) ->
  hipe_ppc:mk_b_label(Label).

is_label(I) ->
  hipe_ppc:is_label(I).

label_name(Label) ->
  hipe_ppc:label_label(Label).

mk_label(Name) ->
  hipe_ppc:mk_label(Name).

%% Turn a CFG back into a defun (not a bare instruction list).
linearise(CFG) ->       % -> defun, not insn list
  MFA = function(CFG),
  Formals = params(CFG),
  Code = linearize_cfg(CFG),
  Data = data(CFG),
  VarRange = hipe_gensym:var_range(ppc),
  LabelRange = hipe_gensym:label_range(ppc),
  IsClosure = is_closure(CFG),
  IsLeaf = is_leaf(CFG),
  hipe_ppc:mk_defun(MFA, Formals, IsClosure, IsLeaf,
                    Code, Data, VarRange, LabelRange).

%% Arity of the function the CFG belongs to.
arity(CFG) ->
  {_M, _F, Arity} = function(CFG),
  Arity.
diff --git a/lib/hipe/ppc/hipe_ppc_defuse.erl b/lib/hipe/ppc/hipe_ppc_defuse.erl
new file mode 100644
index 0000000000..03a8f82abf
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_defuse.erl
@@ -0,0 +1,145 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_defuse).
-export([insn_def_all/1, insn_use_all/1]).
-export([insn_def_gpr/1, insn_use_gpr/1]).
-export([insn_def_fpr/1, insn_use_fpr/1]).
-include("hipe_ppc.hrl").

%%%
%%% Defs and uses for both general-purpose and floating-point registers.
%%% This is needed for the frame module, alas.
%%%
insn_def_all(I) ->
  Gprs = insn_def_gpr(I),
  addtemps(insn_def_fpr(I), Gprs).

insn_use_all(I) ->
  Gprs = insn_use_gpr(I),
  addtemps(insn_use_fpr(I), Gprs).

%%%
%%% Defs and uses for general-purpose (integer) registers only.
%%%
%% Integer temps defined (written) by an instruction.
insn_def_gpr(#alu{dst=Dst}) -> [Dst];
insn_def_gpr(#load{dst=Dst}) -> [Dst];
insn_def_gpr(#loadx{dst=Dst}) -> [Dst];
insn_def_gpr(#mfspr{dst=Dst}) -> [Dst];
insn_def_gpr(#pseudo_call{}) -> call_clobbered_gpr();
insn_def_gpr(#pseudo_li{dst=Dst}) -> [Dst];
insn_def_gpr(#pseudo_move{dst=Dst}) -> [Dst];
insn_def_gpr(#pseudo_tailcall_prepare{}) -> tailcall_clobbered_gpr();
insn_def_gpr(#unary{dst=Dst}) -> [Dst];
insn_def_gpr(_) -> [].

%% Temps for every register hipe_ppc_registers reports as call-clobbered.
call_clobbered_gpr() ->
  Clobbered = hipe_ppc_registers:call_clobbered() ++ all_fp_pseudos(),
  [hipe_ppc:mk_temp(R, T) || {R,T} <- Clobbered].

all_fp_pseudos() -> []. % XXX: for now

%% Temps for every register hipe_ppc_registers reports as clobbered by a
%% tailcall.
tailcall_clobbered_gpr() ->
  Clobbered = hipe_ppc_registers:tailcall_clobbered() ++ all_fp_pseudos(),
  [hipe_ppc:mk_temp(R, T) || {R,T} <- Clobbered].

%% Integer temps used (read) by an instruction.  Second operands that may
%% be immediates go through addsrc/2, which ignores non-temps.
insn_use_gpr(#alu{src1=Src1,src2=Src2}) ->
  addsrc(Src2, [Src1]);
insn_use_gpr(#blr{}) ->
  [hipe_ppc:mk_temp(hipe_ppc_registers:return_value(), 'tagged')];
insn_use_gpr(#cmp{src1=Src1,src2=Src2}) ->
  addsrc(Src2, [Src1]);
insn_use_gpr(#load{base=Base}) ->
  [Base];
insn_use_gpr(#loadx{base1=Base1,base2=Base2}) ->
  addtemp(Base1, [Base2]);
insn_use_gpr(#mtcr{src=Src}) ->
  [Src];
insn_use_gpr(#mtspr{src=Src}) ->
  [Src];
insn_use_gpr(#pseudo_call{sdesc=#ppc_sdesc{arity=Arity}}) ->
  arity_use_gpr(Arity);
insn_use_gpr(#pseudo_move{src=Src}) ->
  [Src];
insn_use_gpr(#pseudo_tailcall{arity=Arity,stkargs=StkArgs}) ->
  RegUses = addtemps(tailcall_clobbered_gpr(), arity_use_gpr(Arity)),
  addsrcs(StkArgs, RegUses);
insn_use_gpr(#store{src=Src,base=Base}) ->
  addtemp(Src, [Base]);
insn_use_gpr(#storex{src=Src,base1=Base1,base2=Base2}) ->
  addtemp(Src, addtemp(Base1, [Base2]));
insn_use_gpr(#unary{src=Src}) ->
  [Src];
%% FP loads and stores still use integer base registers
insn_use_gpr(#lfd{base=Base}) ->
  [Base];
insn_use_gpr(#lfdx{base1=Base1,base2=Base2}) ->
  addtemp(Base1, [Base2]);
insn_use_gpr(#stfd{base=Base}) ->
  [Base];
insn_use_gpr(#stfdx{base1=Base1,base2=Base2}) ->
  addtemp(Base1, [Base2]);
insn_use_gpr(_) ->
  [].

%% 'tagged' temps for the argument registers of a call with this arity.
arity_use_gpr(Arity) ->
  [hipe_ppc:mk_temp(R, 'tagged')
   || R <- hipe_ppc_registers:args(Arity)].

%% Add each source operand that is a temp to Set.
addsrcs([], Set) ->
  Set;
addsrcs([Arg|Args], Set) ->
  addsrcs(Args, addsrc(Arg, Set)).

%% Add Src to Set if it is a temp; other operands are ignored.
addsrc(#ppc_temp{} = Src, Set) -> addtemp(Src, Set);
addsrc(_, Set) -> Set.

%%%
%%% Defs and uses for floating-point registers only.
%%%
%% Floating-point temps defined (written) by an instruction.
insn_def_fpr(#pseudo_call{}) -> call_clobbered_fpr();
insn_def_fpr(#lfd{dst=Dst}) -> [Dst];
insn_def_fpr(#lfdx{dst=Dst}) -> [Dst];
insn_def_fpr(#fp_binary{dst=Dst}) -> [Dst];
insn_def_fpr(#fp_unary{dst=Dst}) -> [Dst];
insn_def_fpr(#pseudo_fmove{dst=Dst}) -> [Dst];
insn_def_fpr(_) -> [].

%% A call is treated as clobbering every allocatable FP register.
call_clobbered_fpr() ->
  [hipe_ppc:mk_temp(R, 'double') || R <- hipe_ppc_registers:allocatable_fpr()].

%% Floating-point temps used (read) by an instruction.
insn_use_fpr(#stfd{src=Src}) -> [Src];
insn_use_fpr(#stfdx{src=Src}) -> [Src];
insn_use_fpr(#fp_binary{src1=Src1,src2=Src2}) -> addtemp(Src1, [Src2]);
insn_use_fpr(#fp_unary{src=Src}) -> [Src];
insn_use_fpr(#pseudo_fmove{src=Src}) -> [Src];
insn_use_fpr(_) -> [].

%%%
%%% Auxiliary operations on sets of temps
%%% These sets are small. No point using gb_trees, right?
%%%

%% Add every element of the first list to Set, skipping duplicates.
addtemps([], Set) ->
  Set;
addtemps([Arg|Args], Set) ->
  addtemps(Args, addtemp(Arg, Set)).

%% Add Temp to Set unless it is already a member.
addtemp(Temp, Set) ->
  case lists:member(Temp, Set) of
    true -> Set;
    false -> [Temp|Set]
  end.
diff --git a/lib/hipe/ppc/hipe_ppc_encode.erl b/lib/hipe/ppc/hipe_ppc_encode.erl
new file mode 100644
index 0000000000..97cb0bf635
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_encode.erl
@@ -0,0 +1,1558 @@
%%% -*- erlang-indent-level: 4 -*-
%%%
%%% %CopyrightBegin%
%%%
%%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%%
%%% The contents of this file are subject to the Erlang Public License,
%%% Version 1.1, (the "License"); you may not use this file except in
%%% compliance with the License. You should have received a copy of the
%%% Erlang Public License along with this software. If not, it can be
%%% retrieved online at http://www.erlang.org/.
%%%
%%% Software distributed under the License is distributed on an "AS IS"
%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%%% the License for the specific language governing rights and limitations
%%% under the License.
%%%
%%% %CopyrightEnd%
%%%
%%% Encode symbolic PowerPC instructions to binary form.
+%%% Copyright (C) 2003-2005, 2009 Mikael Pettersson +%%% +%%% Notes: +%%% - PowerPC manuals use reversed bit numbering. In a 32-bit word, +%%% the most significant bit has number 0, and the least significant +%%% bit has number 31. +%%% - PowerPC manuals list opcodes in decimal, not hex. +%%% - This module does not support AltiVec instructions. +%%% +%%% Instruction Operands: +%%% +%%% {li,LI} long branch offset/address (24 bits, signed) +%%% {bo,BO} branch control operand (5 bits, restricted) +%%% {bi,BI} branch CR field and bits operand (5 bits) +%%% {bd,BD} branch offset (14 bits, signed) +%%% {to,TO} trap condition (5 bits) +%%% {nb,NB} number of bytes to copy (5 bits) +%%% {sh,SH} shift count (5 bits) +%%% {mb,MB} mask begin bit number (5 bits) +%%% {mb6,MB6} mask begin bit number (6 bits) (64-bit) +%%% {me,ME} mask end bit number (5 bits) +%%% {me6,ME6} mask end bit number (6 bits) (64-bit) +%%% {sr,SR} segment register (4 bits) +%%% {crimm,IMM} FPSCR CR image (4 bits) +%%% {simm,SIMM} immediate operand (16 bits, signed) +%%% {uimm,UIMM} immediate operand (16 bits, unsigned) +%%% {d,Disp} load/store byte displacement (16 bits, signed) +%%% {ds,DS} load/store word displacement (14 bits, signed) (64-bit) +%%% {r,R} integer register (5 bits) +%%% {fr,FR} floating-point register (5 bits) +%%% {crf,CRF} CR field number (3 bits) +%%% {crb,CRB} CR bit number (5 bits) +%%% {tbr,TBR} TBR number (10 bits, 268 or 269) +%%% {spr,SPR} SPR number (10 bits) +%%% {crm,CRM} CR fields set (8 bits) +%%% {fm,FM} FPSCR fields set (8 bits) + +-module(hipe_ppc_encode). + +-export([insn_encode/2]). + +%-define(TESTING,1). +-ifdef(TESTING). +-export([dotest/0, dotest/1]). +-endif. + +-define(ASSERT(G), if G -> [] ; true -> exit({assertion_failed,?MODULE,?LINE,??G}) end). + +-define(BF(LB,RB,V), bf(LB,RB,V)). 

%% Position Value in the bit field LeftBit..RightBit of a 32-bit word.
%% Bits are numbered with 0 as the most significant bit, so the field's
%% rightmost bit lands at shift distance 31 - RightBit.
%% Exits with assertion_failed if the bounds are inconsistent or Value
%% does not fit in the field as a non-negative number.
bf(LeftBit, RightBit, Value) ->
    ?ASSERT(LeftBit >= 0),
    ?ASSERT(LeftBit =< RightBit),
    ?ASSERT(RightBit < 32),
    ?ASSERT(Value >= 0),
    ?ASSERT(Value < (1 bsl ((RightBit - LeftBit) + 1))),
    Value bsl (31 - RightBit).

%% ?BIT: a single bit at position Pos.  ?BITS: the N rightmost bits.
-define(BIT(Pos,Val), ?BF(Pos,Pos,Val)).
-define(BITS(N,Val), ?BF(32-N,31,Val)).

%%% I-Form Instructions
%%% b, ba, bl, bla

b_AA_LK({{li,LI}}, AA, LK) ->
    ?BF(0,5,10#18)
        bor ?BF(6,29,LI)
        bor ?BIT(30,AA)
        bor ?BIT(31,LK).

%%% B-Form Instructions
%%% bc, bca, bcl, bcla

bc_AA_LK({{bo,BO}, {bi,BI}, {bd,BD}}, AA, LK) ->
    ?BF(0,5,10#16)
        bor ?BF(6,10,BO)
        bor ?BF(11,15,BI)
        bor ?BF(16,29,BD)
        bor ?BIT(30,AA)
        bor ?BIT(31,LK).

%%% SC-Form Instructions
%%% sc

sc({}) ->
    ?BF(0,5,10#17) bor ?BIT(30,1).

%%% D-Form Instructions
%%% addi, addic, addic., addis, mulli, subfic
%%% andi., andis., ori, oris, xori, xoris
%%% lbz, lbzu, lha, lhau, lhz, lhzu, lwz, lwzu, lfd, lfdu, lfs, lfsu, lmw
%%% stb, stbu, sth, sthu, stw, stwu, stfd, stfdu, stfs, stfsu, stmw
%%% cmpi, cmpli, twi
%%% tdi (64-bit)

%% Generic D-form layout: opcode in bits 0-5, two 5-bit fields in bits
%% 6-10 and 11-15, and a 16-bit immediate in bits 16-31.
d_form(OPCD, D, A, IMM) ->
    ?BF(0,5,OPCD)
        bor ?BF(6,10,D)
        bor ?BF(11,15,A)
        bor ?BF(16,31,IMM).

%% Operand order {Dst, A, Simm}.
d_form_D_A_SIMM(OPCD, {{r,Dst}, {r,RegA}, {simm,Imm}}) ->
    d_form(OPCD, Dst, RegA, Imm).

addi(Opnds) -> d_form_D_A_SIMM(10#14, Opnds).
addic(Opnds) -> d_form_D_A_SIMM(10#12, Opnds).
addic_dot(Opnds) -> d_form_D_A_SIMM(10#13, Opnds).
addis(Opnds) -> d_form_D_A_SIMM(10#15, Opnds).
mulli(Opnds) -> d_form_D_A_SIMM(10#07, Opnds).
subfic(Opnds) -> d_form_D_A_SIMM(10#08, Opnds).

%% Operand order {A, S, Uimm}: the first operand goes into the A field
%% (bits 11-15) and the second into the S field (bits 6-10).
d_form_S_A_UIMM(OPCD, {{r,RegA}, {r,Src}, {uimm,Imm}}) ->
    d_form(OPCD, Src, RegA, Imm).

andi_dot(Opnds) -> d_form_S_A_UIMM(10#28, Opnds).
andis_dot(Opnds) -> d_form_S_A_UIMM(10#29, Opnds).
ori(Opnds) -> d_form_S_A_UIMM(10#24, Opnds).
oris(Opnds) -> d_form_S_A_UIMM(10#25, Opnds).
xori(Opnds) -> d_form_S_A_UIMM(10#26, Opnds).
xoris(Opnds) -> d_form_S_A_UIMM(10#27, Opnds).

%% Integer load, no update: operand order {Dst, Disp, Base}.
d_form_D_A_d_simple(OPCD, {{r,Dst}, {d,Disp}, {r,Base}}) ->
    d_form(OPCD, Dst, Base, Disp).

%% Integer load with update: the base register must not be r0 and must
%% differ from the destination.
d_form_D_A_d_update(OPCD, {{r,D}, {d,Disp}, {r,A}}) ->
    ?ASSERT(A =/= 0),
    ?ASSERT(A =/= D),
    d_form(OPCD, D, A, Disp).

lbz(Opnds) -> d_form_D_A_d_simple(10#34, Opnds).
lbzu(Opnds) -> d_form_D_A_d_update(10#35, Opnds).
lha(Opnds) -> d_form_D_A_d_simple(10#42, Opnds).
lhau(Opnds) -> d_form_D_A_d_update(10#43, Opnds).
lhz(Opnds) -> d_form_D_A_d_simple(10#40, Opnds).
lhzu(Opnds) -> d_form_D_A_d_update(10#41, Opnds).
lwz(Opnds) -> d_form_D_A_d_simple(10#32, Opnds).
lwzu(Opnds) -> d_form_D_A_d_update(10#33, Opnds).

%% Floating-point load, no update: operand order {FrDst, Disp, Base}.
d_form_frD_A_d_simple(OPCD, {{fr,FrDst}, {d,Disp}, {r,Base}}) ->
    d_form(OPCD, FrDst, Base, Disp).

%% Floating-point load with update: the base register must not be r0.
d_form_frD_A_d_update(OPCD, {{fr,D}, {d,Disp}, {r,A}}) ->
    ?ASSERT(A =/= 0),
    d_form(OPCD, D, A, Disp).

lfd(Opnds) -> d_form_frD_A_d_simple(10#50, Opnds).
lfdu(Opnds) -> d_form_frD_A_d_update(10#51, Opnds).
lfs(Opnds) -> d_form_frD_A_d_simple(10#48, Opnds).
lfsu(Opnds) -> d_form_frD_A_d_update(10#49, Opnds).

%% Load multiple word: asserts that the base register number is below
%% the first destination register.
lmw({{r,D}, {d,Disp}, {r,A}}) ->
    ?ASSERT(A < D),
    d_form(10#46, D, A, Disp).

%% Integer store, no update: operand order {Src, Disp, Base}.
d_form_S_A_d_simple(OPCD, {{r,Src}, {d,Disp}, {r,Base}}) ->
    d_form(OPCD, Src, Base, Disp).

%% Integer store with update: the base register must not be r0.
d_form_S_A_d_update(OPCD, {{r,S}, {d,Disp}, {r,A}}) ->
    ?ASSERT(A =/= 0),
    d_form(OPCD, S, A, Disp).

stb(Opnds) -> d_form_S_A_d_simple(10#38, Opnds).
stbu(Opnds) -> d_form_S_A_d_update(10#39, Opnds).
sth(Opnds) -> d_form_S_A_d_simple(10#44, Opnds).
sthu(Opnds) -> d_form_S_A_d_update(10#45, Opnds).
stmw(Opnds) -> d_form_S_A_d_simple(10#47, Opnds).
stw(Opnds) -> d_form_S_A_d_simple(10#36, Opnds).
stwu(Opnds) -> d_form_S_A_d_update(10#37, Opnds).

%% Floating-point store, no update: operand order {FrSrc, Disp, Base}.
d_form_frS_A_d_simple(OPCD, {{fr,FrSrc}, {d,Disp}, {r,Base}}) ->
    d_form(OPCD, FrSrc, Base, Disp).

%% Floating-point store with update: the base register must not be r0.
d_form_frS_A_d_update(OPCD, {{fr,S}, {d,Disp}, {r,A}}) ->
    ?ASSERT(A =/= 0),
    d_form(OPCD, S, A, Disp).

stfd(Opnds) -> d_form_frS_A_d_simple(10#54, Opnds).
stfdu(Opnds) -> d_form_frS_A_d_update(10#55, Opnds).
stfs(Opnds) -> d_form_frS_A_d_simple(10#52, Opnds).
stfsu(Opnds) -> d_form_frS_A_d_update(10#53, Opnds).

%% Compare immediate (signed).  The 3-bit CR field number and the L bit
%% share the 5-bit field in bits 6-10: CRFD occupies the top three bits
%% and L the lowest one.
cmpi({{crf,CRFD}, L, {r,A}, {simm,SIMM}}) ->
    %% ?ASSERT(L == 0), % L must be zero in 32-bit code
    d_form(10#11, (CRFD bsl 2) bor L, A, SIMM).

%% Compare logical immediate (unsigned); same field layout as cmpi/1.
cmpli({{crf,CRFD}, L, {r,A}, {uimm,UIMM}}) ->
    %% ?ASSERT(L == 0), % L must be zero in 32-bit code
    d_form(10#10, (CRFD bsl 2) bor L, A, UIMM).

%% Trap immediate: operand order {TO, A, Simm}.
d_form_OPCD_TO_A_SIMM(OPCD, {{to,TO}, {r,A}, {simm,SIMM}}) ->
    d_form(OPCD, TO, A, SIMM).

tdi(Opnds) -> d_form_OPCD_TO_A_SIMM(10#02, Opnds).      % 64-bit
twi(Opnds) -> d_form_OPCD_TO_A_SIMM(10#03, Opnds).

%%% DS-Form Instructions
%%% ld, ldu, lwa, std, stdu (64-bit)

%% Generic DS-form layout: like D-form, but the displacement field is
%% 14 bits (bits 16-29) and bits 30-31 hold an extended opcode.
ds_form(OPCD, D, A, DS, XO) ->
    ?BF(0,5,OPCD) bor ?BF(6,10,D) bor ?BF(11,15,A) bor ?BF(16,29,DS) bor ?BF(30,31,XO).

%% Load or store without update: operand order {Reg, DS, Base}.
ds_form_D_A_DS_XO_simple(OPCD, {{r,D}, {ds,DS}, {r,A}}, XO) ->
    ds_form(OPCD, D, A, DS, XO).

%% Load with update: the base register must not be r0 and must differ
%% from the destination register.
ds_form_D_A_DS_XO_update(OPCD, {{r,D}, {ds,DS}, {r,A}}, XO) ->
    ?ASSERT(A =/= 0),
    ?ASSERT(A =/= D),
    ds_form(OPCD, D, A, DS, XO).

%% Store with update: only base register r0 is rejected.  Unlike the
%% load case, the source register may equal the base register, as in
%% "stdu r1,-N(r1)".  This mirrors d_form_S_A_d_update/2, which the
%% D-form stores (stwu etc.) use.
ds_form_S_A_DS_XO_update(OPCD, {{r,S}, {ds,DS}, {r,A}}, XO) ->
    ?ASSERT(A =/= 0),
    ds_form(OPCD, S, A, DS, XO).

ld(Opnds) -> ds_form_D_A_DS_XO_simple(10#58, Opnds, 10#0).      % 64-bit
ldu(Opnds) -> ds_form_D_A_DS_XO_update(10#58, Opnds, 10#1).     % 64-bit
lwa(Opnds) -> ds_form_D_A_DS_XO_simple(10#58, Opnds, 10#2).     % 64-bit
std(Opnds) -> ds_form_D_A_DS_XO_simple(10#62, Opnds, 10#0).     % 64-bit
stdu(Opnds) -> ds_form_S_A_DS_XO_update(10#62, Opnds, 10#1).    % 64-bit

%%% X-Form Instructions
%%% eciwx, lbzux, lbzx, lhaux, lhax, lhbrx, lhzux, lhzx, lwarx, lwbrx, lwzux, lwzx, lswx
%%% lwaux, lwax (64-bit)
%%% lfdux, lfdx, lfsux, lfsx
%%% lswi
%%% fabs, fctiw, fctiwz, fmr, fnabs, fneg, frsp
%%% fcfid, fctid, fctidz (64-bit)
%%% mfsrin
%%% mffs
%%% mfcr, mfmsr
%%% mfsr
%%% and, andc, eqv, nand, nor, or, orc, slw, sraw, srw, xor
%%% sld, srad, srd (64-bit)
%%% stwcx.
%%% stdcx.
%%% (64-bit)   [qualifier for the "stdcx." entry that precedes this line]
%%% ecowx, stbx, stbux, sthbrx, sthx, sthux, stswx, stwbrx, stwx, stwux
%%% stdux, stdx (64-bit)
%%% stfdx, stfdux, stfiwx, stfsx, stfsux
%%% stswi
%%% cntlzw, extsb, extsh
%%% cntlzd, extsw (64-bit)
%%% mtmsr
%%% mtmsrd (64-bit)
%%% mtsr, mtsrin
%%% mtsrd, mtsrdin (64-bit)
%%% srawi
%%% sradi (64-bit)
%%% cmp, cmpl
%%% fcmpo, fcmpu
%%% mcrfs
%%% mcrxr (obsolete)
%%% mtfsfi
%%% tw
%%% td (64-bit)
%%% mtfsb0, mtfsb1
%%% dcba, dcbf, dcbi, dcbst, dcbt, dcbtst, dcbz, icbi
%%% tlbie
%%% eieio, sync, tlbia, tlbsync

%% Generic X-form layout: opcode in bits 0-5, three 5-bit fields in bits
%% 6-10, 11-15 and 16-20, a 10-bit extended opcode in bits 21-30, and the
%% Rc bit in bit 31.
x_form(OPCD, D, A, B, XO, Rc) ->
    ?BF(0,5,OPCD)
        bor ?BF(6,10,D)
        bor ?BF(11,15,A)
        bor ?BF(16,20,B)
        bor ?BF(21,30,XO)
        bor ?BIT(31,Rc).

%% Indexed integer load, no update: operand order {Dst, A, B}.
x_form_D_A_B_XO_simple({{r,Dst}, {r,RegA}, {r,RegB}}, XO) ->
    x_form(10#31, Dst, RegA, RegB, XO, 0).

%% Indexed integer load with update: A must not be r0 and must differ
%% from the destination.
x_form_D_A_B_XO_update({{r,D}, {r,A}, {r,B}}, XO) ->
    ?ASSERT(A =/= 0),
    ?ASSERT(A =/= D),
    x_form(10#31, D, A, B, XO, 0).

eciwx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#310).  % optional
lbzux(Opnds) -> x_form_D_A_B_XO_update(Opnds, 10#119).
lbzx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#87).
ldarx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#84).   % 64-bit
ldux(Opnds) -> x_form_D_A_B_XO_update(Opnds, 10#53).    % 64-bit
ldx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#21).     % 64-bit
lhaux(Opnds) -> x_form_D_A_B_XO_update(Opnds, 10#375).
lhax(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#343).
lhbrx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#790).
lhzux(Opnds) -> x_form_D_A_B_XO_update(Opnds, 10#311).
lhzx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#279).
lswx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#533).   % XXX: incomplete checks
lwarx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#20).
lwaux(Opnds) -> x_form_D_A_B_XO_update(Opnds, 10#373).  % 64-bit
lwax(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#341).   % 64-bit
lwbrx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#534).
lwzux(Opnds) -> x_form_D_A_B_XO_update(Opnds, 10#55).
lwzx(Opnds) -> x_form_D_A_B_XO_simple(Opnds, 10#23).

%% Indexed floating-point load, no update: operand order {FrDst, A, B}.
x_form_frD_A_B_XO_simple({{fr,FrDst}, {r,RegA}, {r,RegB}}, XO) ->
    x_form(10#31, FrDst, RegA, RegB, XO, 0).

%% Indexed floating-point load with update: A must not be r0.
x_form_frD_A_B_XO_update({{fr,D}, {r,A}, {r,B}}, XO) ->
    ?ASSERT(A =/= 0),
    x_form(10#31, D, A, B, XO, 0).

lfdux(Opnds) -> x_form_frD_A_B_XO_update(Opnds, 10#631).
lfdx(Opnds) -> x_form_frD_A_B_XO_simple(Opnds, 10#599).
lfsux(Opnds) -> x_form_frD_A_B_XO_update(Opnds, 10#567).
lfsx(Opnds) -> x_form_frD_A_B_XO_simple(Opnds, 10#535).

%% Load string word immediate: the byte count goes into the B field.
lswi({{r,Dst}, {r,RegA}, {nb,NB}}) ->   % XXX: incomplete checks
    x_form(10#31, Dst, RegA, NB, 10#597, 0).

%% Floating-point register-to-register operations (opcode 63); the A
%% field is always zero.
x_form_D_B_XO_Rc({{fr,FrDst}, {fr,FrSrc}}, XO, Rc) ->
    x_form(10#63, FrDst, 0, FrSrc, XO, Rc).

fabs_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#264, Rc).
fcfid_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#846, Rc).     % 64-bit
fctid_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#814, Rc).     % 64-bit
fctidz_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#815, Rc).    % 64-bit
fctiw_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#14, Rc).
fctiwz_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#15, Rc).
fmr_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#72, Rc).
fnabs_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#136, Rc).
fneg_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#40, Rc).
frsp_Rc(Opnds, Rc) -> x_form_D_B_XO_Rc(Opnds, 10#12, Rc).

mfsrin({{r,Dst}, {r,RegB}}) ->  % supervisor
    x_form(10#31, Dst, 0, RegB, 10#659, 0).

mffs_Rc({{fr,FrDst}}, Rc) ->
    x_form(10#63, FrDst, 0, 0, 10#583, Rc).

%% Only a destination register; the A and B fields are zero.
x_form_D_XO({{r,Dst}}, XO) ->
    x_form(10#31, Dst, 0, 0, XO, 0).

mfcr(Opnds) -> x_form_D_XO(Opnds, 10#19).
mfmsr(Opnds) -> x_form_D_XO(Opnds, 10#83).      % supervisor

%% The 4-bit segment register number is range-checked by ?BITS and ends
%% up in the low four bits of the A field.
mfsr({{r,Dst}, {sr,SR}}) ->     % supervisor
    x_form(10#31, Dst, ?BITS(4,SR), 0, 10#595, 0).

%% Logical/shift operations: operand order {A, S, B}, i.e. the first
%% operand goes into the A field (bits 11-15) and the second into the S
%% field (bits 6-10).
x_form_S_A_B_XO_Rc({{r,RegA}, {r,Src}, {r,RegB}}, XO, Rc) ->
    x_form(10#31, Src, RegA, RegB, XO, Rc).

and_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#28, Rc).
andc_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#60, Rc).
eqv_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#284, Rc).
nand_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#476, Rc).
%% Remainder of the logical/shift table; operands are in {A, S, B} order.
nor_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#124, Rc).
or_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#444, Rc).
orc_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#412, Rc).
sld_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#27, Rc).      % 64-bit
slw_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#24, Rc).
srad_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#794, Rc).    % 64-bit
sraw_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#792, Rc).
srd_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#539, Rc).     % 64-bit
srw_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#536, Rc).
xor_Rc(Opnds, Rc) -> x_form_S_A_B_XO_Rc(Opnds, 10#316, Rc).

%% Store conditional: operand order {S, A, B}, Rc bit always set.
xform_S_A_B_XO_1({{r,Src}, {r,RegA}, {r,RegB}}, XO) ->
    x_form(10#31, Src, RegA, RegB, XO, 1).

stdcx_dot(Opnds) -> xform_S_A_B_XO_1(Opnds, 10#214).    % 64-bit
stwcx_dot(Opnds) -> xform_S_A_B_XO_1(Opnds, 10#150).

%% Indexed integer store, no update: operand order {S, A, B}.
x_form_S_A_B_XO_simple({{r,Src}, {r,RegA}, {r,RegB}}, XO) ->
    x_form(10#31, Src, RegA, RegB, XO, 0).

%% Indexed integer store with update: A must not be r0.
x_form_S_A_B_XO_update({{r,S}, {r,A}, {r,B}}, XO) ->
    ?ASSERT(A =/= 0),
    x_form(10#31, S, A, B, XO, 0).

ecowx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#438).  % optional
stbx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#215).
stbux(Opnds) -> x_form_S_A_B_XO_update(Opnds, 10#247).
sthbrx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#918).
stdx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#149).   % 64-bit
stdux(Opnds) -> x_form_S_A_B_XO_update(Opnds, 10#181).  % 64-bit
sthx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#407).
sthux(Opnds) -> x_form_S_A_B_XO_update(Opnds, 10#439).
stswx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#661).
stwbrx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#662).
stwx(Opnds) -> x_form_S_A_B_XO_simple(Opnds, 10#151).
stwux(Opnds) -> x_form_S_A_B_XO_update(Opnds, 10#183).

%% Indexed floating-point store, no update: operand order {FrS, A, B}.
x_form_frS_A_B_XO_simple({{fr,FrSrc}, {r,RegA}, {r,RegB}}, XO) ->
    x_form(10#31, FrSrc, RegA, RegB, XO, 0).

%% Indexed floating-point store with update: A must not be r0.
x_form_frS_A_B_XO_update({{fr,S}, {r,A}, {r,B}}, XO) ->
    ?ASSERT(A =/= 0),
    x_form(10#31, S, A, B, XO, 0).

stfdx(Opnds) -> x_form_frS_A_B_XO_simple(Opnds, 10#727).
stfdux(Opnds) -> x_form_frS_A_B_XO_update(Opnds, 10#759).
stfiwx(Opnds) -> x_form_frS_A_B_XO_simple(Opnds, 10#983).       % optional
stfsx(Opnds) -> x_form_frS_A_B_XO_simple(Opnds, 10#663).
stfsux(Opnds) -> x_form_frS_A_B_XO_update(Opnds, 10#695).

%% Store string word immediate: the byte count goes into the B field.
stswi({{r,Src}, {r,RegA}, {nb,NB}}) ->
    x_form(10#31, Src, RegA, NB, 10#725, 0).

%% Two-register operations: operand order {A, S}; the B field is zero.
x_form_S_A_XO_Rc({{r,RegA}, {r,Src}}, XO, Rc) ->
    x_form(10#31, Src, RegA, 0, XO, Rc).

cntlzd_Rc(Opnds, Rc) -> x_form_S_A_XO_Rc(Opnds, 10#58, Rc).     % 64-bit
cntlzw_Rc(Opnds, Rc) -> x_form_S_A_XO_Rc(Opnds, 10#26, Rc).
extsb_Rc(Opnds, Rc) -> x_form_S_A_XO_Rc(Opnds, 10#954, Rc).
extsh_Rc(Opnds, Rc) -> x_form_S_A_XO_Rc(Opnds, 10#922, Rc).
extsw_Rc(Opnds, Rc) -> x_form_S_A_XO_Rc(Opnds, 10#986, Rc).     % 64-bit

mtmsr({{r,Src}}) ->             % supervisor
    x_form(10#31, Src, 0, 0, 10#146, 0).

mtmsrd({{r,Src}}) ->            % supervisor, 64-bit
    x_form(10#31, Src, 0, 0, 10#178, 0).

%% The 4-bit segment register number is range-checked by ?BITS and ends
%% up in the low four bits of the A field.
mtsr({{sr,SR}, {r,Src}}) ->     % supervisor
    x_form(10#31, Src, ?BITS(4,SR), 0, 10#210, 0).

mtsrd({{sr,SR}, {r,Src}}) ->    % supervisor, 64-bit
    x_form(10#31, Src, ?BITS(4,SR), 0, 10#82, 0).

mtsrdin({{r,Src}, {r,RegB}}) -> % supervisor, 64-bit
    x_form(10#31, Src, 0, RegB, 10#114, 0).

mtsrin({{r,Src}, {r,RegB}}) ->  % supervisor, 32-bit
    x_form(10#31, Src, 0, RegB, 10#242, 0).

slbia({}) ->                    % supervisor, 64-bit
    x_form(10#31, 0, 0, 0, 10#498, 0).

slbie({{r,RegB}}) ->            % supervisor, 64-bit
    x_form(10#31, 0, 0, RegB, 10#434, 0).

%% Shift right algebraic word immediate: operand order {A, S, SH}; the
%% shift count goes into the B field.
srawi_Rc({{r,RegA}, {r,Src}, {sh,SH}}, Rc) ->
    x_form(10#31, Src, RegA, SH, 10#824, Rc).

%% Register compare.  The 3-bit CR field number and the L bit share the
%% 5-bit field in bits 6-10: CRFD occupies the top three bits and L the
%% lowest one.
x_form_crfD_L_A_B_XO({{crf,CRFD}, L, {r,RegA}, {r,RegB}}, XO) ->
    %% ?ASSERT(L == 0), % L should be zero in 32-bit code
    x_form(10#31, (CRFD bsl 2) bor L, RegA, RegB, XO, 0).

cmp(Opnds) -> x_form_crfD_L_A_B_XO(Opnds, 0).
cmpl(Opnds) -> x_form_crfD_L_A_B_XO(Opnds, 10#32).

%% Floating-point compare (opcode 63); the CR field number occupies the
%% top three bits of the 5-bit field in bits 6-10.
x_form_crfD_A_B_XO({{crf,CRFD}, {fr,FrA}, {fr,FrB}}, XO) ->
    x_form(10#63, CRFD bsl 2, FrA, FrB, XO, 0).

fcmpo(Opnds) -> x_form_crfD_A_B_XO(Opnds, 10#32).
fcmpu(Opnds) -> x_form_crfD_A_B_XO(Opnds, 0).

%% Both 3-bit CR field numbers are shifted into the top three bits of
%% their respective 5-bit fields.
mcrfs({{crf,CRFD}, {crf,CRFS}}) ->
    x_form(10#63, CRFD bsl 2, CRFS bsl 2, 0, 10#64, 0).

%% mcrxr({{crf,CRFD}}) ->
%%     x_form(10#31, CRFD bsl 2, 0, 0, 10#512, 0).

%% The 4-bit immediate occupies the top four bits of the 5-bit B field.
mtfsfi_Rc({{crf,CRFD}, {crimm,IMM}}, Rc) ->
    x_form(10#63, CRFD bsl 2, 0, IMM bsl 1, 10#134, Rc).

%% Register trap: operand order {TO, A, B}.
x_form_TO_A_B_XO({{to,TO}, {r,RegA}, {r,RegB}}, XO) ->
    x_form(10#31, TO, RegA, RegB, XO, 0).

td(Opnds) -> x_form_TO_A_B_XO(Opnds, 10#68).    % 64-bit
tw(Opnds) -> x_form_TO_A_B_XO(Opnds, 10#4).

%% A single CR bit number in bits 6-10 (opcode 63).
x_form_crbD_XO_Rc({{crb,CRBD}}, XO, Rc) ->
    x_form(10#63, CRBD, 0, 0, XO, Rc).

mtfsb0_Rc(Opnds, Rc) -> x_form_crbD_XO_Rc(Opnds, 10#70, Rc).
mtfsb1_Rc(Opnds, Rc) -> x_form_crbD_XO_Rc(Opnds, 10#38, Rc).

%% Only the A and B registers; the field in bits 6-10 is zero.
x_form_A_B_XO({{r,RegA}, {r,RegB}}, XO) ->
    x_form(10#31, 0, RegA, RegB, XO, 0).

dcba(Opnds) -> x_form_A_B_XO(Opnds, 10#758).    % optional
dcbf(Opnds) -> x_form_A_B_XO(Opnds, 10#86).
dcbi(Opnds) -> x_form_A_B_XO(Opnds, 10#470).    % supervisor
dcbst(Opnds) -> x_form_A_B_XO(Opnds, 10#54).
dcbt(Opnds) -> x_form_A_B_XO(Opnds, 10#278).
dcbtst(Opnds) -> x_form_A_B_XO(Opnds, 10#246).
dcbz(Opnds) -> x_form_A_B_XO(Opnds, 10#1014).
icbi(Opnds) -> x_form_A_B_XO(Opnds, 10#982).

%% Only the B register.
x_form_B_XO({{r,RegB}}, XO) ->
    x_form(10#31, 0, 0, RegB, XO, 0).

tlbie(Opnds) -> x_form_B_XO(Opnds, 10#306).     % supervisor, optional
tlbld(Opnds) -> x_form_B_XO(Opnds, 10#978).     % supervisor, optional
tlbli(Opnds) -> x_form_B_XO(Opnds, 10#1010).    % supervisor, optional

%% No operands at all.
x_form_XO({}, XO) ->
    x_form(10#31, 0, 0, 0, XO, 0).

eieio(Opnds) -> x_form_XO(Opnds, 10#854).
sync(Opnds) -> x_form_XO(Opnds, 10#598).
tlbia(Opnds) -> x_form_XO(Opnds, 10#370).       % supervisor, optional
tlbsync(Opnds) -> x_form_XO(Opnds, 10#566).
% supervisor, optional

%%% XL-Form Instructions
%%% bcctr, bclr
%%% crand, crandc, creqv, crnand, crnor, cror, crorc, crxor
%%% mcrf
%%% isync, rfi
%%% rfid (64-bit)

%% Builds an XL-form word by OR-ing the ?BF/?BIT pieces together:
%% constant 19 in 0..5, three 5-bit fields in 6..10, 11..15 and 16..20,
%% XO in 21..30 and LK in bit 31.
%% NOTE(review): ?BF and ?BIT are defined outside this excerpt; the ranges
%% above are simply the macro arguments as written.
xl_form(Fld1, Fld2, Fld3, XO, LK) ->
  ?BF(0,5,10#19)
    bor ?BF(6,10,Fld1)
    bor ?BF(11,15,Fld2)
    bor ?BF(16,20,Fld3)
    bor ?BF(21,30,XO)
    bor ?BIT(31,LK).

%% Branch-conditional to CTR/LR: BO and BI operands, third field zero.
xl_form_BO_BI_XO_LK({{bo,BO}, {bi,BI}}, XO, LK) ->
  xl_form(BO, BI, 0, XO, LK).

bcctr_lk(Operands, LK) -> xl_form_BO_BI_XO_LK(Operands, 10#528, LK).
bclr_lk(Operands, LK) -> xl_form_BO_BI_XO_LK(Operands, 10#16, LK).

%% Condition-register logical operations on three CR bit operands; LK is 0.
xl_form_crbD_crbA_crbB_XO({{crb,DstBit}, {crb,BitA}, {crb,BitB}}, XO) ->
  xl_form(DstBit, BitA, BitB, XO, 0).

crand(Operands) -> xl_form_crbD_crbA_crbB_XO(Operands, 10#257).
crandc(Operands) -> xl_form_crbD_crbA_crbB_XO(Operands, 10#129).
creqv(Operands) -> xl_form_crbD_crbA_crbB_XO(Operands, 10#289).
crnand(Operands) -> xl_form_crbD_crbA_crbB_XO(Operands, 10#225).
crnor(Operands) -> xl_form_crbD_crbA_crbB_XO(Operands, 10#33).
cror(Operands) -> xl_form_crbD_crbA_crbB_XO(Operands, 10#449).
crorc(Operands) -> xl_form_crbD_crbA_crbB_XO(Operands, 10#417).
crxor(Operands) -> xl_form_crbD_crbA_crbB_XO(Operands, 10#193).

%% mcrf: both CR field numbers are shifted left by two; XO and LK are zero.
mcrf({{crf,DstField}, {crf,SrcField}}) ->
  xl_form(DstField bsl 2, SrcField bsl 2, 0, 0, 0).

%% Operand-less XL-form instructions: only the empty tuple is accepted.
xl_form_XO({}, XO) ->
  xl_form(0, 0, 0, XO, 0).

isync(Operands) -> xl_form_XO(Operands, 10#150).
rfi(Operands) -> xl_form_XO(Operands, 10#50). % supervisor
rfid(Operands) -> xl_form_XO(Operands, 10#18). % supervisor, 64-bit

%%% XFX-Form Instructions
%%% mfspr, mtspr, mftb, mtcrf

%% Builds an XFX-form word: constant 31 in 0..5, a 5-bit field in 6..10,
%% a 10-bit field in 11..20 and XO in 21..30.
xfx_form(Fld5, Fld10, XO) ->
  ?BF(0,5,10#31)
    bor ?BF(6,10,Fld5)
    bor ?BF(11,20,Fld10)
    bor ?BF(21,30,XO).

%% The 10-bit SPR/TBR number is stored with its two 5-bit halves swapped:
%% the low five bits of SPR become the upper half of the encoded field.
xfx_form_R_SPR_XO(Reg, SPR, XO) ->
  Low5 = SPR band 16#1F,
  High5 = (SPR bsr 5) band 16#1F,
  Swapped = (Low5 bsl 5) bor High5,
  xfx_form(Reg, Swapped, XO).

mfspr({{r,RD}, {spr,SPR}}) -> xfx_form_R_SPR_XO(RD, SPR, 10#339).
mtspr({{spr,SPR}, {r,RS}}) -> xfx_form_R_SPR_XO(RS, SPR, 10#467).
mftb({{r,RD}, {tbr,TBR}}) -> xfx_form_R_SPR_XO(RD, TBR, 10#371).

%% mtcrf: the mask is narrowed with ?BITS(8, _) and shifted left by one
%% before being placed in the 10-bit field.
mtcrf({{crm,Mask}, {r,RS}}) -> xfx_form(RS, ?BITS(8,Mask) bsl 1, 10#144).
%%% XFL-Form Instructions
%%% mtfsf

%% Builds the mtfsf word: constant 63 in 0..5, FM in 7..14, B in 16..20,
%% the fixed value 711 in 21..30 and Rc in bit 31.
xfl_form(FieldMask, FB, Rc) ->
  ?BF(0,5,10#63)
    bor ?BF(7,14,FieldMask)
    bor ?BF(16,20,FB)
    bor ?BF(21,30,10#711)
    bor ?BIT(31,Rc).

mtfsf_Rc({{fm,FieldMask}, {fr,FB}}, Rc) -> xfl_form(FieldMask, FB, Rc).

%%% XS-Form Instructions
%%% sradi (64-bit)

%% Like the X form, but XO occupies only 21..29 and bit 30 carries a second
%% shift-amount bit (SH2).
xs_form(RS, RA, ShLow, XO, ShHigh, Rc) ->
  ?BF(0,5,10#31)
    bor ?BF(6,10,RS)
    bor ?BF(11,15,RA)
    bor ?BF(16,20,ShLow)
    bor ?BF(21,29,XO)
    bor ?BIT(30,ShHigh)
    bor ?BIT(31,Rc).

%% 64-bit. The shift amount is split by sh6_bits0to4/1 and sh6_bit5/1.
sradi_Rc({{r,RA}, {r,RS}, {sh6,Shift6}}, Rc) ->
  xs_form(RS, RA, sh6_bits0to4(Shift6), 10#413, sh6_bit5(Shift6), Rc).

%%% XO-Form Instructions
%%% add, addc, adde, divw, divwu, mullw, subf, subfc, subfe
%%% divd, divdu, mulld (64-bit)
%%% mulhw, mulhwu
%%% mulhd, mulhdu (64-bit)
%%% addme, addze, neg, subfme, subfze

%% Builds an XO-form word: constant 31 in 0..5, D/A/B in 6..10, 11..15 and
%% 16..20, OE in bit 21, XO in 22..30 and Rc in bit 31.
xo_form(RD, RA, RB, OE, XO, Rc) ->
  ?BF(0,5,10#31)
    bor ?BF(6,10,RD)
    bor ?BF(11,15,RA)
    bor ?BF(16,20,RB)
    bor ?BIT(21,OE)
    bor ?BF(22,30,XO)
    bor ?BIT(31,Rc).

%% Three-register arithmetic: unwraps the {r,_} operands and forwards them.
xo_form_D_A_B_OE_XO_Rc({{r,RD}, {r,RA}, {r,RB}}, OE, XO, Rc) ->
  xo_form(RD, RA, RB, OE, XO, Rc).

add_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, OE, 10#266, Rc).
addc_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, OE, 10#10, Rc).
adde_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, OE, 10#138, Rc).
divd_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, OE, 10#489, Rc). % 64-bit
divdu_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, OE, 10#457, Rc). % 64-bit
divw_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, OE, 10#491, Rc).
divwu_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, OE, 10#459, Rc).
mulld_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, OE, 10#233, Rc). % 64-bit
mullw_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, OE, 10#235, Rc).
subf_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, OE, 10#40, Rc).
subfc_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, OE, 10#8, Rc).
subfe_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, OE, 10#136, Rc).
%% Multiply-high variants: same three-register shape, OE is always 0.
mulhd_Rc(Operands, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, 0, 10#73, Rc). % 64-bit
mulhdu_Rc(Operands, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, 0, 10#9, Rc). % 64-bit
mulhw_Rc(Operands, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, 0, 10#75, Rc).
mulhwu_Rc(Operands, Rc) -> xo_form_D_A_B_OE_XO_Rc(Operands, 0, 10#11, Rc).

%% Two-register XO-form arithmetic: the B field is zero.
xo_form_D_A_OE_XO_Rc({{r,RD}, {r,RA}}, OE, XO, Rc) ->
  xo_form(RD, RA, 0, OE, XO, Rc).

addme_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_OE_XO_Rc(Operands, OE, 10#234, Rc).
addze_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_OE_XO_Rc(Operands, OE, 10#202, Rc).
neg_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_OE_XO_Rc(Operands, OE, 10#104, Rc).
subfme_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_OE_XO_Rc(Operands, OE, 10#232, Rc).
subfze_OE_Rc(Operands, OE, Rc) -> xo_form_D_A_OE_XO_Rc(Operands, OE, 10#200, Rc).

%%% A-Form Instructions
%%% fadd, fadds, fdiv, fdivs, fsub, fsubs
%%% fmadd, fmadds, fmsub, fmsubs, fnmadd, fnmadds, fnmsub, fnmsubs, fsel
%%% fmul, fmuls
%%% fres, frsqrte, fsqrt, fsqrts

%% Builds an A-form word: OPCD in 0..5, D/A/B/C in 6..10, 11..15, 16..20
%% and 21..25, a 5-bit XO in 26..30 and Rc in bit 31.
a_form(OPCD, FD, FA, FB, FC, XO, Rc) ->
  ?BF(0,5,OPCD)
    bor ?BF(6,10,FD)
    bor ?BF(11,15,FA)
    bor ?BF(16,20,FB)
    bor ?BF(21,25,FC)
    bor ?BF(26,30,XO)
    bor ?BIT(31,Rc).

%% Three-operand FP arithmetic: the C field is zero.
a_form_D_A_B_XO_Rc(OPCD, {{fr,FD}, {fr,FA}, {fr,FB}}, XO, Rc) ->
  a_form(OPCD, FD, FA, FB, 0, XO, Rc).

%% In each family below the un-suffixed mnemonic is encoded with OPCD 63
%% and the `s'-suffixed one with OPCD 59.
fadd_OPCD_Rc(OPCD, Operands, Rc) -> a_form_D_A_B_XO_Rc(OPCD, Operands, 10#21, Rc).
fadd_Rc(Operands, Rc) -> fadd_OPCD_Rc(10#63, Operands, Rc).
fadds_Rc(Operands, Rc) -> fadd_OPCD_Rc(10#59, Operands, Rc).

fdiv_OPCD_Rc(OPCD, Operands, Rc) -> a_form_D_A_B_XO_Rc(OPCD, Operands, 10#18, Rc).
fdiv_Rc(Operands, Rc) -> fdiv_OPCD_Rc(10#63, Operands, Rc).
fdivs_Rc(Operands, Rc) -> fdiv_OPCD_Rc(10#59, Operands, Rc).

fsub_OPCD_Rc(OPCD, Operands, Rc) -> a_form_D_A_B_XO_Rc(OPCD, Operands, 10#20, Rc).
fsub_Rc(Operands, Rc) -> fsub_OPCD_Rc(10#63, Operands, Rc).
fsubs_Rc(Operands, Rc) -> fsub_OPCD_Rc(10#59, Operands, Rc).

%% Four-operand FP arithmetic. The operand tuple lists C before B, while
%% a_form/7 takes B before C.
a_form_D_A_B_C_XO_Rc(OPCD, {{fr,FD}, {fr,FA}, {fr,FC}, {fr,FB}}, XO, Rc) ->
  a_form(OPCD, FD, FA, FB, FC, XO, Rc).

fmadd_OPCD_Rc(OPCD, Operands, Rc) -> a_form_D_A_B_C_XO_Rc(OPCD, Operands, 10#29, Rc).
%% Fused multiply-add family. Each *_OPCD_Rc helper fixes the XO value; the
%% public wrappers pick OPCD 63 (un-suffixed) or 59 (`s'-suffixed).
fmadd_Rc(Operands, Rc) -> fmadd_OPCD_Rc(10#63, Operands, Rc).
fmadds_Rc(Operands, Rc) -> fmadd_OPCD_Rc(10#59, Operands, Rc).

fmsub_OPCD_Rc(OPCD, Operands, Rc) -> a_form_D_A_B_C_XO_Rc(OPCD, Operands, 10#28, Rc).
fmsub_Rc(Operands, Rc) -> fmsub_OPCD_Rc(10#63, Operands, Rc).
fmsubs_Rc(Operands, Rc) -> fmsub_OPCD_Rc(10#59, Operands, Rc).

fnmadd_OPCD_Rc(OPCD, Operands, Rc) -> a_form_D_A_B_C_XO_Rc(OPCD, Operands, 10#31, Rc).
fnmadd_Rc(Operands, Rc) -> fnmadd_OPCD_Rc(10#63, Operands, Rc).
fnmadds_Rc(Operands, Rc) -> fnmadd_OPCD_Rc(10#59, Operands, Rc).

fnmsub_OPCD_Rc(OPCD, Operands, Rc) -> a_form_D_A_B_C_XO_Rc(OPCD, Operands, 10#30, Rc).
fnmsub_Rc(Operands, Rc) -> fnmsub_OPCD_Rc(10#63, Operands, Rc).
fnmsubs_Rc(Operands, Rc) -> fnmsub_OPCD_Rc(10#59, Operands, Rc).

fsel_Rc(Operands, Rc) -> a_form_D_A_B_C_XO_Rc(10#63, Operands, 10#23, Rc). % optional

%% fmul/fmuls: the third operand goes into the C field and the B field is
%% zero; XO is fixed at 25.
fmul_OPCD_Rc(OPCD, {{fr,FD}, {fr,FA}, {fr,FC}}, Rc) ->
  a_form(OPCD, FD, FA, 0, FC, 10#25, Rc).

fmul_Rc(Operands, Rc) -> fmul_OPCD_Rc(10#63, Operands, Rc).
fmuls_Rc(Operands, Rc) -> fmul_OPCD_Rc(10#59, Operands, Rc).

%% Two-operand A-form instructions: the A and C fields are zero.
a_form_D_B_XO_Rc(OPCD, {{fr,FD}, {fr,FB}}, XO, Rc) ->
  a_form(OPCD, FD, 0, FB, 0, XO, Rc).

fres_Rc(Operands, Rc) -> a_form_D_B_XO_Rc(10#59, Operands, 10#24, Rc). % optional
frsqrte_Rc(Operands, Rc) -> a_form_D_B_XO_Rc(10#63, Operands, 10#26, Rc). % optional

fsqrt_OPCD_Rc(OPCD, Operands, Rc) -> a_form_D_B_XO_Rc(OPCD, Operands, 10#22, Rc). % optional
fsqrt_Rc(Operands, Rc) -> fsqrt_OPCD_Rc(10#63, Operands, Rc). % optional
fsqrts_Rc(Operands, Rc) -> fsqrt_OPCD_Rc(10#59, Operands, Rc). % optional

%%% M-Form Instructions
%%% rlwimi, rlwinm
%%% rlwnm

%% Builds an M-form word: OPCD in 0..5, S/A/SH in 6..10, 11..15 and 16..20,
%% MB in 21..25, ME in 26..30 and Rc in bit 31.
m_form(OPCD, RS, RA, Shift, MaskBegin, MaskEnd, Rc) ->
  ?BF(0,5,OPCD)
    bor ?BF(6,10,RS)
    bor ?BF(11,15,RA)
    bor ?BF(16,20,Shift)
    bor ?BF(21,25,MaskBegin)
    bor ?BF(26,30,MaskEnd)
    bor ?BIT(31,Rc).

%% Rotate-with-immediate-shift: the operand tuple lists A before S, while
%% m_form/7 takes S before A.
m_form_S_A_SH_MB_ME_Rc(OPCD, {{r,RA}, {r,RS}, {sh,Shift}, {mb,MaskBegin}, {me,MaskEnd}}, Rc) ->
  m_form(OPCD, RS, RA, Shift, MaskBegin, MaskEnd, Rc).

rlwimi_Rc(Operands, Rc) -> m_form_S_A_SH_MB_ME_Rc(10#20, Operands, Rc).
rlwinm_Rc(Operands, Rc) -> m_form_S_A_SH_MB_ME_Rc(10#21, Operands, Rc).
+ +rlwnm_Rc({{r,A}, {r,S}, {r,B}, {mb,MB}, {me,ME}}, Rc) -> + m_form(10#23, S, A, B, MB, ME, Rc). + +%%% MD-Form Instructions +%%% rldic, rldicl, rldicr, rldimi (64-bit) + +md_form(S, A, SH1, MB, XO, SH2, Rc) -> + ?BF(0,5,10#30) bor ?BF(6,10,S) bor ?BF(11,15,A) bor ?BF(16,20,SH1) bor ?BF(21,26,MB) bor ?BF(27,29,XO) bor ?BIT(30,SH2) bor ?BIT(31,Rc). + +mb6_reformat(MB6) -> + ((MB6 band 16#1F) bsl 1) bor ((MB6 bsr 5) band 1). + +sh6_bits0to4(SH6) -> + SH6 band 16#1F. + +sh6_bit5(SH6) -> + (SH6 bsr 5) band 1. + +md_form_S_A_SH6_MB6_XO_Rc({{r,A}, {r,S}, {sh6,SH6}, {mb6,MB6}}, XO, Rc) -> + md_form(S, A, sh6_bits0to4(SH6), mb6_reformat(MB6), XO, sh6_bit5(SH6), Rc). + +rldic_Rc(Opnds, Rc) -> md_form_S_A_SH6_MB6_XO_Rc(Opnds, 10#2, Rc). % 64-bit +rldicl_Rc(Opnds, Rc) -> md_form_S_A_SH6_MB6_XO_Rc(Opnds, 10#0, Rc). % 64-bit +rldimi_Rc(Opnds, Rc) -> md_form_S_A_SH6_MB6_XO_Rc(Opnds, 10#3, Rc). % 64-bit + +rldicr_Rc({{r,A}, {r,S}, {sh6,SH6}, {me6,ME6}}, Rc) -> % 64-bit + md_form(S, A, sh6_bits0to4(SH6), mb6_reformat(ME6), 10#1, sh6_bit5(SH6), Rc). + +%%% MDS-Form Instructions +%%% rldcl, rldcr (64-bit) + +mds_form(S, A, B, MB, XO, Rc) -> + ?BF(0,5,10#30) bor ?BF(6,10,S) bor ?BF(11,15,A) bor ?BF(16,20,B) bor ?BF(21,26,MB) bor ?BF(27,30,XO) bor ?BIT(31,Rc). + +rldcl({{r,A}, {r,S}, {r,B}, {mb6,MB6}}, Rc) -> % 64-bit + mds_form(S, A, B, mb6_reformat(MB6), 10#8, Rc). + +rldcr({{r,A}, {r,S}, {r,B}, {me6,ME6}}, Rc) -> % 64-bit + mds_form(S, A, B, mb6_reformat(ME6), 10#9, Rc). + +%%% main encode dispatch + +insn_encode(Op, Opnds) -> + case Op of + %% I-Form + 'b' -> b_AA_LK(Opnds, 0, 0); + 'ba' -> b_AA_LK(Opnds, 1, 0); + 'bl' -> b_AA_LK(Opnds, 0, 1); + 'bla' -> b_AA_LK(Opnds, 1, 1); + %% B-Form + 'bc' -> bc_AA_LK(Opnds, 0, 0); + 'bca' -> bc_AA_LK(Opnds, 1, 0); + 'bcl' -> bc_AA_LK(Opnds, 0, 1); + 'bcla' -> bc_AA_LK(Opnds, 1, 1); + %% SC-Form + 'sc' -> sc(Opnds); + %% D-Form + 'addi' -> addi(Opnds); + 'addic' -> addic(Opnds); + 'addic.' 
-> addic_dot(Opnds); + 'addis' -> addis(Opnds); + 'andi.' -> andi_dot(Opnds); + 'andis.' -> andis_dot(Opnds); + 'cmpi' -> cmpi(Opnds); + 'cmpli' -> cmpli(Opnds); + 'lbz' -> lbz(Opnds); + 'lbzu' -> lbzu(Opnds); + 'lfd' -> lfd(Opnds); + 'lfdu' -> lfdu(Opnds); + 'lfs' -> lfs(Opnds); + 'lfsu' -> lfsu(Opnds); + 'lha' -> lha(Opnds); + 'lhau' -> lhau(Opnds); + 'lhz' -> lhz(Opnds); + 'lhzu' -> lhzu(Opnds); + 'lmw' -> lmw(Opnds); + 'lwz' -> lwz(Opnds); + 'lwzu' -> lwzu(Opnds); + 'mulli' -> mulli(Opnds); + 'ori' -> ori(Opnds); + 'oris' -> oris(Opnds); + 'stb' -> stb(Opnds); + 'stbu' -> stbu(Opnds); + 'stfd' -> stfd(Opnds); + 'stfdu' -> stfdu(Opnds); + 'stfs' -> stfs(Opnds); + 'stfsu' -> stfsu(Opnds); + 'sth' -> sth(Opnds); + 'sthu' -> sthu(Opnds); + 'stmw' -> stmw(Opnds); + 'stw' -> stw(Opnds); + 'stwu' -> stwu(Opnds); + 'subfic' -> subfic(Opnds); + 'tdi' -> tdi(Opnds); + 'twi' -> twi(Opnds); + 'xori' -> xori(Opnds); + 'xoris' -> xoris(Opnds); + %% DS-Form + 'ld' -> ld(Opnds); + 'ldu' -> ldu(Opnds); + 'lwa' -> lwa(Opnds); + 'std' -> std(Opnds); + 'stdu' -> stdu(Opnds); + %% X-Form + 'and' -> and_Rc(Opnds, 0); + 'and.' -> and_Rc(Opnds, 1); + 'andc' -> andc_Rc(Opnds, 0); + 'andc.' -> andc_Rc(Opnds, 1); + 'cmp' -> cmp(Opnds); + 'cmpl' -> cmpl(Opnds); + 'cntlzd' -> cntlzd_Rc(Opnds, 0); + 'cntlzd.' -> cntlzd_Rc(Opnds, 1); + 'cntlzw' -> cntlzw_Rc(Opnds, 0); + 'cntlzw.' -> cntlzw_Rc(Opnds, 1); + 'dcba' -> dcba(Opnds); + 'dcbf' -> dcbf(Opnds); + 'dcbi' -> dcbi(Opnds); + 'dcbst' -> dcbst(Opnds); + 'dcbt' -> dcbt(Opnds); + 'dcbtst' -> dcbtst(Opnds); + 'dcbz' -> dcbz(Opnds); + 'eciwx' -> eciwx(Opnds); + 'ecowx' -> ecowx(Opnds); + 'eieio' -> eieio(Opnds); + 'eqv' -> eqv_Rc(Opnds, 0); + 'eqv.' -> eqv_Rc(Opnds, 1); + 'extsb' -> extsb_Rc(Opnds, 0); + 'extsb.' -> extsb_Rc(Opnds, 1); + 'extsh' -> extsh_Rc(Opnds, 0); + 'extsh.' -> extsh_Rc(Opnds, 1); + 'extsw' -> extsw_Rc(Opnds, 0); + 'extsw.' -> extsw_Rc(Opnds, 1); + 'fabs' -> fabs_Rc(Opnds, 0); + 'fabs.' 
-> fabs_Rc(Opnds, 1); + 'fcfid' -> fcfid_Rc(Opnds, 0); + 'fcfid.' -> fcfid_Rc(Opnds, 1); + 'fcmpo' -> fcmpo(Opnds); + 'fcmpu' -> fcmpu(Opnds); + 'fctid' -> fctid_Rc(Opnds, 0); + 'fctid.' -> fctid_Rc(Opnds, 1); + 'fctidz' -> fctidz_Rc(Opnds, 0); + 'fctidz.' -> fctidz_Rc(Opnds, 1); + 'fctiw' -> fctiw_Rc(Opnds, 0); + 'fctiw.' -> fctiw_Rc(Opnds, 1); + 'fctiwz' -> fctiwz_Rc(Opnds, 0); + 'fctiwz.' -> fctiwz_Rc(Opnds, 1); + 'fmr' -> fmr_Rc(Opnds, 0); + 'fmr.' -> fmr_Rc(Opnds, 1); + 'fnabs' -> fnabs_Rc(Opnds, 0); + 'fnabs.' -> fnabs_Rc(Opnds, 1); + 'fneg' -> fneg_Rc(Opnds, 0); + 'fneg.' -> fneg_Rc(Opnds, 1); + 'frsp' -> frsp_Rc(Opnds, 0); + 'frsp.' -> frsp_Rc(Opnds, 1); + 'icbi' -> icbi(Opnds); + 'lbzux' -> lbzux(Opnds); + 'lbzx' -> lbzx(Opnds); + 'ldarx' -> ldarx(Opnds); + 'ldux' -> ldux(Opnds); + 'ldx' -> ldx(Opnds); + 'lfdux' -> lfdux(Opnds); + 'lfdx' -> lfdx(Opnds); + 'lfsux' -> lfsux(Opnds); + 'lfsx' -> lfsx(Opnds); + 'lhaux' -> lhaux(Opnds); + 'lhax' -> lhax(Opnds); + 'lhbrx' -> lhbrx(Opnds); + 'lhzux' -> lhzux(Opnds); + 'lhzx' -> lhzx(Opnds); + 'lswi' -> lswi(Opnds); + 'lswx' -> lswx(Opnds); + 'lwarx' -> lwarx(Opnds); + 'lwaux' -> lwaux(Opnds); + 'lwax' -> lwax(Opnds); + 'lwbrx' -> lwbrx(Opnds); + 'lwzux' -> lwzux(Opnds); + 'lwzx' -> lwzx(Opnds); + 'mcrfs' -> mcrfs(Opnds); + %% 'mcrxr' -> mcrxr(Opnds); + 'mfcr' -> mfcr(Opnds); + 'mffs' -> mffs_Rc(Opnds, 0); + 'mffs.' -> mffs_Rc(Opnds, 1); + 'mfmsr' -> mfmsr(Opnds); + 'mfsr' -> mfsr(Opnds); + 'mfsrin' -> mfsrin(Opnds); + 'mtfsb0' -> mtfsb0_Rc(Opnds, 0); + 'mtfsb0.' -> mtfsb0_Rc(Opnds, 1); + 'mtfsb1' -> mtfsb1_Rc(Opnds, 0); + 'mtfsb1.' -> mtfsb1_Rc(Opnds, 1); + 'mtfsfi' -> mtfsfi_Rc(Opnds, 0); + 'mtfsfi.' -> mtfsfi_Rc(Opnds, 1); + 'mtmsr' -> mtmsr(Opnds); + 'mtmsrd' -> mtmsrd(Opnds); + 'mtsr' -> mtsr(Opnds); + 'mtsrd' -> mtsrd(Opnds); + 'mtsrdin' -> mtsrdin(Opnds); + 'mtsrin' -> mtsrin(Opnds); + 'nand' -> nand_Rc(Opnds, 0); + 'nand.' -> nand_Rc(Opnds, 1); + 'nor' -> nor_Rc(Opnds, 0); + 'nor.' 
-> nor_Rc(Opnds, 1); + 'or' -> or_Rc(Opnds, 0); + 'or.' -> or_Rc(Opnds, 1); + 'orc' -> orc_Rc(Opnds, 0); + 'orc.' -> orc_Rc(Opnds, 1); + 'slbia' -> slbia(Opnds); + 'slbie' -> slbie(Opnds); + 'sld' -> sld_Rc(Opnds, 0); + 'sld.' -> sld_Rc(Opnds, 1); + 'slw' -> slw_Rc(Opnds, 0); + 'slw.' -> slw_Rc(Opnds, 1); + 'srad' -> srad_Rc(Opnds, 0); + 'srad.' -> srad_Rc(Opnds, 1); + 'sraw' -> sraw_Rc(Opnds, 0); + 'sraw.' -> sraw_Rc(Opnds, 1); + 'srawi' -> srawi_Rc(Opnds, 0); + 'srawi.' -> srawi_Rc(Opnds, 1); + 'srd' -> srd_Rc(Opnds, 0); + 'srd.' -> srd_Rc(Opnds, 1); + 'srw' -> srw_Rc(Opnds, 0); + 'srw.' -> srw_Rc(Opnds, 1); + 'stbux' -> stbux(Opnds); + 'stbx' -> stbx(Opnds); + 'stdcx.' -> stdcx_dot(Opnds); + 'stdux' -> stdux(Opnds); + 'stdx' -> stdx(Opnds); + 'stfdux' -> stfdux(Opnds); + 'stfdx' -> stfdx(Opnds); + 'stfiwx' -> stfiwx(Opnds); + 'stfsux' -> stfsux(Opnds); + 'stfsx' -> stfsx(Opnds); + 'sthbrx' -> sthbrx(Opnds); + 'sthux' -> sthux(Opnds); + 'sthx' -> sthx(Opnds); + 'stswi' -> stswi(Opnds); + 'stswx' -> stswx(Opnds); + 'stwbrx' -> stwbrx(Opnds); + 'stwcx.' -> stwcx_dot(Opnds); + 'stwux' -> stwux(Opnds); + 'stwx' -> stwx(Opnds); + 'sync' -> sync(Opnds); + 'td' -> td(Opnds); + 'tlbia' -> tlbia(Opnds); % not implemented in MPC603e or MPC7450 + 'tlbie' -> tlbie(Opnds); + 'tlbld' -> tlbld(Opnds); + 'tlbli' -> tlbli(Opnds); + 'tlbsync' -> tlbsync(Opnds); + 'tw' -> tw(Opnds); + 'xor' -> xor_Rc(Opnds, 0); + 'xor.' 
-> xor_Rc(Opnds, 1); + %% XL-Form + 'bcctr' -> bcctr_lk(Opnds, 0); + 'bcctrl' -> bcctr_lk(Opnds, 1); + 'bclr' -> bclr_lk(Opnds, 0); + 'bclrl' -> bclr_lk(Opnds, 1); + 'crand' -> crand(Opnds); + 'crandc' -> crandc(Opnds); + 'creqv' -> creqv(Opnds); + 'crnand' -> crnand(Opnds); + 'crnor' -> crnor(Opnds); + 'cror' -> cror(Opnds); + 'crorc' -> crorc(Opnds); + 'crxor' -> crxor(Opnds); + 'isync' -> isync(Opnds); + 'mcrf' -> mcrf(Opnds); + 'rfi' -> rfi(Opnds); + 'rfid' -> rfid(Opnds); + %% XFX-Form + 'mfspr' -> mfspr(Opnds); + 'mftb' -> mftb(Opnds); + 'mtcrf' -> mtcrf(Opnds); + 'mtspr' -> mtspr(Opnds); + %% XFL-Form + 'mtfsf' -> mtfsf_Rc(Opnds, 0); + 'mtfsf.' -> mtfsf_Rc(Opnds, 1); + %% XS-Form + 'sradi' -> sradi_Rc(Opnds, 0); + 'sradi.' -> sradi_Rc(Opnds, 1); + %% XO-Form + 'add' -> add_OE_Rc(Opnds, 0, 0); + 'add.' -> add_OE_Rc(Opnds, 0, 1); + 'addo' -> add_OE_Rc(Opnds, 1, 0); + 'addo.' -> add_OE_Rc(Opnds, 1, 1); + 'addc' -> addc_OE_Rc(Opnds, 0, 0); + 'addc.' -> addc_OE_Rc(Opnds, 0, 1); + 'addco' -> addc_OE_Rc(Opnds, 1, 0); + 'addco.' -> addc_OE_Rc(Opnds, 1, 1); + 'adde' -> adde_OE_Rc(Opnds, 0, 0); + 'adde.' -> adde_OE_Rc(Opnds, 0, 1); + 'addeo' -> adde_OE_Rc(Opnds, 1, 0); + 'addeo.' -> adde_OE_Rc(Opnds, 1, 1); + 'addme' -> addme_OE_Rc(Opnds, 0, 0); + 'addme.' -> addme_OE_Rc(Opnds, 0, 1); + 'addmeo' -> addme_OE_Rc(Opnds, 1, 0); + 'addmeo.' -> addme_OE_Rc(Opnds, 1, 1); + 'addze' -> addze_OE_Rc(Opnds, 0, 0); + 'addze.' -> addze_OE_Rc(Opnds, 0, 1); + 'addzeo' -> addze_OE_Rc(Opnds, 1, 0); + 'addzeo.' -> addze_OE_Rc(Opnds, 1, 1); + 'divd' -> divd_OE_Rc(Opnds, 0, 0); + 'divd.' -> divd_OE_Rc(Opnds, 0, 1); + 'divdo' -> divd_OE_Rc(Opnds, 1, 0); + 'divdo.' -> divd_OE_Rc(Opnds, 1, 1); + 'divdu' -> divdu_OE_Rc(Opnds, 0, 0); + 'divdu.' -> divdu_OE_Rc(Opnds, 0, 1); + 'divduo' -> divdu_OE_Rc(Opnds, 1, 0); + 'divduo.' -> divdu_OE_Rc(Opnds, 1, 1); + 'divw' -> divw_OE_Rc(Opnds, 0, 0); + 'divw.' -> divw_OE_Rc(Opnds, 0, 1); + 'divwo' -> divw_OE_Rc(Opnds, 1, 0); + 'divwo.' 
-> divw_OE_Rc(Opnds, 1, 1); + 'divwu' -> divwu_OE_Rc(Opnds, 0, 0); + 'divwu.' -> divwu_OE_Rc(Opnds, 0, 1); + 'divwuo' -> divwu_OE_Rc(Opnds, 1, 0); + 'divwuo.' -> divwu_OE_Rc(Opnds, 1, 1); + 'mulhd' -> mulhd_Rc(Opnds, 0); + 'mulhd.' -> mulhd_Rc(Opnds, 1); + 'mulhdu' -> mulhdu_Rc(Opnds, 0); + 'mulhdu.' -> mulhdu_Rc(Opnds, 1); + 'mulhw' -> mulhw_Rc(Opnds, 0); + 'mulhw.' -> mulhw_Rc(Opnds, 1); + 'mulhwu' -> mulhwu_Rc(Opnds, 0); + 'mulhwu.' -> mulhwu_Rc(Opnds, 1); + 'mulld' -> mulld_OE_Rc(Opnds, 0, 0); + 'mulld.' -> mulld_OE_Rc(Opnds, 0, 1); + 'mulldo' -> mulld_OE_Rc(Opnds, 1, 0); + 'mulldo.' -> mulld_OE_Rc(Opnds, 1, 1); + 'mullw' -> mullw_OE_Rc(Opnds, 0, 0); + 'mullw.' -> mullw_OE_Rc(Opnds, 0, 1); + 'mullwo' -> mullw_OE_Rc(Opnds, 1, 0); + 'mullwo.' -> mullw_OE_Rc(Opnds, 1, 1); + 'neg' -> neg_OE_Rc(Opnds, 0, 0); + 'neg.' -> neg_OE_Rc(Opnds, 0, 1); + 'nego' -> neg_OE_Rc(Opnds, 1, 0); + 'nego.' -> neg_OE_Rc(Opnds, 1, 1); + 'subf' -> subf_OE_Rc(Opnds, 0, 0); + 'subf.' -> subf_OE_Rc(Opnds, 0, 1); + 'subfo' -> subf_OE_Rc(Opnds, 1, 0); + 'subfo.' -> subf_OE_Rc(Opnds, 1, 1); + 'subfc' -> subfc_OE_Rc(Opnds, 0, 0); + 'subfc.' -> subfc_OE_Rc(Opnds, 0, 1); + 'subfco' -> subfc_OE_Rc(Opnds, 1, 0); + 'subfco.' -> subfc_OE_Rc(Opnds, 1, 1); + 'subfe' -> subfe_OE_Rc(Opnds, 0, 0); + 'subfe.' -> subfe_OE_Rc(Opnds, 0, 1); + 'subfeo' -> subfe_OE_Rc(Opnds, 1, 0); + 'subfeo.' -> subfe_OE_Rc(Opnds, 1, 1); + 'subfme' -> subfme_OE_Rc(Opnds, 0, 0); + 'subfme.' -> subfme_OE_Rc(Opnds, 0, 1); + 'subfmeo' -> subfme_OE_Rc(Opnds, 1, 0); + 'subfmeo.' -> subfme_OE_Rc(Opnds, 1, 1); + 'subfze' -> subfze_OE_Rc(Opnds, 0, 0); + 'subfze.' -> subfze_OE_Rc(Opnds, 0, 1); + 'subfzeo' -> subfze_OE_Rc(Opnds, 1, 0); + 'subfzeo.' -> subfze_OE_Rc(Opnds, 1, 1); + %% A-Form + 'fadd' -> fadd_Rc(Opnds, 0); + 'fadd.' -> fadd_Rc(Opnds, 1); + 'fadds' -> fadds_Rc(Opnds, 0); + 'fadds.' -> fadds_Rc(Opnds, 1); + 'fdiv' -> fdiv_Rc(Opnds, 0); + 'fdiv.' -> fdiv_Rc(Opnds, 1); + 'fdivs' -> fdivs_Rc(Opnds, 0); + 'fdivs.' 
-> fdivs_Rc(Opnds, 1); + 'fmadd' -> fmadd_Rc(Opnds, 0); + 'fmadd.' -> fmadd_Rc(Opnds, 1); + 'fmadds' -> fmadds_Rc(Opnds, 0); + 'fmadds.' -> fmadds_Rc(Opnds, 1); + 'fmsub' -> fmsub_Rc(Opnds, 0); + 'fmsub.' -> fmsub_Rc(Opnds, 1); + 'fmsubs' -> fmsubs_Rc(Opnds, 0); + 'fmsubs.' -> fmsubs_Rc(Opnds, 1); + 'fmul' -> fmul_Rc(Opnds, 0); + 'fmul.' -> fmul_Rc(Opnds, 1); + 'fmuls' -> fmuls_Rc(Opnds, 0); + 'fmuls.' -> fmuls_Rc(Opnds, 1); + 'fnmadd' -> fnmadd_Rc(Opnds, 0); + 'fnmadd.' -> fnmadd_Rc(Opnds, 1); + 'fnmadds' -> fnmadds_Rc(Opnds, 0); + 'fnmadds.' -> fnmadds_Rc(Opnds, 1); + 'fnmsub' -> fnmsub_Rc(Opnds, 0); + 'fnmsub.' -> fnmsub_Rc(Opnds, 1); + 'fnmsubs' -> fnmsubs_Rc(Opnds, 0); + 'fnmsubs.' -> fnmsubs_Rc(Opnds, 1); + 'fres' -> fres_Rc(Opnds, 0); + 'fres.' -> fres_Rc(Opnds, 1); + 'frsqrte' -> frsqrte_Rc(Opnds, 0); + 'frsqrte.' -> frsqrte_Rc(Opnds, 1); + 'fsel' -> fsel_Rc(Opnds, 0); + 'fsel.' -> fsel_Rc(Opnds, 1); + 'fsqrt' -> fsqrt_Rc(Opnds, 0); % not implemented in MPC603e or MPC7450 + 'fsqrt.' -> fsqrt_Rc(Opnds, 1); % not implemented in MPC603e or MPC7450 + 'fsqrts' -> fsqrts_Rc(Opnds, 0); % not implemented in MPC603e or MPC7450 + 'fsqrts.' -> fsqrts_Rc(Opnds, 1); % not implemented in MPC603e or MPC7450 + 'fsub' -> fsub_Rc(Opnds, 0); + 'fsub.' -> fsub_Rc(Opnds, 1); + 'fsubs' -> fsubs_Rc(Opnds, 0); + 'fsubs.' -> fsubs_Rc(Opnds, 1); + %% M-Form + 'rlwimi' -> rlwimi_Rc(Opnds, 0); + 'rlwimi.' -> rlwimi_Rc(Opnds, 1); + 'rlwinm' -> rlwinm_Rc(Opnds, 0); + 'rlwinm.' -> rlwinm_Rc(Opnds, 1); + 'rlwnm' -> rlwnm_Rc(Opnds, 0); + 'rlwnm.' -> rlwnm_Rc(Opnds, 1); + %% MD-Form + 'rldic' -> rldic_Rc(Opnds, 0); + 'rldic.' -> rldic_Rc(Opnds, 1); + 'rldicl' -> rldicl_Rc(Opnds, 0); + 'rldicl.' -> rldicl_Rc(Opnds, 1); + 'rldicr' -> rldicr_Rc(Opnds, 0); + 'rldicr.' -> rldicr_Rc(Opnds, 1); + 'rldimi' -> rldimi_Rc(Opnds, 0); + 'rldimi.' -> rldimi_Rc(Opnds, 1); + %% MDS-Form + 'rldcl' -> rldcl(Opnds, 0); + 'rldcl.' -> rldcl(Opnds, 1); + 'rldcr' -> rldcr(Opnds, 0); + 'rldcr.' 
-> rldcr(Opnds, 1); + _ -> exit({?MODULE,insn_encode,Op}) + end. + +%%% testing interface + +-ifdef(TESTING). + +say(OS, Str) -> + file:write(OS, Str). + +hex_digit(Dig0) -> + Dig = Dig0 band 16#F, + if Dig >= 16#A -> $A + (Dig - 16#A); + true -> $0 + Dig + end. + +say_byte(OS, Byte) -> + say(OS, [hex_digit(Byte bsr 4)]), + say(OS, [hex_digit(Byte)]). + +say_word(OS, Word) -> + say(OS, "0x"), + say_byte(OS, Word bsr 24), + say_byte(OS, Word bsr 16), + say_byte(OS, Word bsr 8), + say_byte(OS, Word). + +t(OS, Op, Opnds) -> + Word = insn_encode(Op, Opnds), + say(OS, "\t.long "), + say_word(OS, Word), + say(OS, "\n"). + +dotest1(OS) -> + say(OS, "\t.text\n\t.align 4\n"), + %% + R14 = {r,14}, + R10 = {r,10}, + R11 = {r,11}, + F2 = {fr,2}, + F4 = {fr,4}, + F6 = {fr,6}, + F8 = {fr,8}, + DispM3 = {d,16#FFFD}, + DS = {ds,16#FFFD bsr 2}, + SIMM99 = {simm,10#99}, + UIMM4711 = {uimm,10#4711}, + TO_LLE = {to, 2#00110}, % =, dotest1(group_leader()). + +dotest(File) -> + {ok,OS} = file:open(File, [write]), + dotest1(OS), + file:close(OS). + +-endif. diff --git a/lib/hipe/ppc/hipe_ppc_finalise.erl b/lib/hipe/ppc/hipe_ppc_finalise.erl new file mode 100644 index 0000000000..c4b9526fec --- /dev/null +++ b/lib/hipe/ppc/hipe_ppc_finalise.erl @@ -0,0 +1,65 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. 
%%
%% %CopyrightEnd%
%%

%% Finalisation pass over a #defun{}: pseudo instructions are expanded into
%% the instructions built by hipe_ppc:mk_*, and branches to the label that
%% immediately follows them are removed.
-module(hipe_ppc_finalise).
-export([finalise/1]).
-include("hipe_ppc.hrl").

%% Returns Defun with its code field replaced by the expanded and
%% peephole-cleaned instruction list.
finalise(Defun) ->
  #defun{code=PseudoCode} = Defun,
  Expanded = expand(PseudoCode),
  Defun#defun{code=peep(Expanded)}.

%% Expands every instruction in order. The result is accumulated in
%% reverse and flipped once at the end.
expand(Insns) ->
  lists:reverse(lists:foldl(fun expand_insn/2, [], Insns)).

%% Pushes the expansion of one instruction onto the reversed accumulator.
%% Because the accumulator is reversed afterwards, the element written
%% first in each list below comes last in the final code.
expand_insn(#pseudo_bc{bcond=BCond,true_label=TrueLab,false_label=FalseLab,pred=Pred},
            Accum) ->
  %% Final order: conditional branch to TrueLab, then a branch to FalseLab.
  [hipe_ppc:mk_b_label(FalseLab),
   hipe_ppc:mk_bc(BCond, TrueLab, Pred) |
   Accum];
expand_insn(#pseudo_call{func=FunC,sdesc=SDesc,contlab=ContLab,linkage=Linkage},
            Accum) ->
  %% Final order: the call itself, then a branch to the continuation label.
  [hipe_ppc:mk_b_label(ContLab),
   expand_call(FunC, SDesc, Linkage) |
   Accum];
expand_insn(#pseudo_tailcall_prepare{}, Accum) ->
  %% Produces no code at all.
  Accum;
expand_insn(Insn, Accum) ->
  [Insn|Accum].

%% Picks the call instruction: mk_bctrl when the callee is 'ctr',
%% mk_bl for anything else.
expand_call('ctr', SDesc, _Linkage) ->
  hipe_ppc:mk_bctrl(SDesc);
expand_call(Fun, SDesc, Linkage) ->
  hipe_ppc:mk_bl(Fun, SDesc, Linkage).

%% Peephole pass: drops a #b_label{} whose target is the #label{} that
%% directly follows it.
peep(Insns) ->
  peep_list(Insns, []).

peep_list([#b_label{label=Target} | [#label{label=Target}|_] = Tail], Accum) ->
  peep_list(Tail, Accum);
peep_list([Insn|Tail], Accum) ->
  peep_list(Tail, [Insn|Accum]);
peep_list([], Accum) ->
  lists:reverse(Accum).
diff --git a/lib/hipe/ppc/hipe_ppc_frame.erl b/lib/hipe/ppc/hipe_ppc_frame.erl
new file mode 100644
index 0000000000..158009872f
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_frame.erl
@@ -0,0 +1,657 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.
%% See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_frame).
-export([frame/1]).
-include("hipe_ppc.hrl").
-include("../rtl/hipe_literals.hrl").

%% Entry point. Collects the stack-passed formals, the temps, the minimum
%% frame and the clobbers-LR flag for Defun, rewrites every basic block of
%% its CFG, adds the prologue and returns the linearised result.
frame(Defun) ->
  StackFormals = fix_formals(hipe_ppc:defun_formals(Defun)),
  AllTemps = all_temps(hipe_ppc:defun_code(Defun), StackFormals),
  MinFrame = defun_minframe(Defun),
  FrameTemps = ensure_minframe(MinFrame, AllTemps),
  ClobbersLR = clobbers_lr(hipe_ppc:defun_code(Defun)),
  InitialCFG = hipe_ppc_cfg:init(Defun),
  Liveness = hipe_ppc_liveness_all:analyse(InitialCFG),
  FramedCFG = do_body(InitialCFG, Liveness, StackFormals, FrameTemps, ClobbersLR),
  hipe_ppc_cfg:linearise(FramedCFG).

%% Drops the first hipe_ppc_registers:nr_args() formals; what remains is
%% the tail of the list, or [] when the list is shorter than that.
fix_formals(Formals) ->
  fix_formals(hipe_ppc_registers:nr_args(), Formals).

fix_formals(_, []) -> [];
fix_formals(0, Remaining) -> Remaining;
fix_formals(N, [_|Remaining]) -> fix_formals(N-1, Remaining).

%% Builds the frame context, rewrites all blocks, then adds the prologue.
do_body(CFG, Liveness, Formals, Temps, ClobbersLR) ->
  Context = mk_context(Liveness, Formals, Temps, ClobbersLR),
  do_prologue(do_blocks(CFG, Context), Context).

do_blocks(CFG, Context) ->
  do_blocks(hipe_ppc_cfg:labels(CFG), CFG, Context).

%% Rewrites the code of each labelled block in turn, threading the updated
%% CFG through the traversal.
do_blocks(Labels, CFG, Context) ->
  lists:foldl(
    fun(Label, CFGAcc) ->
        Liveness = context_liveness(Context),
        LiveOut = hipe_ppc_liveness_all:liveout(Liveness, Label),
        Block = hipe_ppc_cfg:bb(CFGAcc, Label),
        OldCode = hipe_bb:code(Block),
        NewCode = do_block(OldCode, LiveOut, Context),
        NewBlock = hipe_bb:code_update(Block, NewCode),
        hipe_ppc_cfg:bb_add(CFGAcc, Label, NewBlock)
    end,
    CFG,
    Labels).

%% Every block is rewritten starting from a frame-pointer offset equal to
%% the context's frame size.
do_block(Insns, LiveOut, Context) ->
  do_block(Insns, LiveOut, Context, context_framesize(Context), []).
%% Rewrites the instructions of one block while tracking the current
%% frame-pointer offset. At the end of the block the offset must be back
%% at the frame size, otherwise the process exits.
do_block([Insn|Rest], LiveOut, Context, FPoffIn, RevCode) ->
  {Expansion, FPoffOut} = do_insn(Insn, LiveOut, Context, FPoffIn),
  do_block(Rest, LiveOut, Context, FPoffOut, lists:reverse(Expansion, RevCode));
do_block([], _, Context, FPoff, RevCode) ->
  case context_framesize(Context) of
    FPoff -> lists:reverse(RevCode);
    _ -> exit({?MODULE,do_block,FPoff})
  end.

%% Returns {NewInstructions, NewFPoff} for a single instruction.
%% Instructions that need no frame handling pass through unchanged.
do_insn(#blr{} = I, _LiveOut, Context, FPoff) ->
  {do_blr(I, Context, FPoff), context_framesize(Context)};
do_insn(#pseudo_call{} = I, LiveOut, Context, FPoff) ->
  do_pseudo_call(I, LiveOut, Context, FPoff);
do_insn(#pseudo_call_prepare{} = I, _LiveOut, _Context, FPoff) ->
  do_pseudo_call_prepare(I, FPoff);
do_insn(#pseudo_move{} = I, _LiveOut, Context, FPoff) ->
  {do_pseudo_move(I, Context, FPoff), FPoff};
do_insn(#pseudo_tailcall{} = I, _LiveOut, Context, _FPoff) ->
  {do_pseudo_tailcall(I, Context), context_framesize(Context)};
do_insn(#pseudo_fmove{} = I, _LiveOut, Context, FPoff) ->
  {do_pseudo_fmove(I, Context, FPoff), FPoff};
do_insn(I, _LiveOut, _Context, FPoff) ->
  {[I], FPoff}.

%%%
%%% Moves, with Dst or Src possibly a pseudo
%%%

%% A pseudo destination becomes an 'stw' to its stack slot, a pseudo source
%% an 'lwz' from it; otherwise the move is an 'or' of Src with itself.
%% The destination is tested first.
do_pseudo_move(I, Context, FPoff) ->
  Dst = hipe_ppc:pseudo_move_dst(I),
  Src = hipe_ppc:pseudo_move_src(I),
  case temp_is_pseudo(Dst) of
    true ->
      DstOff = pseudo_offset(Dst, FPoff, Context),
      mk_store('stw', Src, DstOff, mk_sp(), []);
    _ ->
      do_pseudo_move_from(Src, Dst, Context, FPoff)
  end.

%% Second half of do_pseudo_move/3, used when Dst is not a pseudo.
do_pseudo_move_from(Src, Dst, Context, FPoff) ->
  case temp_is_pseudo(Src) of
    true ->
      SrcOff = pseudo_offset(Src, FPoff, Context),
      mk_load('lwz', Dst, SrcOff, mk_sp(), []);
    _ ->
      [hipe_ppc:mk_alu('or', Dst, Src, Src)]
  end.

%% Floating-point counterpart of do_pseudo_move/3: hipe_ppc:mk_fstore,
%% hipe_ppc:mk_fload, or an 'fmr' between registers.
do_pseudo_fmove(I, Context, FPoff) ->
  Dst = hipe_ppc:pseudo_fmove_dst(I),
  Src = hipe_ppc:pseudo_fmove_src(I),
  case temp_is_pseudo(Dst) of
    true ->
      DstOff = pseudo_offset(Dst, FPoff, Context),
      hipe_ppc:mk_fstore(Src, DstOff, mk_sp(), 0);
    _ ->
      do_pseudo_fmove_from(Src, Dst, Context, FPoff)
  end.

%% Second half of do_pseudo_fmove/3, used when Dst is not a pseudo.
do_pseudo_fmove_from(Src, Dst, Context, FPoff) ->
  case temp_is_pseudo(Src) of
    true ->
      SrcOff = pseudo_offset(Src, FPoff, Context),
      hipe_ppc:mk_fload(Dst, SrcOff, mk_sp(), 0);
    _ ->
      [hipe_ppc:mk_fp_unary('fmr', Dst, Src)]
  end.

%% Stack offset of a pseudo temp: the current frame-pointer offset plus the
%% temp's offset recorded in the context.
pseudo_offset(Temp, FPoff, Context) ->
  FPoff + context_offset(Context, Temp).

%%%
%%% Return - deallocate frame and emit the 'blr' insn.
%%%

%% Expands a return: the optional LR restore, then the stack-pointer
%% adjustment by FPoff plus one word per stack formal, then the blr itself.
do_blr(I, Context, FPoff) ->
  %% XXX: perhaps use explicit pseudo_move;mtlr,
  %% avoiding the need to hard-code Temp1 here
  %% XXX: typically only one instruction between
  %% the mtlr and the blr, ouch
  FrameBytes = FPoff + word_size() * context_arity(Context),
  Epilogue = adjust_sp(FrameBytes, [I]),
  restore_lr(FPoff, Context, Epilogue).

%% When the function clobbers LR, puts an 'lwz' from one word below FPoff
%% (relative to SP) and an mtspr to 'lr' in front of Rest; otherwise Rest
%% is returned untouched.
restore_lr(FPoff, Context, Rest) ->
  case context_clobbers_lr(Context) of
    false ->
      Rest;
    true ->
      Temp = mk_temp1(),
      SavedLROff = FPoff - word_size(),
      SP = mk_sp(),
      MoveToLR = hipe_ppc:mk_mtspr('lr', Temp),
      mk_load('lwz', Temp, SavedLROff, SP, [MoveToLR | Rest])
  end.

%% Adds N to the stack pointer via hipe_ppc:mk_addi; emits nothing for N = 0.
adjust_sp(0, Rest) ->
  Rest;
adjust_sp(N, Rest) ->
  SP = mk_sp(),
  hipe_ppc:mk_addi(SP, SP, N, Rest).

%%%
%%% Recursive calls.
%%%

%% Creates the outgoing arguments area on the stack: SP moves down by one
%% word per stack argument and the tracked offset grows by the same amount.
do_pseudo_call_prepare(I, FPoff0) ->
  ArgBytes = hipe_ppc:pseudo_call_prepare_nrstkargs(I) * word_size(),
  {adjust_sp(-ArgBytes, []), FPoff0 + ArgBytes}.

%% Rebuilds the pseudo_call with a fresh stack descriptor covering the
%% pseudo temps that are live out, records the stack need of the call in
%% the context, and returns the offset with the stack arguments removed.
do_pseudo_call(I, LiveOut, Context, FPoff0) ->
  #ppc_sdesc{exnlab=ExnLab,arity=OrigArity} = hipe_ppc:pseudo_call_sdesc(I),
  FunC = hipe_ppc:pseudo_call_func(I),
  LivePseudos = lists:filter(fun(Temp) -> temp_is_pseudo(Temp) end, LiveOut),
  SDesc = mk_sdesc(ExnLab, Context, LivePseudos),
  ContLab = hipe_ppc:pseudo_call_contlab(I),
  Linkage = hipe_ppc:pseudo_call_linkage(I),
  Call = hipe_ppc:mk_pseudo_call(FunC, SDesc, ContLab, Linkage),
  StkArity = erlang:max(0, OrigArity - hipe_ppc_registers:nr_args()),
  context_need_stack(Context, stack_need(FPoff0, StkArity, FunC)),
  {[Call], FPoff0 - word_size() * StkArity}.

%% Stack needed at a call site. Primops and built-in MFAs need only FPoff;
%% other MFAs and calls through 'ctr' use stack_need_general/2.
stack_need(FPoff, StkArity, FunC) ->
  NeedsGeneral =
    case FunC of
      #ppc_prim{} -> false;
      #ppc_mfa{m=M,f=F,a=A} -> not erlang:is_builtin(M, F, A);
      'ctr' -> true
    end,
  case NeedsGeneral of
    true -> stack_need_general(FPoff, StkArity);
    false -> FPoff
  end.

%% FPoff plus (?PPC_LEAF_WORDS - StkArity) words, but never less than FPoff.
stack_need_general(FPoff, StkArity) ->
  ExtraBytes = (?PPC_LEAF_WORDS - StkArity) * word_size(),
  erlang:max(FPoff, FPoff + ExtraBytes).
%%%
%%% Create stack descriptors for call sites.
%%%

%% Stack descriptor for normal calls: the live slots are those of the
%% tagged temps only, and the frame size is given in words minus one.
mk_sdesc(ExnLab, Context, Temps) ->
  LiveSlots = mk_live(Context, only_tagged(Temps)),
  Arity = context_arity(Context),
  FrameWords = context_framesize(Context) div word_size(),
  hipe_ppc:mk_sdesc(ExnLab, FrameWords - 1, Arity, list_to_tuple(LiveSlots)).

%% Keeps the temps whose hipe_ppc:temp_type/1 is 'tagged'.
only_tagged(Temps) ->
  lists:filter(fun(Temp) -> hipe_ppc:temp_type(Temp) =:= 'tagged' end, Temps).

%% Sorted list of the stack slot numbers of Temps.
mk_live(Context, Temps) ->
  Slots = lists:map(fun(Temp) -> temp_to_slot(Context, Temp) end, Temps),
  lists:sort(Slots).

%% Slot number of a temp: frame size plus the temp's offset, in words.
temp_to_slot(Context, Temp) ->
  ByteOffset = context_framesize(Context) + context_offset(Context, Temp),
  ByteOffset div word_size().

%% Stack descriptor for inc_stack_0 calls: no handler, zero frame size and
%% no live slots.
mk_minimal_sdesc(Context) ->
  hipe_ppc:mk_sdesc([], 0, context_arity(Context), {}).

%%%
%%% Tailcalls.
%%%

%% Expands a tailcall; always at FPoff=context_framesize(Context).
%% The result is the LR restore, the argument shuffle, the stack-pointer
%% adjustment and finally the branch to the callee.
do_pseudo_tailcall(I, Context) ->
  Arity = context_arity(Context),
  StkArgs = hipe_ppc:pseudo_tailcall_stkargs(I),
  FunC = hipe_ppc:pseudo_tailcall_func(I),
  Linkage = hipe_ppc:pseudo_tailcall_linkage(I),
  {ArgMoves, FPoffAfterArgs} = do_tailcall_args(StkArgs, Context),
  context_need_stack(Context, FPoffAfterArgs),
  StkArity = length(StkArgs),
  FPoffAtJump = FPoffAfterArgs + (Arity - StkArity) * word_size(),
  context_need_stack(Context, stack_need(FPoffAtJump, StkArity, FunC)),
  Jump =
    case FunC of
      'ctr' -> hipe_ppc:mk_bctr([]);
      Fun -> hipe_ppc:mk_b_fun(Fun, Linkage)
    end,
  %% XXX: break out the LR restore, just like for blr?
  Tail = ArgMoves ++ adjust_sp(FPoffAtJump, [Jump]),
  restore_lr(context_framesize(Context), Context, Tail).

%%% Generate the moves that place the stacked tailcall arguments Args.
%%% Returns {Code, FPoff}, where FPoff is the frame offset after any
%%% temporary pushes needed for conflicting moves.
do_tailcall_args(Args, Context) ->
  FrameSize = context_framesize(Context),
  FrameTop = word_size() * context_arity(Context),
  %% Sources at or above this offset lie in the area being overwritten.
  DangerOff = FrameTop - word_size() * length(Args),
  Moves = mk_moves(Args, FrameTop, []),
  {Stores, Simple, Conflict} =
    split_moves(Moves, Context, DangerOff, [], [], []),
  %% sanity check (shouldn't trigger any more)
  case DangerOff < -FrameSize of
    true -> exit({?MODULE,do_tailcall_args,DangerOff,-FrameSize});
    false -> ok
  end,
  {Pushes, Pops, FPoff2} = split_conflict(Conflict, FrameSize, [], []),
  TempReg = hipe_ppc_registers:temp1(),
  %% Each step is given the code that must follow it, so the sequence is
  %% built back to front: SP adjust, pushes, stores, simple moves, pops.
  PopCode = simple_moves(Pops, FPoff2, TempReg, []),
  SimpleCode = simple_moves(Simple, FPoff2, TempReg, PopCode),
  StoreCode = store_moves(Stores, FPoff2, TempReg, SimpleCode),
  PushCode = simple_moves(Pushes, FPoff2, TempReg, StoreCode),
  {adjust_sp(-(FPoff2 - FrameSize), PushCode), FPoff2}.

%%% Pair every argument with its destination offset. The first argument
%%% gets Off - WordSize, each following one a word lower. New pairs are
%%% consed onto Moves, so the result is in reverse argument order.
mk_moves(Args, Off, Moves) ->
  Step =
    fun(Arg, {PrevOff, Acc}) ->
        ArgOff = PrevOff - word_size(),
        {ArgOff, [{Arg, ArgOff} | Acc]}
    end,
  {_LastOff, AllMoves} = lists:foldl(Step, {Off, Moves}, Args),
  AllMoves.

%%% Sort {Src,DstOff} moves into three buckets:
%%%   Stores   - source is not a pseudo temp, kept as {Src,DstOff}
%%%   Simple   - stack-to-stack moves whose source is below DangerOff
%%%   Conflict - stack-to-stack moves whose source is at/above DangerOff
%%% Moves whose source already sits at the destination are dropped.
split_moves(Moves, Context, DangerOff, Stores, Simple, Conflict) ->
  Classify =
    fun({Src, DstOff} = Move, {St, Si, Co}) ->
        case src_is_pseudo(Src) of
          false ->
            {[Move | St], Si, Co};
          true ->
            SrcOff = context_offset(Context, Src),
            Type = typeof_temp(Src),
            if SrcOff =:= DstOff ->
                {St, Si, Co};
               SrcOff >= DangerOff ->
                {St, Si, [{SrcOff,DstOff,Type} | Co]};
               true ->
                {St, [{SrcOff,DstOff,Type} | Si], Co}
            end
        end
    end,
  lists:foldl(Classify, {Stores, Simple, Conflict}, Moves).

%%% Turn each conflicting move into a push to a fresh word below the
%%% frame and a later pop from that word to the real destination.
%%% Returns {Pushes, Pops, FPoff}; FPoff has grown by one word per move.
split_conflict(Conflict, FPoff, Pushes, Pops) ->
  Spill =
    fun({SrcOff, DstOff, Type}, {Off, PushAcc, PopAcc}) ->
        SlotOff = Off + word_size(),
        {SlotOff,
         [{SrcOff, -SlotOff, Type} | PushAcc],
         [{-SlotOff, DstOff, Type} | PopAcc]}
    end,
  {FinalOff, RevPushes, AllPops} =
    lists:foldl(Spill, {FPoff, Pushes, Pops}, Conflict),
  {lists:reverse(RevPushes), AllPops, FinalOff}.

%%% For every {SrcOff,DstOff,Type}, put a load from SP+FPoff+SrcOff into
%%% TempReg and a store to SP+FPoff+DstOff in front of the code so far.
simple_moves(Moves, FPoff, TempReg, Rest) ->
  Emit =
    fun({SrcOff, DstOff, Type}, Code) ->
        Scratch = hipe_ppc:mk_temp(TempReg, Type),
        SP = mk_sp(),
        Store = mk_store('stw', Scratch, FPoff + DstOff, SP, Code),
        mk_load('lwz', Scratch, FPoff + SrcOff, SP, Store)
    end,
  lists:foldl(Emit, Rest, Moves).

%%% For every {Src,DstOff}, store Src to SP+FPoff+DstOff. A source that
%%% is not a temp is first loaded into TempReg with hipe_ppc:mk_li/2.
store_moves(Moves, FPoff, TempReg, Rest) ->
  Emit =
    fun({Src, DstOff}, Code) ->
        {StoreSrc, Setup} =
          case hipe_ppc:is_temp(Src) of
            true ->
              {Src, []};
            _ ->
              Scratch = hipe_ppc:mk_temp(TempReg, 'untagged'),
              {Scratch, hipe_ppc:mk_li(Scratch, Src)}
          end,
        Setup ++ mk_store('stw', StoreSrc, FPoff + DstOff, mk_sp(), Code)
    end,
  lists:foldl(Emit, Rest, Moves).

%%%
%%% Contexts
%%%

-record(context, {liveness, framesize, arity, map, clobbers_lr, ref_maxstack}).

%%% Build the per-function context. The frame size is the negated lowest
%%% offset handed out by mk_temp_map/3; ref_maxstack is a mutable cell
%%% (hipe_bifs:ref/1) that starts at the frame size.
mk_context(Liveness, Formals, Temps, ClobbersLR) ->
  {Map, MinOff} = mk_temp_map(Formals, ClobbersLR, Temps),
  FrameSize = -MinOff,
  #context{liveness = Liveness,
           framesize = FrameSize,
           arity = length(Formals),
           map = Map,
           clobbers_lr = ClobbersLR,
           ref_maxstack = hipe_bifs:ref(FrameSize)}.

%%% Raise the recorded maximum stack need to N if N is larger.
context_need_stack(#context{ref_maxstack=Ref}, N) ->
  case N > hipe_bifs:ref_get(Ref) of
    true -> hipe_bifs:ref_set(Ref, N);
    false -> []
  end.

%%% Current value of the maximum-stack cell.
context_maxstack(Context = #context{}) ->
  hipe_bifs:ref_get(Context#context.ref_maxstack).

context_arity(Context = #context{}) ->
  Context#context.arity.

context_framesize(Context = #context{}) ->
  Context#context.framesize.

context_liveness(Context = #context{}) ->
  Context#context.liveness.

%%% Frame offset assigned to Temp; gb_trees:get/2 fails if Temp is unmapped.
context_offset(Context = #context{}, Temp) ->
  tmap_lookup(Context#context.map, Temp).

context_clobbers_lr(Context = #context{}) ->
  Context#context.clobbers_lr.

%%% Assign a frame offset to every formal and temp. Formals are numbered
%%% downwards from WordSize*length(Formals) and must end exactly at 0.
%%% Temps then continue downwards from 0; when LR is clobbered an extra
%%% fresh untagged temp (RA) takes the first slot. Returns {Map, LowestOffset}.
mk_temp_map(Formals, ClobbersLR, Temps) ->
  FormalsBytes = word_size() * length(Formals),
  {FormalsMap, 0} = enter_vars(Formals, FormalsBytes, tmap_empty()),
  Locals = tset_to_list(Temps),
  FrameVars =
    case ClobbersLR of
      true ->
        RA = hipe_ppc:mk_new_temp('untagged'),
        [RA | Locals];
      false ->
        Locals
    end,
  enter_vars(FrameVars, 0, FormalsMap).

%%% Bind each variable to the next lower offset: two words for a
%%% 'double', one word for anything else. Returns {Map, LastOffset}.
enter_vars(Vars, PrevOff, Map) ->
  Place =
    fun(V, {M, Prev}) ->
        Size =
          case hipe_ppc:temp_type(V) of
            'double' -> 2 * word_size();
            _ -> word_size()
          end,
        Off = Prev - Size,
        {tmap_bind(M, V, Off), Off}
    end,
  lists:foldl(Place, {Map, PrevOff}, Vars).

%%% The temp map is a gb_tree from temp to frame offset.
tmap_empty() ->
  gb_trees:empty().

%%% Add a new binding; gb_trees:insert/3 fails if Key is already present.
tmap_bind(Map, Key, Val) ->
  gb_trees:insert(Key, Val, Map).

tmap_lookup(Map, Key) ->
  gb_trees:get(Key, Map).

%%%
%%% do_prologue: prepend stack frame allocation code.
%%%
%%% NewStart:
%%%     temp1 = *(P + P_NSP_LIMIT)
%%%     temp2 = SP - MaxStack
%%%     cmp temp2, temp1
%%%     temp1 = LR [if ClobbersLR][hoisted]
%%%     if (ltu) goto IncStack else goto AllocFrame
%%% AllocFrame:
%%%     SP = temp2 [if FrameSize == MaxStack]
%%%     SP -= FrameSize [if FrameSize != MaxStack]
%%%     *(SP + FrameSize-WordSize) = temp1 [if ClobbersLR]
%%%     goto OldStart
%%% OldStart:
%%%     ...
%%% IncStack:
%%%     temp1 = LR [if not ClobbersLR]
%%%     bl inc_stack
%%%     LR = temp1
%%%     goto NewStart

%%% Add the prologue blocks sketched above to CFG and make the new entry
%%% block the start label. CFG is returned unchanged when the recorded
%%% maximum stack need is not positive. When the need fits in the words
%%% guaranteed on entry, the stack-limit check is left out altogether.
do_prologue(CFG, Context) ->
  case context_maxstack(Context) of
    MaxStack when MaxStack > 0 ->
      FrameSize = context_framesize(Context),
      OldStartLab = hipe_ppc_cfg:start_label(CFG),
      NewStartLab = hipe_gensym:get_next_label(ppc),
      %%
      ProcPtr = hipe_ppc:mk_temp(hipe_ppc_registers:proc_pointer(), 'untagged'),
      Temp1 = mk_temp1(),
      SP = mk_sp(),
      %%
      ClobbersLR = context_clobbers_lr(Context),
      ReadLR = hipe_ppc:mk_mfspr(Temp1, 'lr'),
      EnterBody = [hipe_ppc:mk_b_label(OldStartLab)],
      %% Once the frame exists: save the old LR (held in Temp1) in the
      %% frame's top word if needed, then jump to the original entry.
      AllocTail =
        case ClobbersLR of
          true ->
            mk_store('stw', Temp1, FrameSize - word_size(), SP, EnterBody);
          false ->
            EnterBody
        end,
      %%
      LeafBytes = (?PPC_LEAF_WORDS - context_arity(Context)) * word_size(),
      Guaranteed = erlang:max(0, LeafBytes),
      %%
      {CFG1, NewStartCode} =
        case MaxStack =< Guaranteed of
          true ->
            %% No limit check: just allocate the frame.
            Alloc = adjust_sp(-FrameSize, AllocTail),
            Code =
              case ClobbersLR of
                true -> [ReadLR | Alloc];
                false -> Alloc
              end,
            {CFG, Code};
          false ->
            AllocFrameLab = hipe_gensym:get_next_label(ppc),
            IncStackLab = hipe_gensym:get_next_label(ppc),
            Temp2 = mk_temp2(),
            %% NewStart: compare SP - MaxStack against the stack limit.
            Branch =
              hipe_ppc:mk_pseudo_bc('lt', IncStackLab, AllocFrameLab, 0.01),
            CheckTail =
              case ClobbersLR of
                true -> [ReadLR, Branch];
                false -> [Branch]
              end,
            Compare = hipe_ppc:mk_cmp('cmpl', Temp2, Temp1),
            Check =
              [hipe_ppc:mk_load('lwz', Temp1, ?P_NSP_LIMIT, ProcPtr) |
               hipe_ppc:mk_addi(Temp2, SP, -MaxStack, [Compare | CheckTail])],
            %% AllocFrame: reuse Temp2 as the new SP when it already
            %% equals SP - FrameSize.
            Alloc =
              case MaxStack =:= FrameSize of
                true ->
                  [hipe_ppc:mk_alu('or', SP, Temp2, Temp2) | AllocTail];
                false ->
                  adjust_sp(-FrameSize, AllocTail)
              end,
            %% IncStack: call inc_stack_0, restore LR, retry from NewStart.
            Resume =
              [hipe_ppc:mk_bl(hipe_ppc:mk_prim('inc_stack_0'),
                              mk_minimal_sdesc(Context), not_remote),
               hipe_ppc:mk_mtspr('lr', Temp1),
               hipe_ppc:mk_b_label(NewStartLab)],
            IncStack =
              case ClobbersLR of
                true -> Resume;
                false -> [ReadLR | Resume]
              end,
            %%
            CFGa = hipe_ppc_cfg:bb_add(CFG, AllocFrameLab,
                                       hipe_bb:mk_bb(Alloc)),
            CFGb = hipe_ppc_cfg:bb_add(CFGa, IncStackLab,
                                       hipe_bb:mk_bb(IncStack)),
            {CFGb, Check}
        end,
      %%
      CFG2 = hipe_ppc_cfg:bb_add(CFG1, NewStartLab,
                                 hipe_bb:mk_bb(NewStartCode)),
      hipe_ppc_cfg:start_label_update(CFG2, NewStartLab);
    _ ->
      CFG
  end.

%%% Create a load instruction.
%%% May clobber Dst early for large offsets. In principle we could
%%% clobber R0 if Dst =:= Base, but Dst =/= Base here in frame.

mk_load(LdOp, Dst, Offset, Base, Rest) ->
  Scratch = 'error',
  hipe_ppc:mk_load(LdOp, Dst, Offset, Base, Scratch, Rest).

%%% Create a store instruction.
%%% May clobber R0 for large offsets.

mk_store(StOp, Src, Offset, Base, Rest) ->
  Scratch = 0,
  hipe_ppc:mk_store(StOp, Src, Offset, Base, Scratch, Rest).

%%% typeof_temp -- what's temp's type?

typeof_temp(Temp) ->
  hipe_ppc:temp_type(Temp).

%%% Cons up an 'SP' Temp.

mk_sp() ->
  Reg = hipe_ppc_registers:stack_pointer(),
  hipe_ppc:mk_temp(Reg, 'untagged').

%%% Cons up a 'TEMP1' Temp.

mk_temp1() ->
  Reg = hipe_ppc_registers:temp1(),
  hipe_ppc:mk_temp(Reg, 'untagged').

%%% Cons up a 'TEMP2' Temp.

mk_temp2() ->
  Reg = hipe_ppc_registers:temp2(),
  hipe_ppc:mk_temp(Reg, 'untagged').

%%% Check if an operand is a pseudo-Temp.

src_is_pseudo(Src) ->
  case hipe_ppc:is_temp(Src) of
    true -> temp_is_pseudo(Src);
    false -> false
  end.

%%% A temp is a pseudo when it is not precoloured.
temp_is_pseudo(Temp) ->
  not hipe_ppc:temp_is_precoloured(Temp).

%%%
%%% Detect if a Defun's body clobbers LR.
%%%

%%% True iff the instruction list contains a pseudo_call.
clobbers_lr(Insns) ->
  %% mtspr to lr cannot occur yet
  lists:any(fun(I) -> is_record(I, pseudo_call) end, Insns).

%%%
%%% Build the set of all temps used in a Defun's body.
%%%

%%% Every temp defined or used in Code, minus the formals, keeping only
%%% the pseudo (non-precoloured) ones.
all_temps(Code, Formals) ->
  Used = find_temps(Code, tset_empty()),
  NonFormals = tset_del_list(Used, Formals),
  tset_filter(NonFormals, fun temp_is_pseudo/1).

%%% Add the defs and uses of every instruction to the set S0.
find_temps(Insns, S0) ->
  Collect =
    fun(I, S) ->
        WithDefs = tset_add_list(S, hipe_ppc_defuse:insn_def_all(I)),
        tset_add_list(WithDefs, hipe_ppc_defuse:insn_use_all(I))
    end,
  lists:foldl(Collect, S0, Insns).

%%% Temp sets are gb_sets; the tset_* functions are thin wrappers.
tset_empty() ->
  gb_sets:empty().

tset_size(S) ->
  gb_sets:size(S).

tset_insert(S, T) ->
  gb_sets:add(T, S).

tset_add_list(S, Ts) ->
  Extra = gb_sets:from_list(Ts),
  gb_sets:union(S, Extra).

tset_del_list(S, Ts) ->
  Unwanted = gb_sets:from_list(Ts),
  gb_sets:subtract(S, Unwanted).

tset_filter(S, F) ->
  gb_sets:filter(F, S).

tset_to_list(S) ->
  gb_sets:to_list(S).

%%%
%%% Compute minimum permissible frame size, ignoring spilled temps.
%%% This is done to ensure that we won't have to adjust the frame size
%%% in the middle of a tailcall.
%%%

%%% Largest stacked arity of any tailcall in the body minus the number
%%% of (fixed) formals, clamped at 0.
defun_minframe(Defun) ->
  MaxTailArity = body_mta(hipe_ppc:defun_code(Defun), 0),
  Formals = fix_formals(hipe_ppc:defun_formals(Defun)),
  erlang:max(MaxTailArity - length(Formals), 0).

%%% Fold insn_mta/2 over the instruction list.
body_mta(Code, MTA) ->
  lists:foldl(fun insn_mta/2, MTA, Code).

%%% Only pseudo_tailcalls can raise the running maximum; their
%%% contribution is Arity minus the number of argument registers.
insn_mta(#pseudo_tailcall{arity=Arity}, MTA) ->
  erlang:max(MTA, Arity - hipe_ppc_registers:nr_args());
insn_mta(_, MTA) ->
  MTA.

%%%
%%% Ensure that we have enough temps to satisfy the minimum frame size,
%%% if necessary by prepending unused dummy temps.
%%%

ensure_minframe(MinFrame, Temps) ->
  Frame = tset_size(Temps),
  ensure_minframe(MinFrame, Frame, Temps).

%%% Add fresh untagged dummy temps to Temps, one per step, until the
%%% running count Frame is no longer below MinFrame.
ensure_minframe(MinFrame, Frame, Temps) when MinFrame > Frame ->
  Dummy = hipe_ppc:mk_new_temp('untagged'),
  ensure_minframe(MinFrame, Frame + 1, tset_insert(Temps, Dummy));
ensure_minframe(_MinFrame, _Frame, Temps) ->
  Temps.

%%% Target word size in bytes, as reported by hipe_rtl_arch.
word_size() ->
  hipe_rtl_arch:word_size().
diff --git a/lib/hipe/ppc/hipe_ppc_liveness_all.erl b/lib/hipe/ppc/hipe_ppc_liveness_all.erl
new file mode 100644
index 0000000000..c9234e8100
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_liveness_all.erl
@@ -0,0 +1,38 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_liveness_all).
-export([analyse/1]).
-export([liveout/2]).

-include("hipe_ppc.hrl").
-include("../flow/liveness.inc").

%% Liveness over all PowerPC temps. The functions below are the hooks
%% used together with ../flow/liveness.inc (analyze/1 is not defined in
%% this module, so it presumably comes from that include).
analyse(CFG) -> analyze(CFG).
cfg_bb(CFG, L) -> hipe_ppc_cfg:bb(CFG, L).
cfg_postorder(CFG) -> hipe_ppc_cfg:postorder(CFG).
cfg_succ(CFG, L) -> hipe_ppc_cfg:succ(CFG, L).
uses(Insn) -> hipe_ppc_defuse:insn_use_all(Insn).
defines(Insn) -> hipe_ppc_defuse:insn_def_all(Insn).

%% Live-out set of a block without successors: the registers that
%% hipe_ppc_registers reports as live at return, as an ordset of temps.
liveout_no_succ() ->
  ToTemp = fun({Reg, Type}) -> hipe_ppc:mk_temp(Reg, Type) end,
  ordsets:from_list([ToTemp(RegType) ||
                      RegType <- hipe_ppc_registers:live_at_return()]).
diff --git a/lib/hipe/ppc/hipe_ppc_liveness_fpr.erl b/lib/hipe/ppc/hipe_ppc_liveness_fpr.erl
new file mode 100644
index 0000000000..ff9db21e2b
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_liveness_fpr.erl
@@ -0,0 +1,34 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_liveness_fpr).
-export([analyse/1]).
-export([liveout/2]).

-include("hipe_ppc.hrl").
-include("../flow/liveness.inc").

%% Liveness restricted to floating-point registers: uses/defines come
%% from the *_fpr variants in hipe_ppc_defuse.
analyse(CFG) -> analyze(CFG).
cfg_bb(CFG, L) -> hipe_ppc_cfg:bb(CFG, L).
cfg_postorder(CFG) -> hipe_ppc_cfg:postorder(CFG).
cfg_succ(CFG, L) -> hipe_ppc_cfg:succ(CFG, L).
uses(Insn) -> hipe_ppc_defuse:insn_use_fpr(Insn).
defines(Insn) -> hipe_ppc_defuse:insn_def_fpr(Insn).

%% Nothing is live out of a block without successors.
liveout_no_succ() -> [].
diff --git a/lib/hipe/ppc/hipe_ppc_liveness_gpr.erl b/lib/hipe/ppc/hipe_ppc_liveness_gpr.erl
new file mode 100644
index 0000000000..a55052b944
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_liveness_gpr.erl
@@ -0,0 +1,38 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_liveness_gpr).
-export([analyse/1]).
-export([liveout/2]).

-include("hipe_ppc.hrl").
-include("../flow/liveness.inc").

%% Liveness restricted to general-purpose registers: uses/defines come
%% from the *_gpr variants in hipe_ppc_defuse.
analyse(CFG) -> analyze(CFG).
cfg_bb(CFG, L) -> hipe_ppc_cfg:bb(CFG, L).
cfg_postorder(CFG) -> hipe_ppc_cfg:postorder(CFG).
cfg_succ(CFG, L) -> hipe_ppc_cfg:succ(CFG, L).
uses(Insn) -> hipe_ppc_defuse:insn_use_gpr(Insn).
defines(Insn) -> hipe_ppc_defuse:insn_def_gpr(Insn).

%% Live-out set of a block without successors: the registers that
%% hipe_ppc_registers reports as live at return, as an ordset of temps.
liveout_no_succ() ->
  ToTemp = fun({Reg, Type}) -> hipe_ppc:mk_temp(Reg, Type) end,
  ordsets:from_list([ToTemp(RegType) ||
                      RegType <- hipe_ppc_registers:live_at_return()]).
diff --git a/lib/hipe/ppc/hipe_ppc_main.erl b/lib/hipe/ppc/hipe_ppc_main.erl
new file mode 100644
index 0000000000..1d84f6db11
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_main.erl
@@ -0,0 +1,51 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_main).
-export([rtl_to_ppc/3]).

%% Run the PowerPC backend passes in order: RTL translation, register
%% allocation, frame layout, finalisation. The result is optionally
%% pretty-printed (see ppc_pp/3) and returned tagged for the loader.
rtl_to_ppc(MFA, RTL, Options) ->
  Translated = hipe_rtl_to_ppc:translate(RTL),
  Allocated = hipe_ppc_ra:ra(Translated, Options),
  Framed = hipe_ppc_frame:frame(Allocated),
  Final = hipe_ppc_finalise:finalise(Framed),
  ppc_pp(Final, MFA, Options),
  {native, powerpc, {unprofiled, Final}}.

%% Pretty-print PPC as directed by the pp_native option.
ppc_pp(PPC, MFA, Options) ->
  pp_native(proplists:get_value(pp_native, Options), PPC, MFA).

%% pp_native values handled:
%%   true            - print to standard output
%%   {only, List}    - print only if MFA is a member of List
%%   {only, MFA}     - print only for exactly this MFA
%%   {file, Name}    - append the listing to the file Name
%%   anything else   - print nothing
pp_native(true, PPC, _MFA) ->
  hipe_ppc_pp:pp(PPC);
pp_native({only, Lst}, PPC, MFA) when is_list(Lst) ->
  case lists:member(MFA, Lst) of
    true -> hipe_ppc_pp:pp(PPC);
    false -> ok
  end;
pp_native({only, MFA}, PPC, MFA) ->
  hipe_ppc_pp:pp(PPC);
pp_native({file, FileName}, PPC, _MFA) ->
  {ok, File} = file:open(FileName, [write,append]),
  hipe_ppc_pp:pp(File, PPC),
  ok = file:close(File);
pp_native(_Other, _PPC, _MFA) ->
  ok.
diff --git a/lib/hipe/ppc/hipe_ppc_pp.erl b/lib/hipe/ppc/hipe_ppc_pp.erl
new file mode 100644
index 0000000000..f88e922808
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_pp.erl
@@ -0,0 +1,350 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_pp).
-export([pp/1, pp/2, pp_insn/1]).

-include("hipe_ppc.hrl").

%% Pretty-print a whole function definition to standard output.
pp(Defun) ->
  Dev = standard_io,
  pp(Dev, Defun).

%% Print a function definition to Dev as an assembly-style listing:
%% a .text section with the instructions, then a .rodata section with
%% the constant data. Labels are prefixed with "<M>_<F>_<A>".
pp(Dev, #defun{mfa={M,F,A}, code=Code, data=Data}) ->
  Fname = atom_to_list(M)++"_"++atom_to_list(F)++"_"++integer_to_list(A),
  io:format(Dev, "\t.text\n", []),
  io:format(Dev, "\t.align 4\n", []),
  io:format(Dev, "\t.global ~s\n", [Fname]),
  io:format(Dev, "~s:\n", [Fname]),
  pp_insns(Dev, Code, Fname),
  io:format(Dev, "\t.rodata\n", []),
  io:format(Dev, "\t.align 4\n", []),
  hipe_data_pp:pp(Dev, Data, ppc, Fname),
  io:format(Dev, "\n", []).

%% Print every instruction in the list, in order.
pp_insns(Dev, [I|Is], Fname) ->
  pp_insn(Dev, I, Fname),
  pp_insns(Dev, Is, Fname);
pp_insns(_, [], _) ->
  [].

%% Print a single instruction to standard output with an empty label prefix.
pp_insn(I) ->
  pp_insn(standard_io, I, "").

%% Print one instruction to Dev. Pre is the label prefix: every label is
%% written as ".<Pre>_<Label>", which is the form used where the label is
%% defined (the #label{} clause). Exits with {?MODULE, pp_insn, I} for an
%% unknown instruction.
pp_insn(Dev, I, Pre) ->
  case I of
    #alu{aluop=AluOp, dst=Dst, src1=Src1, src2=Src2} ->
      io:format(Dev, "\t~s ", [alu_op_name(AluOp)]),
      pp_temp(Dev, Dst),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Src1),
      io:format(Dev, ", ", []),
      pp_src(Dev, Src2),
      io:format(Dev, "\n", []);
    #b_fun{'fun'=Fun, linkage=Linkage} ->
      io:format(Dev, "\tb ", []),
      pp_fun(Dev, Fun),
      io:format(Dev, " # ~w\n", [Linkage]);
    #b_label{label=Label} ->
      io:format(Dev, "\tb .~s_~w\n", [Pre, Label]);
    #bc{bcond=BCond, label=Label, pred=Pred} ->
      %% The target needs the same leading "." as the label definition;
      %% without it the listing refers to a label that does not exist.
      io:format(Dev, "\tb~w .~s_~w # ~.2f\n", [bcond_name(BCond), Pre, Label, Pred]);
    #bctr{labels=Labels} ->
      io:format(Dev, "\tbctr", []),
      case Labels of
        [] -> [];
        _ ->
          io:format(Dev, " #", []),
          pp_labels(Dev, Labels, Pre)
      end,
      io:format(Dev, "\n", []);
    #bctrl{sdesc=SDesc} ->
      io:format(Dev, "\tbctrl #", []),
      pp_sdesc(Dev, Pre, SDesc),
      io:format(Dev, "\n", []);
    #bl{'fun'=Fun, sdesc=SDesc, linkage=Linkage} ->
      io:format(Dev, "\tbl ", []),
      pp_fun(Dev, Fun),
      io:format(Dev, " #", []),
      pp_sdesc(Dev, Pre, SDesc),
      io:format(Dev, " ~w\n", [Linkage]);
    #blr{} ->
      io:format(Dev, "\tblr\n", []);
    #comment{term=Term} ->
      io:format(Dev, "\t# ~p\n", [Term]);
    #cmp{cmpop=CmpOp, src1=Src1, src2=Src2} ->
      io:format(Dev, "\t~s ", [cmp_op_name(CmpOp)]),
      pp_temp(Dev, Src1),
      io:format(Dev, ", ", []),
      pp_src(Dev, Src2),
      io:format(Dev, "\n", []);
    #label{label=Label} ->
      io:format(Dev, ".~s_~w:~n", [Pre, Label]);
    #load{ldop=LdOp, dst=Dst, disp=Disp, base=Base} ->
      io:format(Dev, "\t~w ", [ldop_name(LdOp)]),
      pp_temp(Dev, Dst),
      io:format(Dev, ", ~s(", [to_hex(Disp)]),
      pp_temp(Dev, Base),
      io:format(Dev, ")\n", []);
    #loadx{ldxop=LdxOp, dst=Dst, base1=Base1, base2=Base2} ->
      io:format(Dev, "\t~w ", [ldxop_name(LdxOp)]),
      pp_temp(Dev, Dst),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Base1),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Base2),
      io:format(Dev, "\n", []);
    #mfspr{dst=Dst, spr=SPR} ->
      io:format(Dev, "\tmf~w ", [spr_name(SPR)]),
      pp_temp(Dev, Dst),
      io:format(Dev, "\n", []);
    #mtcr{src=Src} ->
      io:format(Dev, "\tmtcrf 0x80, ", []),
      pp_temp(Dev, Src),
      io:format(Dev, "\n", []);
    #mtspr{spr=SPR, src=Src} ->
      io:format(Dev, "\tmt~w ", [spr_name(SPR)]),
      pp_temp(Dev, Src),
      io:format(Dev, "\n", []);
    #pseudo_bc{bcond=BCond, true_label=TrueLab, false_label=FalseLab, pred=Pred} ->
      io:format(Dev, "\tpseudo_bc ~w, .~s_~w # .~s_~w ~.2f\n",
                [bcond_name(BCond), Pre, TrueLab, Pre, FalseLab, Pred]);
    #pseudo_call{func=FunC, sdesc=SDesc, contlab=ContLab, linkage=Linkage} ->
      io:format(Dev, "\tpseudo_call ", []),
      pp_func(Dev, FunC),
      io:format(Dev, " # contlab .~s_~w", [Pre, ContLab]),
      pp_sdesc(Dev, Pre, SDesc),
      io:format(Dev, " ~w\n", [Linkage]);
    #pseudo_call_prepare{nrstkargs=NrStkArgs} ->
      SP = hipe_ppc_registers:reg_name_gpr(hipe_ppc_registers:stack_pointer()),
      %% Shown as the addi it expands to; 4 is the word size in bytes.
      io:format(Dev, "\taddi ~s, ~s, ~w # pseudo_call_prepare\n",
                [SP, SP, -(4*NrStkArgs)]);
    #pseudo_li{dst=Dst, imm=Imm} ->
      io:format(Dev, "\tpseudo_li ", []),
      pp_temp(Dev, Dst),
      io:format(Dev, ", ", []),
      pp_imm(Dev, Imm),
      io:format(Dev, "\n", []);
    #pseudo_move{dst=Dst, src=Src} ->
      io:format(Dev, "\tpseudo_move ", []),
      pp_temp(Dev, Dst),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Src),
      io:format(Dev, "\n", []);
    #pseudo_tailcall{func=FunC, arity=Arity, stkargs=StkArgs, linkage=Linkage} ->
      io:format(Dev, "\tpseudo_tailcall ", []),
      pp_func(Dev, FunC),
      io:format(Dev, "/~w (", [Arity]),
      pp_args(Dev, StkArgs),
      io:format(Dev, ") ~w\n", [Linkage]);
    #pseudo_tailcall_prepare{} ->
      io:format(Dev, "\tpseudo_tailcall_prepare\n", []);
    #store{stop=StOp, src=Src, disp=Disp, base=Base} ->
      io:format(Dev, "\t~s ", [stop_name(StOp)]),
      pp_temp(Dev, Src),
      io:format(Dev, ", ~s(", [to_hex(Disp)]),
      pp_temp(Dev, Base),
      io:format(Dev, ")\n", []);
    #storex{stxop=StxOp, src=Src, base1=Base1, base2=Base2} ->
      io:format(Dev, "\t~s ", [stxop_name(StxOp)]),
      pp_temp(Dev, Src),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Base1),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Base2),
      io:format(Dev, "\n", []);
    #unary{unop=UnOp, dst=Dst, src=Src} ->
      io:format(Dev, "\t~w ", [unop_name(UnOp)]),
      pp_temp(Dev, Dst),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Src),
      io:format(Dev, "\n", []);
    #lfd{dst=Dst, disp=Disp, base=Base} ->
      io:format(Dev, "\tlfd ", []),
      pp_temp(Dev, Dst),
      io:format(Dev, ", ~s(", [to_hex(Disp)]),
      pp_temp(Dev, Base),
      io:format(Dev, ")\n", []);
    #lfdx{dst=Dst, base1=Base1, base2=Base2} ->
      io:format(Dev, "\tlfdx ", []),
      pp_temp(Dev, Dst),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Base1),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Base2),
      io:format(Dev, "\n", []);
    #stfd{src=Src, disp=Disp, base=Base} ->
      io:format(Dev, "\tstfd ", []),
      pp_temp(Dev, Src),
      io:format(Dev, ", ~s(", [to_hex(Disp)]),
      pp_temp(Dev, Base),
      io:format(Dev, ")\n", []);
    #stfdx{src=Src, base1=Base1, base2=Base2} ->
      io:format(Dev, "\tstfdx ", []),
      pp_temp(Dev, Src),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Base1),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Base2),
      io:format(Dev, "\n", []);
    #fp_binary{fp_binop=FpBinOp, dst=Dst, src1=Src1, src2=Src2} ->
      io:format(Dev, "\t~s ", [FpBinOp]),
      pp_temp(Dev, Dst),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Src1),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Src2),
      io:format(Dev, "\n", []);
    #fp_unary{fp_unop=FpUnOp, dst=Dst, src=Src} ->
      io:format(Dev, "\t~s ", [FpUnOp]),
      pp_temp(Dev, Dst),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Src),
      io:format(Dev, "\n", []);
    #pseudo_fmove{dst=Dst, src=Src} ->
      io:format(Dev, "\tpseudo_fmove ", []),
      pp_temp(Dev, Dst),
      io:format(Dev, ", ", []),
      pp_temp(Dev, Src),
      io:format(Dev, "\n", []);
    _ ->
      exit({?MODULE, pp_insn, I})
  end.

%% Format the integer N in base 16 with a "0x" prefix (io_lib chars).
to_hex(N) ->
  io_lib:format("~.16x", [N, "0x"]).

%% Print a stack descriptor: handler label, frame size (hex), arity and
%% the bracketed list of live slots.
pp_sdesc(Dev, Pre, #ppc_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live}) ->
  pp_sdesc_exnlab(Dev, Pre, ExnLab),
  io:format(Dev, " ~s ~w [", [to_hex(FSize), Arity]),
  pp_sdesc_live(Dev, Live),
  io:format(Dev, "]", []).

%% [] means "no exception handler".
pp_sdesc_exnlab(Dev, _, []) -> io:format(Dev, " []", []);
pp_sdesc_exnlab(Dev, Pre, ExnLab) -> io:format(Dev, " .~s_~w", [Pre, ExnLab]).

%% Print the elements of the Live tuple in hex, separated by commas.
pp_sdesc_live(_, {}) -> [];
pp_sdesc_live(Dev, Live) -> pp_sdesc_live(Dev, Live, 1).

pp_sdesc_live(Dev, Live, I) ->
  io:format(Dev, "~s", [to_hex(element(I, Live))]),
  if I < tuple_size(Live) ->
      io:format(Dev, ",", []),
      pp_sdesc_live(Dev, Live, I+1);
     true -> []
  end.

%% Print each label as " .<Pre>_<Label>".
pp_labels(Dev, [Label|Labels], Pre) ->
  io:format(Dev, " .~s_~w", [Pre, Label]),
  pp_labels(Dev, Labels, Pre);
pp_labels(_, [], _) ->
  [].

%% Print a call target: M:F/A for an MFA, the bare name for a primop.
pp_fun(Dev, Fun) ->
  case Fun of
    #ppc_mfa{m=M, f=F, a=A} ->
      io:format(Dev, "~w:~w/~w", [M, F, A]);
    #ppc_prim{prim=Prim} ->
      io:format(Dev, "~w", [Prim])
  end.

%% Like pp_fun/2, but also accepts the indirect target 'ctr'.
pp_func(Dev, FunC) ->
  case FunC of
    'ctr' ->
      io:format(Dev, "ctr", []);
    Fun ->
      pp_fun(Dev, Fun)
  end.

%% The opcode atoms are already the mnemonics, so every *_name function
%% is the identity.
alu_op_name(Op) -> Op.

bcond_name(BCond) -> BCond.

cmp_op_name(Op) -> Op.

spr_name(SPR) -> SPR.

ldop_name(LdOp) -> LdOp.

ldxop_name(LdxOp) -> LdxOp.

stop_name(StOp) -> StOp.

stxop_name(StxOp) -> StxOp.

unop_name(UnOp) -> UnOp.

%% Print a temp. Precoloured temps are shown by register name (FPR name
%% for 'double', GPR name otherwise); pseudos are shown as a type letter
%% (f, t or u) followed by the temp number.
pp_temp(Dev, Temp = #ppc_temp{reg = Reg, type = Type}) ->
  case {hipe_ppc:temp_is_precoloured(Temp), Type} of
    {true, 'double'} ->
      io:format(Dev, "~s", [hipe_ppc_registers:reg_name_fpr(Reg)]);
    {true, _} ->
      io:format(Dev, "~s", [hipe_ppc_registers:reg_name_gpr(Reg)]);
    {false, double} ->
      io:format(Dev, "~s~w", ["f", Reg]);
    {false, tagged} ->
      io:format(Dev, "~s~w", ["t", Reg]);
    {false, untagged} ->
      io:format(Dev, "~s~w", ["u", Reg])
  end.

%% Print an integer in hex (see to_hex/1).
pp_hex(Dev, Value) ->
  Hex = to_hex(Value),
  io:format(Dev, "~s", [Hex]).

pp_simm16(Dev, Imm = #ppc_simm16{}) -> pp_hex(Dev, Imm#ppc_simm16.value).
pp_uimm16(Dev, Imm = #ppc_uimm16{}) -> pp_hex(Dev, Imm#ppc_uimm16.value).

%% Integers are printed in hex, any other immediate term with ~w.
pp_imm(Dev, Value) when is_integer(Value) ->
  pp_hex(Dev, Value);
pp_imm(Dev, Value) ->
  io:format(Dev, "~w", [Value]).

%% Print a source operand: a temp or a 16-bit signed/unsigned immediate.
pp_src(Dev, Src) ->
  case Src of
    #ppc_uimm16{} -> pp_uimm16(Dev, Src);
    #ppc_simm16{} -> pp_simm16(Dev, Src);
    #ppc_temp{} -> pp_temp(Dev, Src)
  end.

%% Print a tailcall argument: a temp, or else a value shown in hex.
pp_arg(Dev, Arg = #ppc_temp{}) ->
  pp_temp(Dev, Arg);
pp_arg(Dev, Arg) ->
  pp_hex(Dev, Arg).

%% Print a comma-separated argument list.
pp_args(_, []) ->
  [];
pp_args(Dev, [First | Others]) ->
  pp_arg(Dev, First),
  pp_comma_args(Dev, Others).

%% Print each argument preceded by ", ".
pp_comma_args(Dev, Args) ->
  lists:foreach(fun(A) ->
                    io:format(Dev, ", ", []),
                    pp_arg(Dev, A)
                end, Args),
  [].
diff --git a/lib/hipe/ppc/hipe_ppc_ra.erl b/lib/hipe/ppc/hipe_ppc_ra.erl
new file mode 100644
index 0000000000..3de7f48de1
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_ra.erl
@@ -0,0 +1,56 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_ra).
-export([ra/2]).

%% Register-allocate Defun0: first the floating-point registers (only
%% with the inline_fp option), then the general-purpose registers using
%% the allocator named by the regalloc option, and finally rewrite the
%% code with both colourings.
ra(Defun0, Options) ->
  %% hipe_ppc_pp:pp(Defun0),
  {Defun1, Coloring_fp, SpillIndex} = alloc_fprs(Defun0, Options),
  %% hipe_ppc_pp:pp(Defun1),
  {Defun2, Coloring} = alloc_gprs(Defun1, Coloring_fp, SpillIndex, Options),
  %% hipe_ppc_pp:pp(Defun2),
  hipe_ppc_ra_finalise:finalise(Defun2, Coloring, Coloring_fp).

%% FP phase. Without inline_fp there is nothing to do: the colouring is
%% empty and the spill index starts at 0.
alloc_fprs(Defun, Options) ->
  case proplists:get_bool(inline_fp, Options) of
    true ->
      hipe_regalloc_loop:ra_fp(Defun, Options,
                               hipe_coalescing_regalloc,
                               hipe_ppc_specific_fp);
    false ->
      {Defun, [], 0}
  end.

%% GPR phase. regalloc defaults to coalescing; an unrecognised value
%% terminates with {unknown_regalloc_compiler_option, Value}.
alloc_gprs(Defun, Coloring_fp, SpillIndex, Options) ->
  case proplists:get_value(regalloc, Options, coalescing) of
    coalescing ->
      ra(Defun, SpillIndex, Options, hipe_coalescing_regalloc);
    optimistic ->
      ra(Defun, SpillIndex, Options, hipe_optimistic_regalloc);
    graph_color ->
      ra(Defun, SpillIndex, Options, hipe_graph_coloring_regalloc);
    linear_scan ->
      hipe_ppc_ra_ls:ra(Defun, SpillIndex, Options);
    naive ->
      hipe_ppc_ra_naive:ra(Defun, Coloring_fp, Options);
    Unknown ->
      exit({unknown_regalloc_compiler_option, Unknown})
  end.

%% Run the generic allocation loop with the given allocator module and
%% the PowerPC target callbacks.
ra(Defun, SpillIndex, Options, RegAllocMod) ->
  Target = hipe_ppc_specific,
  hipe_regalloc_loop:ra(Defun, SpillIndex, Options, RegAllocMod, Target).
diff --git a/lib/hipe/ppc/hipe_ppc_ra_finalise.erl b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl
new file mode 100644
index 0000000000..53f8b739c2
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_ra_finalise.erl
@@ -0,0 +1,271 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_ra_finalise).
-export([finalise/3]).
-include("hipe_ppc.hrl").

%% Apply the register allocator's results to Defun: TempMap is the
%% general-register colouring, FPMap0 the floating-point one. Every
%% temp in the code is rewritten through the corresponding map.
finalise(Defun, TempMap, FPMap0) ->
  Code = hipe_ppc:defun_code(Defun),
  {_, SpillLimit} = hipe_ppc:defun_var_range(Defun),
  GprMap = mk_ra_map(TempMap, SpillLimit),
  FprMap = mk_ra_map_fp(FPMap0, SpillLimit),
  Defun#defun{code = ra_code(Code, GprMap, FprMap, [])}.

%% Rewrite every instruction; Accum holds already-rewritten
%% instructions in reverse order and ends up in front of the result.
ra_code(Insns, Map, FPMap, Accum) ->
  Rewritten = lists:map(fun(I) -> ra_insn(I, Map, FPMap) end, Insns),
  lists:reverse(Accum, Rewritten).

%% Dispatch on the instruction record. Instructions without temp
%% operands handled here are returned unchanged.
ra_insn(I = #alu{}, Map, _FPMap) -> ra_alu(I, Map);
ra_insn(I = #cmp{}, Map, _FPMap) -> ra_cmp(I, Map);
ra_insn(I = #load{}, Map, _FPMap) -> ra_load(I, Map);
ra_insn(I = #loadx{}, Map, _FPMap) -> ra_loadx(I, Map);
ra_insn(I = #mfspr{}, Map, _FPMap) -> ra_mfspr(I, Map);
ra_insn(I = #mtcr{}, Map, _FPMap) -> ra_mtcr(I, Map);
ra_insn(I = #mtspr{}, Map, _FPMap) -> ra_mtspr(I, Map);
ra_insn(I = #pseudo_li{}, Map, _FPMap) -> ra_pseudo_li(I, Map);
ra_insn(I = #pseudo_move{}, Map, _FPMap) -> ra_pseudo_move(I, Map);
ra_insn(I = #pseudo_tailcall{}, Map, _FPMap) -> ra_pseudo_tailcall(I, Map);
ra_insn(I = #store{}, Map, _FPMap) -> ra_store(I, Map);
ra_insn(I = #storex{}, Map, _FPMap) -> ra_storex(I, Map);
ra_insn(I = #unary{}, Map, _FPMap) -> ra_unary(I, Map);
ra_insn(I = #lfd{}, Map, FPMap) -> ra_lfd(I, Map, FPMap);
ra_insn(I = #lfdx{}, Map, FPMap) -> ra_lfdx(I, Map, FPMap);
ra_insn(I = #stfd{}, Map, FPMap) -> ra_stfd(I, Map, FPMap);
ra_insn(I = #stfdx{}, Map, FPMap) -> ra_stfdx(I, Map, FPMap);
ra_insn(I = #fp_binary{}, _Map, FPMap) -> ra_fp_binary(I, FPMap);
ra_insn(I = #fp_unary{}, _Map, FPMap) -> ra_fp_unary(I, FPMap);
ra_insn(I = #pseudo_fmove{}, _Map, FPMap) -> ra_pseudo_fmove(I, FPMap);
ra_insn(I, _Map, _FPMap) -> I.

%% dst and src1 are temps; src2 may be a temp or an immediate.
ra_alu(I = #alu{}, Map) ->
  Dst = ra_temp(I#alu.dst, Map),
  Src1 = ra_temp(I#alu.src1, Map),
  Src2 = ra_temp_or_imm(I#alu.src2, Map),
  I#alu{dst = Dst, src1 = Src1, src2 = Src2}.

%% Each ra_<insn> function below rewrites the temp operands of one
%% instruction kind through the general-register map (and, for the
%% floating-point loads, the FP map) and leaves every other field alone.

%% src1 is a temp; src2 may be a temp or an immediate.
ra_cmp(I = #cmp{}, Map) ->
  Src1 = ra_temp(I#cmp.src1, Map),
  Src2 = ra_temp_or_imm(I#cmp.src2, Map),
  I#cmp{src1 = Src1, src2 = Src2}.

ra_load(I = #load{}, Map) ->
  Dst = ra_temp(I#load.dst, Map),
  Base = ra_temp(I#load.base, Map),
  I#load{dst = Dst, base = Base}.

ra_loadx(I = #loadx{}, Map) ->
  Dst = ra_temp(I#loadx.dst, Map),
  Base1 = ra_temp(I#loadx.base1, Map),
  Base2 = ra_temp(I#loadx.base2, Map),
  I#loadx{dst = Dst, base1 = Base1, base2 = Base2}.

ra_mfspr(I = #mfspr{}, Map) ->
  Dst = ra_temp(I#mfspr.dst, Map),
  I#mfspr{dst = Dst}.

ra_mtcr(I = #mtcr{}, Map) ->
  Src = ra_temp(I#mtcr.src, Map),
  I#mtcr{src = Src}.

ra_mtspr(I = #mtspr{}, Map) ->
  Src = ra_temp(I#mtspr.src, Map),
  I#mtspr{src = Src}.

ra_pseudo_li(I = #pseudo_li{}, Map) ->
  Dst = ra_temp(I#pseudo_li.dst, Map),
  I#pseudo_li{dst = Dst}.

ra_pseudo_move(I = #pseudo_move{}, Map) ->
  Dst = ra_temp(I#pseudo_move.dst, Map),
  Src = ra_temp(I#pseudo_move.src, Map),
  I#pseudo_move{dst = Dst, src = Src}.

%% Only the stacked arguments carry temps that need rewriting.
ra_pseudo_tailcall(I = #pseudo_tailcall{}, Map) ->
  StkArgs = ra_args(I#pseudo_tailcall.stkargs, Map),
  I#pseudo_tailcall{stkargs = StkArgs}.

ra_store(I = #store{}, Map) ->
  Src = ra_temp(I#store.src, Map),
  Base = ra_temp(I#store.base, Map),
  I#store{src = Src, base = Base}.

ra_storex(I = #storex{}, Map) ->
  Src = ra_temp(I#storex.src, Map),
  Base1 = ra_temp(I#storex.base1, Map),
  Base2 = ra_temp(I#storex.base2, Map),
  I#storex{src = Src, base1 = Base1, base2 = Base2}.

ra_unary(I = #unary{}, Map) ->
  Dst = ra_temp(I#unary.dst, Map),
  Src = ra_temp(I#unary.src, Map),
  I#unary{dst = Dst, src = Src}.

%% The destination is a floating-point temp, the base a general one.
ra_lfd(I = #lfd{}, Map, FPMap) ->
  Dst = ra_temp_fp(I#lfd.dst, FPMap),
  Base = ra_temp(I#lfd.base, Map),
  I#lfd{dst = Dst, base = Base}.

%% FP loads/stores: the data operand goes through the floating-point map,
%% the address operands through the general-purpose map.

ra_lfdx(I = #lfdx{dst = Dst, base1 = Base1, base2 = Base2}, Map, FPMap) ->
  I#lfdx{dst = ra_temp_fp(Dst, FPMap),
         base1 = ra_temp(Base1, Map),
         base2 = ra_temp(Base2, Map)}.

ra_stfd(I = #stfd{src = Src, base = Base}, Map, FPMap) ->
  I#stfd{src = ra_temp_fp(Src, FPMap),
         base = ra_temp(Base, Map)}.

ra_stfdx(I = #stfdx{src = Src, base1 = Base1, base2 = Base2}, Map, FPMap) ->
  I#stfdx{src = ra_temp_fp(Src, FPMap),
          base1 = ra_temp(Base1, Map),
          base2 = ra_temp(Base2, Map)}.

%% Pure FP instructions only use the floating-point map.

ra_fp_binary(I = #fp_binary{dst = Dst, src1 = Src1, src2 = Src2}, FPMap) ->
  I#fp_binary{dst = ra_temp_fp(Dst, FPMap),
              src1 = ra_temp_fp(Src1, FPMap),
              src2 = ra_temp_fp(Src2, FPMap)}.

ra_fp_unary(I = #fp_unary{dst = Dst, src = Src}, FPMap) ->
  I#fp_unary{dst = ra_temp_fp(Dst, FPMap),
             src = ra_temp_fp(Src, FPMap)}.

ra_pseudo_fmove(I = #pseudo_fmove{dst = Dst, src = Src}, FPMap) ->
  I#pseudo_fmove{dst = ra_temp_fp(Dst, FPMap),
                 src = ra_temp_fp(Src, FPMap)}.

%% Rewrite a list of arguments, each of which is a temp or an immediate.
ra_args(Args, Map) ->
  [ra_temp_or_imm(Arg, Map) || Arg <- Args].

%% Immediates (anything hipe_ppc:is_temp/1 rejects) pass through untouched.
ra_temp_or_imm(Arg, Map) ->
  case hipe_ppc:is_temp(Arg) of
    false -> Arg;
    true -> ra_temp(Arg, Map)
  end.

%% Rewrite a floating-point temp. Only temps of type 'double' are
%% accepted; any other type fails the case below.
ra_temp_fp(Temp, FPMap) ->
  Reg = hipe_ppc:temp_reg(Temp),
  case hipe_ppc:temp_type(Temp) of
    'double' ->
      ra_unless_precoloured(hipe_ppc_registers:is_precoloured_fpr(Reg),
                            Reg, Temp, FPMap)
  end.

%% Rewrite a general-purpose temp. A 'double' temp here is a caller
%% error and terminates with an explicit exit.
ra_temp(Temp, Map) ->
  Reg = hipe_ppc:temp_reg(Temp),
  case hipe_ppc:temp_type(Temp) of
    'double' ->
      exit({?MODULE,ra_temp,Temp});
    _ ->
      ra_unless_precoloured(hipe_ppc_registers:is_precoloured_gpr(Reg),
                            Reg, Temp, Map)
  end.

%% Precoloured temps are kept as they are; all others are looked up in
%% the given map.
ra_unless_precoloured(true, _Reg, Temp, _Map) ->
  Temp;
ra_unless_precoloured(_NotPrecoloured, Reg, Temp, Map) ->
  ra_temp_common(Reg, Temp, Map).

%% Replace the register of Temp by its image in Map; temps whose
%% register has no entry are returned unchanged.
ra_temp_common(Reg, Temp, Map) ->
  case gb_trees:lookup(Reg, Map) of
    none -> Temp;
    {value, NewReg} -> Temp#ppc_temp{reg = NewReg}
  end.

mk_ra_map(TempMap, SpillLimit) ->
  %% Build a partial map from pseudo to reg or spill.
  %% Spills are represented as pseudos with indices above SpillLimit.
  %% (I'd prefer to use negative indices, but that breaks
  %% hipe_ppc_registers:is_precoloured/1.)
  %% The frame mapping proper is unchanged, since spills look just like
  %% ordinary (un-allocated) pseudos.
  Insert =
    fun(MapLet, Acc) ->
        {Pseudo, Target} =
          conv_ra_maplet(MapLet, SpillLimit, is_precoloured_gpr),
        gb_trees:insert(Pseudo, Target, Acc)
    end,
  lists:foldl(Insert, gb_trees:empty(), TempMap).

%% Validate one {From, To} maplet and convert it to a {From, NewRegNumber}
%% pair. IsPrecoloured names the predicate in hipe_ppc_registers to use.
%% Every malformed maplet exits with {?MODULE,conv_ra_maplet,MapLet}.
conv_ra_maplet(MapLet = {From, To}, SpillLimit, IsPrecoloured) ->
  ok = check_maplet_from(MapLet, SpillLimit, IsPrecoloured),
  case To of
    {reg, NewReg} ->
      ok = check_maplet_reg(MapLet, IsPrecoloured),
      {From, NewReg};
    {spill, SpillIndex} when is_integer(SpillIndex), SpillIndex >= 0 ->
      %% Spill slots become pseudos numbered just above SpillLimit.
      ToTempNum = SpillLimit+SpillIndex+1,
      %% Make sure the gensym counter covers the new pseudo.
      case hipe_gensym:get_var(ppc) of
        MaxTempNum when MaxTempNum >= ToTempNum -> ok;
        _ -> hipe_gensym:set_var(ppc, ToTempNum)
      end,
      {From, ToTempNum};
    _ ->
      exit({?MODULE,conv_ra_maplet,MapLet})
  end.

%% From should be a pseudo, or a hard reg mapped to itself.
check_maplet_from(MapLet = {From, To}, SpillLimit, IsPrecoloured)
  when is_integer(From), From =< SpillLimit ->
  case hipe_ppc_registers:IsPrecoloured(From) of
    false -> ok;
    _ when To =:= {reg, From} -> ok;
    _ -> exit({?MODULE,conv_ra_maplet,MapLet})
  end;
check_maplet_from(MapLet, _SpillLimit, _IsPrecoloured) ->
  exit({?MODULE,conv_ra_maplet,MapLet}).

%% NewReg should be a hard reg, or a pseudo mapped
%% to itself (formals are handled this way).
check_maplet_reg(MapLet = {From, {reg, NewReg}}, IsPrecoloured)
  when is_integer(NewReg) ->
  case hipe_ppc_registers:IsPrecoloured(NewReg) of
    true -> ok;
    _ when From =:= NewReg -> ok;
    _ -> exit({?MODULE,conv_ra_maplet,MapLet})
  end;
check_maplet_reg(MapLet, _IsPrecoloured) ->
  exit({?MODULE,conv_ra_maplet,MapLet}).

%% Floating-point counterpart of mk_ra_map/2: builds the pseudo -> reg or
%% spill map using the FPR precoloured test.
mk_ra_map_fp(FPMap, SpillLimit) ->
  lists:foldl(fun(MapLet, Map) ->
		  {Key,Val} = conv_ra_maplet(MapLet, SpillLimit,
					     is_precoloured_fpr),
		  gb_trees:insert(Key, Val, Map)
	      end,
	      gb_trees:empty(),
	      FPMap).
diff --git a/lib/hipe/ppc/hipe_ppc_ra_ls.erl b/lib/hipe/ppc/hipe_ppc_ra_ls.erl
new file mode 100644
index 0000000000..0b5d915ee8
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_ra_ls.erl
@@ -0,0 +1,56 @@
%%% -*- erlang-indent-level: 2 -*-
%%%
%%% %CopyrightBegin%
%%%
%%% Copyright Ericsson AB 2005-2009. All Rights Reserved.
%%%
%%% The contents of this file are subject to the Erlang Public License,
%%% Version 1.1, (the "License"); you may not use this file except in
%%% compliance with the License. You should have received a copy of the
%%% Erlang Public License along with this software. If not, it can be
%%% retrieved online at http://www.erlang.org/.
%%%
%%% Software distributed under the License is distributed on an "AS IS"
%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%%% the License for the specific language governing rights and limitations
%%% under the License.
%%%
%%% %CopyrightEnd%
%%%
%%% Linear Scan register allocator for PowerPC

-module(hipe_ppc_ra_ls).
-export([ra/3]).

%%% Allocate registers for Defun with the linear scan allocator.
%%% Returns {NewDefun, Coloring}, where NewDefun has been rewritten by
%%% hipe_ppc_ra_postconditions and Coloring merges the register
%%% assignment with the stack slots chosen by hipe_spillmin.
ra(Defun, SpillIndex, Options) ->
  %% No renaming pass runs before allocation; a call to
  %% hipe_${ARCH}_ra_rename:rename(Defun,Options) used to be sketched here.
  %%
  %% The CFG is built once and handed to alloc/5. Previously ra/3 and
  %% alloc/4 each called hipe_ppc_cfg:init/1 on the very same Defun.
  %% NOTE(review): this assumes hipe_ppc_cfg:init/1 is a pure function
  %% of Defun -- confirm against hipe_ppc_cfg.
  CFG = hipe_ppc_cfg:init(Defun),
  SpillLimit = hipe_ppc_specific:number_of_temporaries(CFG),
  alloc(Defun, CFG, SpillIndex, SpillLimit, Options).

alloc(Defun, CFG, SpillIndex, SpillLimit, Options) ->
  %% temp1..temp3 are withheld from the allocator: the 'linearscan'
  %% strategy of hipe_ppc_ra_postconditions uses them as fixed scratch
  %% registers when it rewrites spilled operands.
  PhysRegs =
    hipe_ppc_registers:allocatable_gpr() --
    [hipe_ppc_registers:temp3(),
     hipe_ppc_registers:temp2(),
     hipe_ppc_registers:temp1()],
  {Coloring, _NewSpillIndex} =
    regalloc(CFG, PhysRegs,
	     [hipe_ppc_cfg:start_label(CFG)],
	     SpillIndex, SpillLimit, Options,
	     hipe_ppc_specific),
  {NewDefun, _DidSpill} =
    hipe_ppc_ra_postconditions:check_and_rewrite(
      Defun, Coloring, 'linearscan'),
  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific),
  {TempMap2,_NewSpillIndex2} =
    hipe_spillmin:stackalloc(CFG, [], SpillIndex, Options,
			     hipe_ppc_specific, TempMap),
  Coloring2 =
    hipe_spillmin:mapmerge(hipe_temp_map:to_substlist(TempMap), TempMap2),
  {NewDefun, Coloring2}.

%%% Thin wrapper around the generic linear scan allocator.
regalloc(CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target) ->
  hipe_ls_regalloc:regalloc(
    CFG, PhysRegs, Entrypoints, SpillIndex, DontSpill, Options, Target).
diff --git a/lib/hipe/ppc/hipe_ppc_ra_naive.erl b/lib/hipe/ppc/hipe_ppc_ra_naive.erl
new file mode 100644
index 0000000000..f0ca41b49e
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_ra_naive.erl
@@ -0,0 +1,29 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2005-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_ra_naive).
-export([ra/3]).

-include("hipe_ppc.hrl").

%% Naive "allocation": nothing is coloured. The postconditions pass is
%% run with an empty temp map and the result is paired with an empty
%% colouring.
ra(Defun, _Coloring_fp, _Options) ->	% -> {Defun, Coloring}
  {Rewritten, _DidSpill} =
    hipe_ppc_ra_postconditions:check_and_rewrite2(Defun, [], 'naive'),
  {Rewritten, []}.
diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl
new file mode 100644
index 0000000000..142bce39cc
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions.erl
@@ -0,0 +1,243 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_ra_postconditions).

-export([check_and_rewrite/3, check_and_rewrite2/3]).

-include("hipe_ppc.hrl").

%% Convert Coloring to a temp map and rewrite Defun against it.
%% Returns {NewDefun, DidSpill}.
check_and_rewrite(Defun, Coloring, Allocator) ->
  check_and_rewrite2(Defun,
		     hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific),
		     Allocator).

%% Rewrite every instruction whose operands are spilled according to
%% TempMap. The var_range of the result is refreshed from the gensym
%% counter after rewriting, since rewriting may have created new temps.
check_and_rewrite2(Defun, TempMap, Allocator) ->
  Strategy = strategy(Allocator),
  #defun{code=OldCode} = Defun,
  {NewCode, DidSpill} = do_insns(OldCode, TempMap, Strategy, [], false),
  NewDefun = Defun#defun{code=NewCode,
			 var_range={0, hipe_gensym:get_var(ppc)}},
  {NewDefun, DidSpill}.

%% 'new' means spilled operands get freshly created temps; 'fixed' means
%% they are moved through the reserved scratch registers (see temp1/1,
%% temp2/1 and temp3/1).
strategy(Allocator) ->
  case Allocator of
    'linearscan' -> 'fixed';
    'naive' -> 'fixed';
    'normal' -> 'new'
  end.

%% Rewrite all instructions. Accum is the reversed output so far and
%% DidSpill the spill flag so far; the result is {Code, DidSpill}.
do_insns(Insns, TempMap, Strategy, Accum, DidSpill) ->
  Step =
    fun(I, {RevCode, Spilled}) ->
	{NewIs, SpilledNow} = do_insn(I, TempMap, Strategy),
	{lists:reverse(NewIs, RevCode), Spilled or SpilledNow}
    end,
  {RevCode, Spilled} = lists:foldl(Step, {Accum, DidSpill}, Insns),
  {lists:reverse(RevCode), Spilled}.

%% Dispatch on the instruction record; returns {Instructions, DidSpill}.
%% Unlisted instructions are passed through as a singleton list.
do_insn(I = #alu{}, TempMap, Strategy) -> do_alu(I, TempMap, Strategy);
do_insn(I = #cmp{}, TempMap, Strategy) -> do_cmp(I, TempMap, Strategy);
do_insn(I = #load{}, TempMap, Strategy) -> do_load(I, TempMap, Strategy);
do_insn(I = #loadx{}, TempMap, Strategy) -> do_loadx(I, TempMap, Strategy);
do_insn(I = #mfspr{}, TempMap, Strategy) -> do_mfspr(I, TempMap, Strategy);
do_insn(I = #mtcr{}, TempMap, Strategy) -> do_mtcr(I, TempMap, Strategy);
do_insn(I = #mtspr{}, TempMap, Strategy) -> do_mtspr(I, TempMap, Strategy);
do_insn(I = #pseudo_li{}, TempMap, Strategy) ->
  do_pseudo_li(I, TempMap, Strategy);
do_insn(I = #pseudo_move{}, TempMap, Strategy) ->
  do_pseudo_move(I, TempMap, Strategy);
do_insn(I = #store{}, TempMap, Strategy) -> do_store(I, TempMap, Strategy);
do_insn(I = #storex{}, TempMap, Strategy) -> do_storex(I, TempMap, Strategy);
do_insn(I = #unary{}, TempMap, Strategy) -> do_unary(I, TempMap, Strategy);
do_insn(I = #lfd{}, TempMap, Strategy) -> do_lfd(I, TempMap, Strategy);
do_insn(I = #lfdx{}, TempMap, Strategy) -> do_lfdx(I, TempMap, Strategy);
do_insn(I = #stfd{}, TempMap, Strategy) -> do_stfd(I, TempMap, Strategy);
do_insn(I = #stfdx{}, TempMap, Strategy) -> do_stfdx(I, TempMap, Strategy);
do_insn(I, _TempMap, _Strategy) -> {[I], false}.

%%% Fix relevant instruction types.
%%% In each result the moves for source operands come before the
%%% rewritten instruction and the move for the destination comes after.

do_alu(I=#alu{dst=Dst,src1=Src1,src2=Src2}, TempMap, Strategy) ->
  {PostDst, Dst1, SpillD} = fix_dst(Dst, TempMap, Strategy),
  {PreSrc1, Src1a, SpillS1} = fix_src1(Src1, TempMap, Strategy),
  {PreSrc2, Src2a, SpillS2} = fix_src2_or_imm(Src2, TempMap, Strategy),
  Fixed = I#alu{dst=Dst1,src1=Src1a,src2=Src2a},
  {PreSrc1 ++ PreSrc2 ++ [Fixed | PostDst], SpillD or SpillS1 or SpillS2}.

do_cmp(I=#cmp{src1=Src1,src2=Src2}, TempMap, Strategy) ->
  {PreSrc1, Src1a, SpillS1} = fix_src1(Src1, TempMap, Strategy),
  {PreSrc2, Src2a, SpillS2} = fix_src2_or_imm(Src2, TempMap, Strategy),
  Fixed = I#cmp{src1=Src1a,src2=Src2a},
  {PreSrc1 ++ PreSrc2 ++ [Fixed], SpillS1 or SpillS2}.

%% Loads: the base register is treated as source operand 1.

do_load(I=#load{dst=Dst,base=Base}, TempMap, Strategy) ->
  {PostDst, Dst1, SpillD} = fix_dst(Dst, TempMap, Strategy),
  {PreBase, Base1, SpillB} = fix_src1(Base, TempMap, Strategy),
  Fixed = I#load{dst=Dst1,base=Base1},
  {PreBase ++ [Fixed | PostDst], SpillD or SpillB}.

do_loadx(I=#loadx{dst=Dst,base1=Base1,base2=Base2}, TempMap, Strategy) ->
  {PostDst, Dst1, SpillD} = fix_dst(Dst, TempMap, Strategy),
  {PreBase1, Base1a, SpillB1} = fix_src1(Base1, TempMap, Strategy),
  {PreBase2, Base2a, SpillB2} = fix_src2(Base2, TempMap, Strategy),
  Fixed = I#loadx{dst=Dst1,base1=Base1a,base2=Base2a},
  {PreBase1 ++ PreBase2 ++ [Fixed | PostDst], SpillD or SpillB1 or SpillB2}.

do_mfspr(I=#mfspr{dst=Dst}, TempMap, Strategy) ->
  {PostDst, Dst1, Spilled} = fix_dst(Dst, TempMap, Strategy),
  {[I#mfspr{dst=Dst1} | PostDst], Spilled}.

do_mtcr(I=#mtcr{src=Src}, TempMap, Strategy) ->
  {PreSrc, Src1, Spilled} = fix_src1(Src, TempMap, Strategy),
  {PreSrc ++ [I#mtcr{src=Src1}], Spilled}.

do_mtspr(I=#mtspr{src=Src}, TempMap, Strategy) ->
  {PreSrc, Src1, Spilled} = fix_src1(Src, TempMap, Strategy),
  {PreSrc ++ [I#mtspr{src=Src1}], Spilled}.

do_pseudo_li(I=#pseudo_li{dst=Dst}, TempMap, Strategy) ->
  {PostDst, Dst1, Spilled} = fix_dst(Dst, TempMap, Strategy),
  {[I#pseudo_li{dst=Dst1} | PostDst], Spilled}.

do_pseudo_move(I=#pseudo_move{dst=Dst,src=Src}, TempMap, Strategy) ->
  %% Either Dst or Src (but not both) may be a pseudo temp.
  %% pseudo_move and pseudo_tailcall are special cases: in
  %% all other instructions, all temps must be non-pseudos
  %% after register allocation.
  case temp_is_spilled(Dst, TempMap) of
    true -> % Src must not be a pseudo
      {PreSrc, Src1, Spilled} = fix_src1(Src, TempMap, Strategy),
      {PreSrc ++ [I#pseudo_move{src=Src1}], Spilled};
    _ ->
      %% Dst is not spilled: the move is left exactly as it is.
      {[I], false}
  end.

%% Stores have no destination temp: the stored value is source 1 and the
%% address operands are sources 2 (and 3 for storex).

do_store(I=#store{src=Src,base=Base}, TempMap, Strategy) ->
  {PreSrc, Src1, SpillS} = fix_src1(Src, TempMap, Strategy),
  {PreBase, Base1, SpillB} = fix_src2(Base, TempMap, Strategy),
  Fixed = I#store{src=Src1,base=Base1},
  {PreSrc ++ PreBase ++ [Fixed], SpillS or SpillB}.

do_storex(I=#storex{src=Src,base1=Base1,base2=Base2}, TempMap, Strategy) ->
  {PreSrc, Src1, SpillS} = fix_src1(Src, TempMap, Strategy),
  {PreBase1, Base1a, SpillB1} = fix_src2(Base1, TempMap, Strategy),
  {PreBase2, Base2a, SpillB2} = fix_src3(Base2, TempMap, Strategy),
  Fixed = I#storex{src=Src1,base1=Base1a,base2=Base2a},
  {PreSrc ++ PreBase1 ++ PreBase2 ++ [Fixed], SpillS or SpillB1 or SpillB2}.

do_unary(I=#unary{dst=Dst,src=Src}, TempMap, Strategy) ->
  {PostDst, Dst1, SpillD} = fix_dst(Dst, TempMap, Strategy),
  {PreSrc, Src1, SpillS} = fix_src1(Src, TempMap, Strategy),
  Fixed = I#unary{dst=Dst1,src=Src1},
  {PreSrc ++ [Fixed | PostDst], SpillD or SpillS}.

%% FP loads and stores: only the general-purpose address operands are
%% fixed in this module; the FP data operand is not touched here.

do_lfd(I=#lfd{base=Base}, TempMap, Strategy) ->
  {PreBase, Base1, Spilled} = fix_src1(Base, TempMap, Strategy),
  {PreBase ++ [I#lfd{base=Base1}], Spilled}.

do_lfdx(I=#lfdx{base1=Base1,base2=Base2}, TempMap, Strategy) ->
  {PreBase1, Base1a, SpillB1} = fix_src1(Base1, TempMap, Strategy),
  {PreBase2, Base2a, SpillB2} = fix_src2(Base2, TempMap, Strategy),
  Fixed = I#lfdx{base1=Base1a,base2=Base2a},
  {PreBase1 ++ PreBase2 ++ [Fixed], SpillB1 or SpillB2}.

do_stfd(I=#stfd{base=Base}, TempMap, Strategy) ->
  {PreBase, Base1, Spilled} = fix_src1(Base, TempMap, Strategy),
  {PreBase ++ [I#stfd{base=Base1}], Spilled}.

do_stfdx(I=#stfdx{base1=Base1,base2=Base2}, TempMap, Strategy) ->
  {PreBase1, Base1a, SpillB1} = fix_src1(Base1, TempMap, Strategy),
  {PreBase2, Base2a, SpillB2} = fix_src2(Base2, TempMap, Strategy),
  Fixed = I#stfdx{base1=Base1a,base2=Base2a},
  {PreBase1 ++ PreBase2 ++ [Fixed], SpillB1 or SpillB2}.

%%% Fix Dst and Src operands.
%%% Every fix_* function returns {ExtraMoves, NewOperand, DidSpill}.

%% Only #ppc_temp{} operands can need fixing; anything else (an
%% immediate) is returned as is.
fix_src2_or_imm(Src2 = #ppc_temp{}, TempMap, Strategy) ->
  fix_src2(Src2, TempMap, Strategy);
fix_src2_or_imm(Imm, _TempMap, _Strategy) ->
  {[], Imm, false}.

fix_src1(Src, TempMap, Strategy) ->
  fix_src(Src, TempMap, temp1(Strategy)).

%% For the 'fixed' strategy tempN/1 names the scratch register to use;
%% for 'new' it returns [], which makes clone/2 create a fresh temp.
temp1('fixed') -> hipe_ppc_registers:temp1();
temp1('new') -> [].

fix_src2(Src, TempMap, Strategy) ->
  fix_src(Src, TempMap, temp2(Strategy)).

temp2('fixed') -> hipe_ppc_registers:temp2();
temp2('new') -> [].

fix_src3(Src, TempMap, Strategy) -> % storex :-(
  fix_src(Src, TempMap, temp3(Strategy)).

temp3('fixed') -> hipe_ppc_registers:temp3();
temp3('new') -> [].

%% A spilled source is copied into a clone by a pseudo_move that the
%% caller places before the instruction.
fix_src(Src, TempMap, RegOpt) ->
  case temp_is_spilled(Src, TempMap) of
    true ->
      Scratch = clone(Src, RegOpt),
      {[hipe_ppc:mk_pseudo_move(Scratch, Src)], Scratch, true};
    _ ->
      {[], Src, false}
  end.

%% A spilled destination is replaced by a clone, which a pseudo_move
%% placed after the instruction copies back into Dst. With the 'fixed'
%% strategy the clone lives in temp3.
fix_dst(Dst, TempMap, Strategy) ->
  case temp_is_spilled(Dst, TempMap) of
    true ->
      Scratch = clone(Dst, temp3(Strategy)),
      {[hipe_ppc:mk_pseudo_move(Dst, Scratch)], Scratch, true};
    _ ->
      {[], Dst, false}
  end.

%%% Check if an operand is a pseudo-temp.

temp_is_spilled(Temp, []) -> % special case for naive regalloc
  not(hipe_ppc:temp_is_precoloured(Temp));
temp_is_spilled(Temp, TempMap) ->
  case hipe_ppc:temp_is_allocatable(Temp) of
    false ->
      true;
    true ->
      Reg = hipe_ppc:temp_reg(Temp),
      %% Registers beyond the end of the map are reported as not spilled.
      tuple_size(TempMap) > Reg andalso hipe_temp_map:is_spilled(Reg, TempMap)
  end.

%%% Make a certain reg into a clone of Temp.

clone(Temp, RegOpt) ->
  clone_as(RegOpt, hipe_ppc:temp_type(Temp)).

%% [] asks for a brand new temp; anything else is the register to use.
clone_as([], Type) -> hipe_ppc:mk_new_temp(Type);
clone_as(Reg, Type) -> hipe_ppc:mk_temp(Reg, Type).
diff --git a/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl
new file mode 100644
index 0000000000..889c5681ac
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_ra_postconditions_fp.erl
@@ -0,0 +1,130 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_ra_postconditions_fp).
-export([check_and_rewrite/2]).
-include("hipe_ppc.hrl").

%% Rewrite the floating-point instructions of Defun whose operands are
%% spilled according to Coloring. Returns {NewDefun, DidSpill}; the
%% var_range of NewDefun is refreshed from the gensym counter afterwards.
check_and_rewrite(Defun, Coloring) ->
  TempMap = hipe_temp_map:cols2tuple(Coloring, hipe_ppc_specific_fp),
  #defun{code=OldCode} = Defun,
  {NewCode, DidSpill} = do_insns(OldCode, TempMap, [], false),
  NewDefun = Defun#defun{code=NewCode,
			 var_range={0, hipe_gensym:get_var(ppc)}},
  {NewDefun, DidSpill}.

%% Accum is the reversed output so far, DidSpill the spill flag so far.
do_insns(Insns, TempMap, Accum, DidSpill) ->
  Step =
    fun(I, {RevCode, Spilled}) ->
	{NewIs, SpilledNow} = do_insn(I, TempMap),
	{lists:reverse(NewIs, RevCode), Spilled or SpilledNow}
    end,
  {RevCode, Spilled} = lists:foldl(Step, {Accum, DidSpill}, Insns),
  {lists:reverse(RevCode), Spilled}.

%% Only instructions with floating-point operands are rewritten; every
%% other instruction is passed through as a singleton list.
do_insn(I = #lfd{}, TempMap) -> do_lfd(I, TempMap);
do_insn(I = #lfdx{}, TempMap) -> do_lfdx(I, TempMap);
do_insn(I = #stfd{}, TempMap) -> do_stfd(I, TempMap);
do_insn(I = #stfdx{}, TempMap) -> do_stfdx(I, TempMap);
do_insn(I = #fp_binary{}, TempMap) -> do_fp_binary(I, TempMap);
do_insn(I = #fp_unary{}, TempMap) -> do_fp_unary(I, TempMap);
do_insn(I = #pseudo_fmove{}, TempMap) -> do_pseudo_fmove(I, TempMap);
do_insn(I, _TempMap) -> {[I], false}.

%%% Fix relevant instruction types.
%%% Moves for sources precede the rewritten instruction, the move for
%%% the destination follows it.

do_lfd(I=#lfd{dst=Dst}, TempMap) ->
  {PostDst, Dst1, Spilled} = fix_dst(Dst, TempMap),
  {[I#lfd{dst=Dst1} | PostDst], Spilled}.

do_lfdx(I=#lfdx{dst=Dst}, TempMap) ->
  {PostDst, Dst1, Spilled} = fix_dst(Dst, TempMap),
  {[I#lfdx{dst=Dst1} | PostDst], Spilled}.

do_stfd(I=#stfd{src=Src}, TempMap) ->
  {PreSrc, Src1, Spilled} = fix_src(Src, TempMap),
  {PreSrc ++ [I#stfd{src=Src1}], Spilled}.

do_stfdx(I=#stfdx{src=Src}, TempMap) ->
  {PreSrc, Src1, Spilled} = fix_src(Src, TempMap),
  {PreSrc ++ [I#stfdx{src=Src1}], Spilled}.

do_fp_binary(I=#fp_binary{dst=Dst,src1=Src1,src2=Src2}, TempMap) ->
  {PostDst, Dst1, SpillD} = fix_dst(Dst, TempMap),
  {PreSrc1, Src1a, SpillS1} = fix_src(Src1, TempMap),
  {PreSrc2, Src2a, SpillS2} = fix_src(Src2, TempMap),
  Fixed = I#fp_binary{dst=Dst1,src1=Src1a,src2=Src2a},
  {PreSrc1 ++ PreSrc2 ++ [Fixed | PostDst], SpillD or SpillS1 or SpillS2}.

do_fp_unary(I=#fp_unary{dst=Dst,src=Src}, TempMap) ->
  {PostDst, Dst1, SpillD} = fix_dst(Dst, TempMap),
  {PreSrc, Src1, SpillS} = fix_src(Src, TempMap),
  Fixed = I#fp_unary{dst=Dst1,src=Src1},
  {PreSrc ++ [Fixed | PostDst], SpillD or SpillS}.

%% The move is only touched when its destination is spilled; in that
%% case the source is fixed so that it is not spilled as well.
do_pseudo_fmove(I=#pseudo_fmove{dst=Dst,src=Src}, TempMap) ->
  case temp_is_spilled(Dst, TempMap) of
    true ->
      {PreSrc, Src1, Spilled} = fix_src(Src, TempMap),
      {PreSrc ++ [I#pseudo_fmove{src=Src1}], Spilled};
    _ ->
      {[I], false}
  end.

%%% Fix Dst and Src operands.
%%% Both return {ExtraMoves, NewOperand, DidSpill}.

fix_src(Src, TempMap) ->
  case temp_is_spilled(Src, TempMap) of
    true ->
      Fresh = clone(Src),
      {[hipe_ppc:mk_pseudo_fmove(Fresh, Src)], Fresh, true};
    _ ->
      {[], Src, false}
  end.

fix_dst(Dst, TempMap) ->
  case temp_is_spilled(Dst, TempMap) of
    true ->
      Fresh = clone(Dst),
      {[hipe_ppc:mk_pseudo_fmove(Dst, Fresh)], Fresh, true};
    _ ->
      {[], Dst, false}
  end.

%%% Check if an operand is a pseudo-temp.

temp_is_spilled(Temp, TempMap) ->
  case hipe_ppc:temp_is_allocatable(Temp) of
    false ->
      true;
    true ->
      Reg = hipe_ppc:temp_reg(Temp),
      %% Registers beyond the end of the map are reported as not spilled.
      tuple_size(TempMap) > Reg andalso hipe_temp_map:is_spilled(Reg, TempMap)
  end.

%%% Create a new temp with the same type as an old one.

clone(Temp) ->
  hipe_ppc:mk_new_temp(hipe_ppc:temp_type(Temp)).	% XXX: always double?
diff --git a/lib/hipe/ppc/hipe_ppc_registers.erl b/lib/hipe/ppc/hipe_ppc_registers.erl
new file mode 100644
index 0000000000..74aeab3df4
--- /dev/null
+++ b/lib/hipe/ppc/hipe_ppc_registers.erl
@@ -0,0 +1,246 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%

-module(hipe_ppc_registers).

-export([reg_name_gpr/1,
	 reg_name_fpr/1,
	 first_virtual/0,
	 is_precoloured_gpr/1,
	 is_precoloured_fpr/1,
	 all_precoloured/0,
	 return_value/0,
	 temp1/0,
	 temp2/0,
	 temp3/0,	% for base2 in storex :-(
	 heap_pointer/0,
	 stack_pointer/0,
	 proc_pointer/0,
	 %%heap_limit/0,
	 %%fcalls/0,
	 allocatable_gpr/0,
	 allocatable_fpr/0,
	 is_fixed/1,
	 nr_args/0,
	 arg/1,
	 args/1,
	 is_arg/1,	% for linear scan
	 call_clobbered/0,
	 tailcall_clobbered/0,
	 live_at_return/0
	]).

-include("../rtl/hipe_literals.hrl").

%% Register numbers; ?Rn is simply n.
-define(R0, 0).
-define(R1, 1).
-define(R2, 2).
-define(R3, 3).
-define(R4, 4).
-define(R5, 5).
-define(R6, 6).
-define(R7, 7).
-define(R8, 8).
-define(R9, 9).
-define(R10, 10).
-define(R11, 11).
-define(R12, 12).
-define(R13, 13).
-define(R14, 14).
-define(R15, 15).
-define(R16, 16).
-define(R17, 17).
-define(R18, 18).
-define(R19, 19).
-define(R20, 20).
-define(R21, 21).
-define(R22, 22).
-define(R23, 23).
-define(R24, 24).
-define(R25, 25).
-define(R26, 26).
-define(R27, 27).
-define(R28, 28).
-define(R29, 29).
-define(R30, 30).
-define(R31, 31).
-define(LAST_PRECOLOURED, 31). % must handle both GPR and FPR ranges

%% Argument registers, in argument order.
-define(ARG0, ?R4).
-define(ARG1, ?R5).
-define(ARG2, ?R6).
-define(ARG3, ?R7).
-define(ARG4, ?R8).
-define(ARG5, ?R9).
-define(ARG6, ?R10).

%% Scratch registers.
-define(TEMP1, ?R28).
-define(TEMP2, ?R27).
-define(TEMP3, ?R26). % XXX: for base2 in storex, switch to R0 instead?

%% Registers with a dedicated role.
-define(RETURN_VALUE, ?R3).
-define(HEAP_POINTER, ?R29).
-define(STACK_POINTER, ?R30).
-define(PROC_POINTER, ?R31).

%% Printable register names: "r<N>" and "f<N>".
reg_name_gpr(R) -> "r" ++ integer_to_list(R).
reg_name_fpr(R) -> "f" ++ integer_to_list(R).

%%% Must handle both GPR and FPR ranges.
first_virtual() -> ?LAST_PRECOLOURED + 1.

%%% These two tests have the same implementation, but that's
%%% not something we should cast in stone in the interface.
is_precoloured_gpr(R) -> ?LAST_PRECOLOURED >= R.
is_precoloured_fpr(R) -> ?LAST_PRECOLOURED >= R.

%% All of R0..R31, in ascending order.
all_precoloured() ->
  %% XXX: skip R1, R2, and R13. They should never occur anywhere.
  lists:seq(?R0, ?R31).

return_value() -> ?RETURN_VALUE.

temp1() -> ?TEMP1.
temp2() -> ?TEMP2.
temp3() -> ?TEMP3.	% for base2 in storex :-(

heap_pointer() -> ?HEAP_POINTER.

stack_pointer() -> ?STACK_POINTER.

proc_pointer() -> ?PROC_POINTER.

%% R3..R12 followed by R14..R28, in ascending order.
allocatable_gpr() ->
  %% r0 is too restricted to be useful for variables
  %% r1, r2, and r13 are reserved for C
  %% r29, r30, and r31 are fixed global registers
  lists:seq(?R3, ?R12) ++ lists:seq(?R14, ?R28).

%% All 32 floating-point registers, 0..31.
allocatable_fpr() ->
  lists:seq(0, 31).

%% Needed for hipe_graph_coloring_regalloc.
%% Presumably true for Reg in AllPrecoloured \ Allocatable.
is_fixed(Reg) ->
  lists:member(Reg,
	       [?HEAP_POINTER,
		?STACK_POINTER,
		?PROC_POINTER,
		%% The following cases are required for linear scan:
		%% it gets confused if it sees a register which is
		%% neither allocatable nor global (fixed or one of
		%% the scratch registers set aside for linear scan).
		?R0,
		?R1,
		?R2,
		?R13]).

nr_args() -> ?PPC_NR_ARG_REGS.

%% The registers holding the first min(Arity, ?PPC_NR_ARG_REGS)
%% arguments, in argument order.
args(Arity) when is_integer(Arity) ->
  args(erlang:min(Arity, ?PPC_NR_ARG_REGS) - 1, []).

%% Counts I down to 0, consing arg(I) onto Rest.
args(I, Rest) when is_integer(I), I < 0 -> Rest;
args(I, Rest) -> args(I-1, [arg(I) | Rest]).

%% The register for argument number N (counted from 0). Exits when N
%% does not name an argument register.
arg(N) when N < ?PPC_NR_ARG_REGS -> arg_reg(N);
arg(N) -> exit({?MODULE, arg, N}).

arg_reg(0) -> ?ARG0;
arg_reg(1) -> ?ARG1;
arg_reg(2) -> ?ARG2;
arg_reg(3) -> ?ARG3;
arg_reg(4) -> ?ARG4;
arg_reg(5) -> ?ARG5;
arg_reg(6) -> ?ARG6;
arg_reg(N) -> exit({?MODULE, arg, N}).

%% True when R is one of the argument registers actually in use, i.e.
%% its argument index is below ?PPC_NR_ARG_REGS.
is_arg(R) ->
  case arg_index(R) of
    none -> false;
    Index -> ?PPC_NR_ARG_REGS > Index
  end.

arg_index(?ARG0) -> 0;
arg_index(?ARG1) -> 1;
arg_index(?ARG2) -> 2;
arg_index(?ARG3) -> 3;
arg_index(?ARG4) -> 4;
arg_index(?ARG5) -> 5;
arg_index(?ARG6) -> 6;
arg_index(_) -> none.

%% Each listed register appears twice, first as tagged and then as
%% untagged.
call_clobbered() ->		% does the RA strip the type or not?
  Regs =
    [?R0]
    %% R1 is reserved for C
    %% R2 is reserved for C
    ++ lists:seq(?R3, ?R12)
    %% R13 is reserved for C
    ++ lists:seq(?R14, ?R28),
    %% R29 is fixed (HP)
    %% R30 is fixed (NSP)
    %% R31 is fixed (P)
  [{Reg, Type} || Reg <- Regs, Type <- [tagged, untagged]].

tailcall_clobbered() ->		% tailcall crapola needs one temp
  [{?TEMP1, Type} || Type <- [tagged, untagged]].

live_at_return() ->
  %% {?LR,untagged} is deliberately not part of this list.
  [{Reg, untagged}
   || Reg <- [?HEAP_POINTER, ?STACK_POINTER, ?PROC_POINTER]].
diff --git a/lib/hipe/ppc/hipe_rtl_to_ppc.erl b/lib/hipe/ppc/hipe_rtl_to_ppc.erl
new file mode 100644
index 0000000000..458af250de
--- /dev/null
+++ b/lib/hipe/ppc/hipe_rtl_to_ppc.erl
@@ -0,0 +1,1249 @@
%%% -*- erlang-indent-level: 2 -*-
%%%
%%% %CopyrightBegin%
%%%
%%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%%
%%% The contents of this file are subject to the Erlang Public License,
%%% Version 1.1, (the "License"); you may not use this file except in
%%% compliance with the License. You should have received a copy of the
%%% Erlang Public License along with this software. If not, it can be
%%% retrieved online at http://www.erlang.org/.
+%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%% The PowerPC instruction set is quite irregular. +%%% The following quirks must be handled by the translation: +%%% +%%% - The instruction names are different for reg/reg and reg/imm +%%% source operands. For some operations, completely different +%%% instructions handle the reg/reg and reg/imm cases. +%%% - The name of an arithmetic instruction depends on whether any +%%% condition codes are to be set or not. Overflow is treated +%%% separately from other conditions. +%%% - Some combinations of RTL ALU operations, source operand shapes, +%%% and requested conditions have no direct correspondence in the +%%% PowerPC instruction set. +%%% - The tagging of immediate operands as simm16 or uimm16 depends +%%% on the actual instruction. +%%% - Conditional branches have no unsigned conditions. Instead there +%%% are signed and unsigned versions of the compare instruction. +%%% - The arithmetic overflow flag XER[SO] is sticky: once set it +%%% remains set until explicitly cleared. + +-module(hipe_rtl_to_ppc). +-export([translate/1]). + +-include("../rtl/hipe_rtl.hrl").

%%% Translate an RTL function into a PowerPC defun.
%%% The ppc gensym state is reset first: variables start at the first
%%% virtual register and labels continue from the current rtl label.
translate(RTL) ->
  hipe_gensym:init(ppc),
  hipe_gensym:set_var(ppc, hipe_ppc_registers:first_virtual()),
  hipe_gensym:set_label(ppc, hipe_gensym:get_label(rtl)),
  {Formals, Map} = conv_formals(hipe_rtl:rtl_params(RTL), vmap_empty()),
  {Body, NewData} =
    conv_insn_list(hipe_rtl:rtl_code(RTL), Map, hipe_rtl:rtl_data(RTL)),
  Code =
    case split_args(Formals) of
      {[], _} ->
	Body;
      {RegFormals, _} ->
	%% Formals passed in registers are moved by code placed under a
	%% fresh label in front of the translated body.
	[hipe_ppc:mk_label(hipe_gensym:get_next_label(ppc)) |
	 move_formals(RegFormals, Body)]
    end,
  hipe_ppc:mk_defun(hipe_rtl:rtl_fun(RTL),
		    Formals,
		    hipe_rtl:rtl_is_closure(RTL),
		    hipe_rtl:rtl_is_leaf(RTL),
		    Code,
		    NewData,
		    [],
		    []).

%%% Translate a list of RTL instructions, threading the variable map and
%%% the data segment through each translation. Returns the concatenated
%%% PowerPC instructions and the final data segment.
conv_insn_list(Insns, Map, Data) ->
  Step =
    fun(I, {RevChunks, MapIn, DataIn}) ->
	{NewIs, MapOut, DataOut} = conv_insn(I, MapIn, DataIn),
	{[NewIs | RevChunks], MapOut, DataOut}
    end,
  {RevChunks, _FinalMap, NewData} = lists:foldl(Step, {[], Map, Data}, Insns),
  {lists:append(lists:reverse(RevChunks)), NewData}.

%%% Dispatch on the RTL instruction record. Each converter returns
%%% {PpcInsns, NewMap, NewData}; an unknown instruction exits.
conv_insn(I = #alu{}, Map, Data) -> conv_alu(I, Map, Data);
conv_insn(I = #alub{}, Map, Data) -> conv_alub(I, Map, Data);
conv_insn(I = #branch{}, Map, Data) -> conv_branch(I, Map, Data);
conv_insn(I = #call{}, Map, Data) -> conv_call(I, Map, Data);
conv_insn(I = #comment{}, Map, Data) -> conv_comment(I, Map, Data);
conv_insn(I = #enter{}, Map, Data) -> conv_enter(I, Map, Data);
conv_insn(I = #goto{}, Map, Data) -> conv_goto(I, Map, Data);
conv_insn(I = #label{}, Map, Data) -> conv_label(I, Map, Data);
conv_insn(I = #load{}, Map, Data) -> conv_load(I, Map, Data);
conv_insn(I = #load_address{}, Map, Data) -> conv_load_address(I, Map, Data);
conv_insn(I = #load_atom{}, Map, Data) -> conv_load_atom(I, Map, Data);
conv_insn(I = #move{}, Map, Data) -> conv_move(I, Map, Data);
conv_insn(I = #return{}, Map, Data) -> conv_return(I, Map, Data);
conv_insn(I = #store{}, Map, Data) -> conv_store(I, Map, Data);
conv_insn(I = #switch{}, Map, Data) -> conv_switch(I, Map, Data);
conv_insn(I = #fconv{}, Map, Data) -> conv_fconv(I, Map, Data);
conv_insn(I = #fmove{}, Map, Data) -> conv_fmove(I, Map, Data);
conv_insn(I = #fload{}, Map, Data) -> conv_fload(I, Map, Data);
conv_insn(I = #fstore{}, Map, Data) -> conv_fstore(I, Map, Data);
conv_insn(I = #fp{}, Map, Data) -> conv_fp_binary(I, Map, Data);
conv_insn(I = #fp_unop{}, Map, Data) -> conv_fp_unary(I, Map, Data);
conv_insn(I, _Map, _Data) -> exit({?MODULE,conv_insn,I}).

%%% Dst := (double)Src, where Dst is FP reg and Src is int reg
conv_fconv(I, Map, Data) ->
  {Dst, MapA} = conv_fpreg(hipe_rtl:fconv_dst(I), Map),
  {Src, MapB} = conv_src(hipe_rtl:fconv_src(I), MapA), % exclude imm src
  {mk_fconv(Dst, Src), MapB, Data}.

%%% The instruction sequence for an integer to double conversion.
%%% Two words are stored at offsets 24 and 28 from register 1, read back
%%% as one double, and the constant loaded from {fconv_constant,c_const}
%%% is subtracted from it.
mk_fconv(Dst, Src) ->
  CSP = hipe_ppc:mk_temp(1, 'untagged'),
  R0 = hipe_ppc:mk_temp(0, 'untagged'),
  %% The fresh temps are created in this fixed order.
  ConstAddr = hipe_ppc:mk_new_temp('untagged'),
  LowWord = hipe_ppc:mk_new_temp('untagged'),
  HighWord = hipe_ppc:mk_new_temp('untagged'),
  FConst = hipe_ppc:mk_new_temp('double'),
  FRaw = hipe_ppc:mk_new_temp('double'),
  [hipe_ppc:mk_pseudo_li(ConstAddr, {fconv_constant,c_const}),
   hipe_ppc:mk_lfd(FConst, 0, ConstAddr),
   hipe_ppc:mk_alu('xoris', LowWord, Src, hipe_ppc:mk_uimm16(16#8000)),
   hipe_ppc:mk_store('stw', LowWord, 28, CSP),
   hipe_ppc:mk_alu('addis', HighWord, R0, hipe_ppc:mk_simm16(16#4330)),
   hipe_ppc:mk_store('stw', HighWord, 24, CSP),
   hipe_ppc:mk_lfd(FRaw, 24, CSP),
   hipe_ppc:mk_fp_binary('fsub', Dst, FRaw, FConst)].

%%% Dst := Src, where both Dst and Src are FP regs
conv_fmove(I, Map, Data) ->
  {Dst, MapA} = conv_fpreg(hipe_rtl:fmove_dst(I), Map),
  {Src, MapB} = conv_fpreg(hipe_rtl:fmove_src(I), MapA),
  {mk_fmove(Dst, Src), MapB, Data}.

mk_fmove(Dst, Src) ->
  [hipe_ppc:mk_pseudo_fmove(Dst, Src)].

%%% Dst := MEM[Base+Off], where Dst is FP reg
conv_fload(I, Map, Data) ->
  {Dst, MapA} = conv_fpreg(hipe_rtl:fload_dst(I), Map),
  {Base1, MapB} = conv_src(hipe_rtl:fload_src(I), MapA),
  {Base2, MapC} = conv_src(hipe_rtl:fload_offset(I), MapB),
  {mk_fload(Dst, Base1, Base2), MapC, Data}.

%%% Pick the load form from which of the two address operands are temps.
%%% With exactly one temp, the temp becomes the base and the other
%%% operand the displacement.
mk_fload(Dst, Base1, Base2) ->
  case {hipe_ppc:is_temp(Base1), hipe_ppc:is_temp(Base2)} of
    {true, true} -> mk_fload_rr(Dst, Base1, Base2);
    {true, _} -> mk_fload_ri(Dst, Base1, Base2);
    {_, true} -> mk_fload_ri(Dst, Base2, Base1);
    {_, _} -> mk_fload_ii(Dst, Base1, Base2)
  end.
%%% FP load with two immediate address operands: report it, put the
%%% first immediate in a scratch register, then use the reg+imm form.

mk_fload_ii(Dst, Base1, Base2) ->
  io:format("~w: RTL fload with two immediates\n", [?MODULE]),
  BaseR = new_untagged_temp(),
  Load = mk_fload_ri(Dst, BaseR, Base2),
  mk_li(BaseR, Base1, Load).

%%% FP load, register base plus immediate displacement.

mk_fload_ri(Dst, Base, Disp) ->
  hipe_ppc:mk_fload(Dst, Disp, Base, 'new').

%%% FP load, register plus register (indexed form).

mk_fload_rr(Dst, Base1, Base2) ->
  [hipe_ppc:mk_lfdx(Dst, Base1, Base2)].

%%% fstore: MEM[Base+Off] := Src, with Src an FP reg.

conv_fstore(I, Map, Data) ->
  {Base1, Map0} = conv_dst(hipe_rtl:fstore_base(I), Map),
  {Src, Map1} = conv_fpreg(hipe_rtl:fstore_src(I), Map0),
  {Base2, Map2} = conv_src(hipe_rtl:fstore_offset(I), Map1),
  {mk_fstore(Src, Base1, Base2), Map2, Data}.

%%% Base1 comes from conv_dst/2; only the offset operand decides
%%% between the indexed and the displacement form.

mk_fstore(Src, Base1, Base2) ->
  case hipe_ppc:is_temp(Base2) of
    true -> mk_fstore_rr(Src, Base1, Base2);
    _ -> mk_fstore_ri(Src, Base1, Base2)
  end.

mk_fstore_ri(Src, Base, Disp) ->
  hipe_ppc:mk_fstore(Src, Disp, Base, 'new').

mk_fstore_rr(Src, Base1, Base2) ->
  [hipe_ppc:mk_stfdx(Src, Base1, Base2)].

%%% fp: Dst := Src1 op Src2, all FP regs.

conv_fp_binary(I, Map, Data) ->
  {Dst, Map0} = conv_fpreg(hipe_rtl:fp_dst(I), Map),
  {Src1, Map1} = conv_fpreg(hipe_rtl:fp_src1(I), Map0),
  {Src2, Map2} = conv_fpreg(hipe_rtl:fp_src2(I), Map1),
  Insns = mk_fp_binary(Dst, Src1, hipe_rtl:fp_op(I), Src2),
  {Insns, Map2, Data}.

%%% The four supported RTL operators map to PPC opcodes of the same
%%% name; any other operator fails the case.

mk_fp_binary(Dst, Src1, RtlFpOp, Src2) ->
  PpcOp =
    case RtlFpOp of
      'fadd' -> 'fadd';
      'fdiv' -> 'fdiv';
      'fmul' -> 'fmul';
      'fsub' -> 'fsub'
    end,
  [hipe_ppc:mk_fp_binary(PpcOp, Dst, Src1, Src2)].

%%% fp_unop: Dst := op Src, both FP regs.

conv_fp_unary(I, Map, Data) ->
  {Dst, Map0} = conv_fpreg(hipe_rtl:fp_unop_dst(I), Map),
  {Src, Map1} = conv_fpreg(hipe_rtl:fp_unop_src(I), Map0),
  Insns = mk_fp_unary(Dst, Src, hipe_rtl:fp_unop_op(I)),
  {Insns, Map1, Data}.

%%% Only 'fchs' is supported; it becomes 'fneg'.

mk_fp_unary(Dst, Src, RtlFpUnOp) ->
  PpcOp =
    case RtlFpUnOp of
      'fchs' -> 'fneg'
    end,
  [hipe_ppc:mk_fp_unary(PpcOp, Dst, Src)].
%%% alu: dst = src1 aluop src2

conv_alu(I, Map, Data) ->
  {Dst, Map0} = conv_dst(hipe_rtl:alu_dst(I), Map),
  {Src1, Map1} = conv_src(hipe_rtl:alu_src1(I), Map0),
  {Src2, Map2} = conv_src(hipe_rtl:alu_src2(I), Map1),
  Insns = mk_alu(Dst, Src1, hipe_rtl:alu_op(I), Src2),
  {Insns, Map2, Data}.

%%% Pick the reg/reg, reg/imm, imm/reg or imm/imm variant from which
%%% operands are temps.

mk_alu(Dst, Src1, RtlAluOp, Src2) ->
  case {hipe_ppc:is_temp(Src1), hipe_ppc:is_temp(Src2)} of
    {true, true} -> mk_alu_rr(Dst, Src1, RtlAluOp, Src2);
    {true, _} -> mk_alu_ri(Dst, Src1, RtlAluOp, Src2);
    {_, true} -> mk_alu_ir(Dst, Src1, RtlAluOp, Src2);
    {_, _} -> mk_alu_ii(Dst, Src1, RtlAluOp, Src2)
  end.

%%% Both operands are immediates: report it, load the first one into
%%% a scratch register, and continue with the reg/imm variant.

mk_alu_ii(Dst, Src1, RtlAluOp, Src2) ->
  io:format("~w: RTL alu with two immediates (~w ~w ~w)\n",
            [?MODULE, Src1, RtlAluOp, Src2]),
  LeftR = new_untagged_temp(),
  Rest = mk_alu_ri(Dst, LeftR, RtlAluOp, Src2),
  mk_li(LeftR, Src1, Rest).

%%% Immediate on the left. A commuting operator just swaps its
%%% operands; otherwise the immediate is loaded into a scratch reg.

mk_alu_ir(Dst, Src1, RtlAluOp, Src2) ->
  case rtl_aluop_commutes(RtlAluOp) of
    true ->
      mk_alu_ri(Dst, Src2, RtlAluOp, Src1);
    _ ->
      LeftR = new_untagged_temp(),
      Rest = mk_alu_rr(Dst, LeftR, RtlAluOp, Src2),
      mk_li(LeftR, Src1, Rest)
  end.

%%% Register on the left, immediate on the right.

mk_alu_ri(Dst, Src1, 'sub', Src2) ->
  %% there is no 'subi': add the negated immediate instead
  mk_alu_ri_addi(Dst, Src1, -Src2);
mk_alu_ri(Dst, Src1, 'add', Src2) ->
  %% 'addi' has a 16-bit simm operand
  mk_alu_ri_addi(Dst, Src1, Src2);
mk_alu_ri(Dst, Src1, 'mul', Src2) ->
  %% 'mulli' has a 16-bit simm operand
  mk_alu_ri_simm16(Dst, Src1, 'mul', 'mulli', Src2);
mk_alu_ri(Dst, Src1, 'and', Src2) ->
  %% Prefer a single 'rlwinm' when the immediate is a contiguous
  %% bit mask; otherwise 'andi.' with its 16-bit uimm operand.
  case rlwinm_mask(Src2) of
    {MB,ME} ->
      [hipe_ppc:mk_unary({'rlwinm',0,MB,ME}, Dst, Src1)];
    _ ->
      mk_alu_ri_bitop(Dst, Src1, 'and', 'andi.', Src2)
  end;
mk_alu_ri(Dst, Src1, 'or', Src2) ->
  %% 'ori' has a 16-bit uimm operand
  mk_alu_ri_bitop(Dst, Src1, 'or', 'ori', Src2);
mk_alu_ri(Dst, Src1, 'xor', Src2) ->
  %% 'xori' has a 16-bit uimm operand
  mk_alu_ri_bitop(Dst, Src1, 'xor', 'xori', Src2);
mk_alu_ri(Dst, Src1, RtlAluOp, Src2) ->
  %% shift ops have 5-bit uimm operands
  mk_alu_ri_shift(Dst, Src1, RtlAluOp, Src2).
%%% Try to express the low 32 bits of Imm as an 'rlwinm' mask.
%%% Returns {MB,ME} in PPC bit numbering, or [] when neither Imm nor
%%% its complement is a single contiguous run of 1 bits. A run found
%%% in the complement is returned as the wrapped mask {ME+1,MB-1}.

rlwinm_mask(Imm) ->
  case rlwinm_mask2(Imm) of
    {_MB,_ME} = Direct ->
      Direct;
    [] ->
      case rlwinm_mask2(bnot Imm) of
        {MB,ME} -> {ME+1,MB-1};
        [] -> []
      end
  end.

%%% Returns {MB,ME} when the low 32 bits of Imm form one contiguous,
%%% non-wrapping run of 1 bits, else []. All-zeros and all-ones both
%%% yield [].

rlwinm_mask2(Imm) ->
  case Imm band 16#ffffffff of
    0 ->
      [];
    Word ->
      FirstOne = lsb_log2(Word),                % lowest 1 bit
      case bnot(Word bsr FirstOne) band 16#ffffffff of
        0 ->
          [];   % Imm was all-bits-one XXX: we should handle this
        Inverted ->
          RunLength = lsb_log2(Inverted),       % first 0 bit after the 1s
          case Word bsr (FirstOne+RunLength) of
            0 ->
              LastOne = FirstOne+RunLength-1,   % highest 1 bit
              %% PPC numbers bits from the most significant end
              {31-LastOne, 31-FirstOne};
            _ ->
              []
          end
      end
  end.

%%% Index of the least significant 1 bit. PRE: Word =/= 0

lsb_log2(Word) ->
  bitN_log2(Word band -Word, 0).

%%% log2 of a value with exactly one bit set, added to ShiftN.

bitN_log2(BitN, ShiftN) when BitN > 16#ffff ->
  bitN_log2(BitN bsr 16, ShiftN + 16);
bitN_log2(BitN, ShiftN) ->
  ShiftN + hweight16(BitN - 1).

%%% Population count of a 16-bit value. PRE: 0 <= Word <= 16#ffff

hweight16(Word) ->
  Pairs = (Word band 16#5555) + ((Word bsr 1) band 16#5555),
  Nibbles = (Pairs band 16#3333) + ((Pairs bsr 2) band 16#3333),
  Bytes = (Nibbles band 16#0F0F) + ((Nibbles bsr 4) band 16#0F0F),
  (Bytes band 16#00FF) + ((Bytes bsr 8) band 16#00FF).

%%% Add an immediate via 'addi', falling back to reg/reg 'add'.

mk_alu_ri_addi(Dst, Src1, Src2) ->
  mk_alu_ri_simm16(Dst, Src1, 'add', 'addi', Src2).

%%% Use the immediate opcode AluOp when Src2 fits a signed 16-bit
%%% field; otherwise load Src2 into a register and use RtlAluOp.

mk_alu_ri_simm16(Dst, Src1, _RtlAluOp, AluOp, Src2)
  when is_integer(Src2), -32768 =< Src2, Src2 < 32768 ->
  [hipe_ppc:mk_alu(AluOp, Dst, Src1, hipe_ppc:mk_simm16(Src2))];
mk_alu_ri_simm16(Dst, Src1, RtlAluOp, _AluOp, Src2) ->
  mk_alu_ri_rr(Dst, Src1, RtlAluOp, Src2).

%%% Use the immediate opcode AluOp when Src2 fits an unsigned 16-bit
%%% field; otherwise load Src2 into a register and use RtlAluOp.

mk_alu_ri_bitop(Dst, Src1, _RtlAluOp, AluOp, Src2)
  when is_integer(Src2), 0 =< Src2, Src2 < 65536 ->
  [hipe_ppc:mk_alu(AluOp, Dst, Src1, hipe_ppc:mk_uimm16(Src2))];
mk_alu_ri_bitop(Dst, Src1, RtlAluOp, _AluOp, Src2) ->
  mk_alu_ri_rr(Dst, Src1, RtlAluOp, Src2).
%%% Shift by an immediate. Amounts in 0..31 use the immediate forms;
%%% anything else goes through a register.

mk_alu_ri_shift(Dst, Src1, RtlAluOp, Src2) when Src2 >= 0, Src2 < 32 ->
  ImmOp =
    case RtlAluOp of
      'sll' -> 'slwi';  % alias for rlwinm
      'srl' -> 'srwi';  % alias for rlwinm
      'sra' -> 'srawi'
    end,
  [hipe_ppc:mk_alu(ImmOp, Dst, Src1, hipe_ppc:mk_uimm16(Src2))];
mk_alu_ri_shift(Dst, Src1, RtlAluOp, Src2) ->
  mk_alu_ri_rr(Dst, Src1, RtlAluOp, Src2).

%%% Load the immediate Src2 into a scratch register, then use the
%%% reg/reg form.

mk_alu_ri_rr(Dst, Src1, RtlAluOp, Src2) ->
  RightR = new_untagged_temp(),
  Rest = mk_alu_rr(Dst, Src1, RtlAluOp, RightR),
  mk_li(RightR, Src2, Rest).

%%% Reg/reg ALU operation. 'sub' becomes 'subf' with its operands
%%% swapped ("subtract from").

mk_alu_rr(Dst, Src1, 'sub', Src2) ->
  [hipe_ppc:mk_alu('subf', Dst, Src2, Src1)];
mk_alu_rr(Dst, Src1, RtlAluOp, Src2) ->
  PpcOp =
    case RtlAluOp of
      'add' -> 'add';
      'mul' -> 'mullw';
      'or' -> 'or';
      'and' -> 'and';
      'xor' -> 'xor';
      'sll' -> 'slw';
      'srl' -> 'srw';
      'sra' -> 'sraw'
    end,
  [hipe_ppc:mk_alu(PpcOp, Dst, Src1, Src2)].

%%% alub: dst = src1 aluop src2; if COND goto label
%%% 'add' with condition 'ltu' is translated as 'addc' followed by a
%%% copy of XER into CR, so that the branch can test 'eq'.

conv_alub(I, Map, Data) ->
  {Dst, Map0} = conv_dst(hipe_rtl:alub_dst(I), Map),
  {Src1, Map1} = conv_src(hipe_rtl:alub_src1(I), Map0),
  {Src2, Map2} = conv_src(hipe_rtl:alub_src2(I), Map1),
  {AluOp, BCond} =
    case {hipe_rtl:alub_op(I), hipe_rtl:alub_cond(I)} of
      {'add', 'ltu'} ->
        {'addc', 'eq'};
      {RtlOp, RtlCond} ->
        {conv_alub_op(RtlOp), conv_alub_cond(RtlCond)}
    end,
  %% The branch is built before the arithmetic so that labels and
  %% temps are allocated in the same order as before.
  Branch = mk_pseudo_bc(BCond,
                        hipe_rtl:alub_true_label(I),
                        hipe_rtl:alub_false_label(I),
                        hipe_rtl:alub_pred(I)),
  BranchSeq =
    case {AluOp, BCond} of
      {'addc', 'eq'} -> % copy XER[CA] to CR0[EQ] before the BC
        XerR = new_untagged_temp(),
        [hipe_ppc:mk_mfspr(XerR, 'xer'),
         hipe_ppc:mk_mtcr(XerR) |
         Branch];
      _ ->
        Branch
    end,
  {Left, Right} =
    case AluOp of
      'subf' -> {Src2, Src1};   % 'subf' subtracts its first operand
      _ -> {Src1, Src2}
    end,
  AluSeq = mk_alub(Dst, Left, AluOp, Right, BCond),
  {AluSeq ++ BranchSeq, Map2, Data}.
%%% Map an RTL ALU operator to the PPC reg/reg opcode used by alub.
%%% 'sub' becomes 'subf', whose operands are reversed: the caller
%%% (conv_alub/3) swaps the sources to compensate.

conv_alub_op(RtlAluOp) ->
  case RtlAluOp of
    'add' -> 'add';
    'sub' -> 'subf';    % XXX: must swap operands
    'mul' -> 'mullw';
    'or' -> 'or';
    'and' -> 'and';
    'xor' -> 'xor';
    'sll' -> 'slw';
    'srl' -> 'srw';
    'sra' -> 'sraw'
  end.

%%% Does the PPC opcode give the same result with swapped operands?

aluop_commutes(AluOp) ->
  case AluOp of
    'add' -> true;
    'addc' -> true;
    'subf' -> false;
    'mullw' -> true;
    'or' -> true;
    'and' -> true;
    'xor' -> true;
    'slw' -> false;
    'srw' -> false;
    'sraw' -> false
  end.

%%% Map an RTL alub condition to a PPC branch condition.
%%% Only signed conditions are accepted; anything else exits.

conv_alub_cond(Cond) ->
  case Cond of
    eq -> 'eq';
    ne -> 'ne';
    gt -> 'gt';
    ge -> 'ge';
    lt -> 'lt';
    le -> 'le';
    overflow -> 'so';
    not_overflow -> 'ns';
    _ -> exit({?MODULE,conv_alub_cond,Cond})
  end.

%%% Pick the reg/reg, reg/imm, imm/reg or imm/imm variant from which
%%% operands are temps. For 'subf' the operands arrive already
%%% swapped, i.e. the result is Src2 - Src1.

mk_alub(Dst, Src1, AluOp, Src2, BCond) ->
  case {hipe_ppc:is_temp(Src1), hipe_ppc:is_temp(Src2)} of
    {true, true} -> mk_alub_rr(Dst, Src1, AluOp, Src2, BCond);
    {true, _} -> mk_alub_ri(Dst, Src1, AluOp, Src2, BCond);
    {_, true} -> mk_alub_ir(Dst, Src1, AluOp, Src2, BCond);
    {_, _} -> mk_alub_ii(Dst, Src1, AluOp, Src2, BCond)
  end.

%%% Both operands are immediates: report it, load the first one into
%%% a scratch register, and continue with the reg/imm variant.

mk_alub_ii(Dst, Src1, AluOp, Src2, BCond) ->
  io:format("~w: RTL alub with two immediates\n", [?MODULE]),
  Tmp = new_untagged_temp(),
  mk_li(Tmp, Src1,
        mk_alub_ri(Dst, Tmp, AluOp, Src2, BCond)).

%%% Immediate on the left. A commuting opcode just swaps its
%%% operands; otherwise the immediate is loaded into a scratch reg.

mk_alub_ir(Dst, Src1, AluOp, Src2, BCond) ->
  case aluop_commutes(AluOp) of
    true ->
      mk_alub_ri(Dst, Src2, AluOp, Src1, BCond);
    _ ->
      Tmp = new_untagged_temp(),
      mk_li(Tmp, Src1,
            mk_alub_rr(Dst, Tmp, AluOp, Src2, BCond))
  end.

%%% Register on the left, integer immediate on the right.
%%% Overflow conditions need the OE forms, all others the Rc forms.

mk_alub_ri(Dst, Src1, AluOp, Src2, BCond) ->
  true = is_integer(Src2),
  case BCond of
    'so' -> mk_alub_ri_OE(Dst, Src1, AluOp, Src2);
    'ns' -> mk_alub_ri_OE(Dst, Src1, AluOp, Src2);
    _ -> mk_alub_ri_Rc(Dst, Src1, AluOp, Src2)
  end.

%%% Only 'add', 'subf', and 'mullw' apply here.
%%% None of them has an immediate+Rc+OE form, so the immediate is
%%% loaded into a scratch register and the reg/reg OE form is used,
%%% keeping the operand order (and thus the 'subf' semantics) intact.

mk_alub_ri_OE(Dst, Src1, AluOp, Src2) ->
  Tmp = new_untagged_temp(),
  mk_li(Tmp, Src2,
        mk_alub_rr_OE(Dst, Src1, AluOp, Tmp)).

%%% Register/immediate alub that only needs CR0 (Rc) set.

mk_alub_ri_Rc(Dst, Src1, AluOp, Src2) ->
  case AluOp of
    'subf' ->
      %% The wanted result is Src2 - Src1, i.e. immediate minus
      %% register. Adding the negated immediate with 'addic.' would
      %% compute Src1 - Src2 instead, and there is no record form of
      %% a subtract-from-immediate, so use the reg/reg 'subf.'.
      mk_alub_ri_Rc_rr(Dst, Src1, 'subf.', Src2);
    'add' ->    % 'addic.' has a 16-bit simm operand
      mk_alub_ri_Rc_addi(Dst, Src1, Src2, 'addic.', 'add.');
    'addc' ->   % 'addic' has a 16-bit simm operand
      mk_alub_ri_Rc_addi(Dst, Src1, Src2, 'addic', 'addc');
    'mullw' ->  % there is no 'mulli.'
      mk_alub_ri_Rc_rr(Dst, Src1, 'mullw.', Src2);
    'or' ->     % there is no 'ori.'
      mk_alub_ri_Rc_rr(Dst, Src1, 'or.', Src2);
    'xor' ->    % there is no 'xori.'
      mk_alub_ri_Rc_rr(Dst, Src1, 'xor.', Src2);
    'and' ->    % 'andi.' has a 16-bit uimm operand
      case rlwinm_mask(Src2) of
        {MB,ME} ->
          [hipe_ppc:mk_unary({'rlwinm.',0,MB,ME}, Dst, Src1)];
        _ ->
          mk_alub_ri_Rc_andi(Dst, Src1, Src2)
      end;
    _ ->        % shift ops have 5-bit uimm operands
      mk_alub_ri_Rc_shift(Dst, Src1, AluOp, Src2)
  end.

%%% Use AddImmOp when Src2 fits a signed 16-bit field, otherwise load
%%% Src2 into a register and use AddRegOp.

mk_alub_ri_Rc_addi(Dst, Src1, Src2, AddImmOp, AddRegOp) ->
  if is_integer(Src2), -32768 =< Src2, Src2 < 32768 ->
      [hipe_ppc:mk_alu(AddImmOp, Dst, Src1,
                       hipe_ppc:mk_simm16(Src2))];
     true ->
      mk_alub_ri_Rc_rr(Dst, Src1, AddRegOp, Src2)
  end.

%%% 'andi.' when Src2 fits an unsigned 16-bit field, else 'and.'.

mk_alub_ri_Rc_andi(Dst, Src1, Src2) ->
  if Src2 < 65536, Src2 >= 0 ->
      [hipe_ppc:mk_alu('andi.', Dst, Src1,
                       hipe_ppc:mk_uimm16(Src2))];
     true ->
      mk_alub_ri_Rc_rr(Dst, Src1, 'and.', Src2)
  end.

%%% Shift by an immediate with CR0 set. Amounts in 0..31 use the
%%% immediate forms; anything else goes through a register.

mk_alub_ri_Rc_shift(Dst, Src1, AluOp, Src2) ->
  if Src2 < 32, Src2 >= 0 ->
      AluOpIDot =
        case AluOp of
          'slw' -> 'slwi.';     % alias for rlwinm.
          'srw' -> 'srwi.';     % alias for rlwinm.
          'sraw' -> 'srawi.'
        end,
      [hipe_ppc:mk_alu(AluOpIDot, Dst, Src1,
                       hipe_ppc:mk_uimm16(Src2))];
     true ->
      AluOpDot =
        case AluOp of
          'slw' -> 'slw.';
          'srw' -> 'srw.';
          'sraw' -> 'sraw.'
        end,
      mk_alub_ri_Rc_rr(Dst, Src1, AluOpDot, Src2)
  end.
%%% Load the immediate Src2 into a scratch register and emit the
%%% given (already dotted) reg/reg opcode.

mk_alub_ri_Rc_rr(Dst, Src1, AluOp, Src2) ->
  RightR = new_untagged_temp(),
  mk_li(RightR, Src2, [hipe_ppc:mk_alu(AluOp, Dst, Src1, RightR)]).

%%% Reg/reg alub. Overflow conditions need the OE forms, all other
%%% conditions the Rc forms.

mk_alub_rr(Dst, Src1, AluOp, Src2, BCond)
  when BCond =:= 'so'; BCond =:= 'ns' ->
  mk_alub_rr_OE(Dst, Src1, AluOp, Src2);
mk_alub_rr(Dst, Src1, AluOp, Src2, _BCond) ->
  mk_alub_rr_Rc(Dst, Src1, AluOp, Src2).

%%% Reg/reg form with both OE and Rc set. Only 'subf', 'add' and
%%% 'mullw' are handled; every other opcode fails the case.

mk_alub_rr_OE(Dst, Src1, AluOp, Src2) ->
  OvfOp =
    case AluOp of
      'subf' -> 'subfo.';
      'add' -> 'addo.';
      'mullw' -> 'mullwo.'
      %% fail for addc, or, and, xor, slw, srw, sraw
    end,
  [hipe_ppc:mk_alu(OvfOp, Dst, Src1, Src2)].

%%% Reg/reg form with Rc set.

mk_alub_rr_Rc(Dst, Src1, AluOp, Src2) ->
  RecOp =
    case AluOp of
      'subf' -> 'subf.';
      'add' -> 'add.';
      'addc' -> 'addc';         % only interested in CA, no Rc needed
      'mullw' -> 'mullw.';
      'or' -> 'or.';
      'and' -> 'and.';
      'xor' -> 'xor.';
      'slw' -> 'slw.';
      'srw' -> 'srw.';
      'sraw' -> 'sraw.'
    end,
  [hipe_ppc:mk_alu(RecOp, Dst, Src1, Src2)].

%%% branch: <unused> = src1 - src2; if COND goto label

conv_branch(I, Map, Data) ->
  {Src1, Map0} = conv_src(hipe_rtl:branch_src1(I), Map),
  {Src2, Map1} = conv_src(hipe_rtl:branch_src2(I), Map0),
  {BCond, Sign} = conv_branch_cond(hipe_rtl:branch_cond(I)),
  Insns = mk_branch(Src1, BCond, Sign, Src2,
                    hipe_rtl:branch_true_label(I),
                    hipe_rtl:branch_false_label(I),
                    hipe_rtl:branch_pred(I)),
  {Insns, Map1, Data}.

%%% Map an RTL branch condition to {PpcCond, Signedness}.
%%% The four unsigned conditions are handled here; every other
%%% condition is delegated to conv_alub_cond/1 and is signed.

conv_branch_cond(gtu) -> {'gt', 'unsigned'};
conv_branch_cond(geu) -> {'ge', 'unsigned'};
conv_branch_cond(ltu) -> {'lt', 'unsigned'};
conv_branch_cond(leu) -> {'le', 'unsigned'};
conv_branch_cond(Cond) -> {conv_alub_cond(Cond), 'signed'}.
%%% Compare-and-branch. Picks the variant from which operands are
%%% temps; with an immediate on the left the operands are swapped and
%%% the condition is mirrored.

mk_branch(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred) ->
  case {hipe_ppc:is_temp(Src1), hipe_ppc:is_temp(Src2)} of
    {true, true} ->
      mk_branch_rr(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred);
    {true, _} ->
      mk_branch_ri(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred);
    {_, true} ->
      Mirrored = commute_bcond(BCond),
      mk_branch_ri(Src2, Mirrored, Sign, Src1, TrueLab, FalseLab, Pred);
    {_, _} ->
      mk_branch_ii(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred)
  end.

%%% If x BCond y holds, then y commute_bcond(BCond) x holds.
%%% Not applicable to so/ns, which exit like any unknown condition.

commute_bcond('eq') -> 'eq';    % ==, ==
commute_bcond('ne') -> 'ne';    % !=, !=
commute_bcond('gt') -> 'lt';    % >, <
commute_bcond('ge') -> 'le';    % >=, <=
commute_bcond('lt') -> 'gt';    % <, >
commute_bcond('le') -> 'ge';    % <=, >=
commute_bcond(BCond) -> exit({?MODULE,commute_bcond,BCond}).

%%% Both operands are immediates: report it, load the first one into
%%% a scratch register, and continue with the reg/imm variant.

mk_branch_ii(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred) ->
  io:format("~w: RTL branch with two immediates\n", [?MODULE]),
  LeftR = new_untagged_temp(),
  Rest = mk_branch_ri(LeftR, BCond, Sign, Src2, TrueLab, FalseLab, Pred),
  mk_li(LeftR, Src1, Rest).

%%% Register compared with an immediate. The immediate compare forms
%%% are used when Src2 fits the 16-bit field ('cmpi' signed, 'cmpli'
%%% unsigned); otherwise Src2 is first loaded into a scratch reg.

mk_branch_ri(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred) ->
  {Prelude, Right, CmpOp} =
    case Sign of
      'signed' ->
        if is_integer(Src2), -32768 =< Src2, Src2 < 32768 ->
            {[], hipe_ppc:mk_simm16(Src2), 'cmpi'};
           true ->
            SignedR = new_untagged_temp(),
            {mk_li(SignedR, Src2), SignedR, 'cmp'}
        end;
      'unsigned' ->
        if is_integer(Src2), 0 =< Src2, Src2 < 65536 ->
            {[], hipe_ppc:mk_uimm16(Src2), 'cmpli'};
           true ->
            UnsignedR = new_untagged_temp(),
            {mk_li(UnsignedR, Src2), UnsignedR, 'cmpl'}
        end
    end,
  Prelude ++ mk_cmp_bc(CmpOp, Src1, Right, BCond, TrueLab, FalseLab, Pred).

%%% Register compared with a register.

mk_branch_rr(Src1, BCond, Sign, Src2, TrueLab, FalseLab, Pred) ->
  CmpOp =
    case Sign of
      'signed' -> 'cmp';
      'unsigned' -> 'cmpl'
    end,
  mk_cmp_bc(CmpOp, Src1, Src2, BCond, TrueLab, FalseLab, Pred).
%%% A compare instruction followed by the conditional branch.

mk_cmp_bc(CmpOp, Src1, Src2, BCond, TrueLab, FalseLab, Pred) ->
  [hipe_ppc:mk_cmp(CmpOp, Src1, Src2) |
   mk_pseudo_bc(BCond, TrueLab, FalseLab, Pred)].

%%% call: convert operands, then build either an inlined primop or a
%%% general call sequence.

conv_call(I, Map, Data) ->
  {Args, Map0} = conv_src_list(hipe_rtl:call_arglist(I), Map),
  {Dsts, Map1} = conv_dst_list(hipe_rtl:call_dstlist(I), Map0),
  {Fun, Map2} = conv_fun(hipe_rtl:call_fun(I), Map1),
  ContLab = hipe_rtl:call_continuation(I),
  ExnLab = hipe_rtl:call_fail(I),
  Linkage = hipe_rtl:call_type(I),
  I2 = mk_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage),
  {I2, Map2, Data}.

mk_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) ->
  case hipe_ppc:is_prim(Fun) of
    true ->
      mk_primop_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage);
    false ->
      mk_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage)
  end.

%%% 'extsh', 'lhbrx' and 'lwbrx' are expanded to single instructions;
%%% every other primop is called like an ordinary function.

mk_primop_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage) ->
  case hipe_ppc:prim_prim(Prim) of
    'extsh' ->
      mk_extsh_call(Dsts, Args, ContLab, ExnLab, Linkage);
    'lhbrx' ->
      mk_lhbrx_call(Dsts, Args, ContLab, ExnLab, Linkage);
    'lwbrx' ->
      mk_lwbrx_call(Dsts, Args, ContLab, ExnLab, Linkage);
    _ ->
      mk_general_call(Dsts, Prim, Args, ContLab, ExnLab, Linkage)
  end.

%%% The three expanders below require a local call (not_remote) with
%%% no continuation and no exception label. They have no effect other
%%% than writing Dst, so when the result is unused (no destination)
%%% the operation is cancelled instead of crashing the translation.

mk_extsh_call(Dsts, [Src], [], [], not_remote) ->
  case Dsts of
    [Dst] ->
      true = hipe_ppc:is_temp(Src),
      [hipe_ppc:mk_unary('extsh', Dst, Src)];
    [] -> []    % result unused, cancel the operation
  end.

mk_lhbrx_call(Dsts, [Base,Offset], [], [], not_remote) ->
  case Dsts of
    [Dst] -> mk_loadx('lhbrx', Dst, Base, Offset);
    [] -> []    % result unused, cancel the operation
  end.

mk_lwbrx_call(Dsts, [Base,Offset], [], [], not_remote) ->
  case Dsts of
    [Dst] -> mk_loadx('lwbrx', Dst, Base, Offset);
    [] -> []    % result unused, cancel the operation
  end.

%%% Indexed load. Base must be a temp; an immediate Offset is first
%%% loaded into a scratch register.

mk_loadx(LdxOp, Dst, Base, Offset) ->
  true = hipe_ppc:is_temp(Base),
  {FixOff,NewOff} =
    case hipe_ppc:is_temp(Offset) of
      true -> {[], Offset};
      false ->
        Tmp = new_untagged_temp(),
        {mk_li(Tmp, Offset), Tmp}
    end,
  FixOff ++ [hipe_ppc:mk_loadx(LdxOp, Dst, Base, NewOff)].
%%% Build a full call sequence: set up the callee, move register
%%% arguments, push stack arguments, and emit the pseudo_call.
%%%
%%% The backend does not support pseudo_calls without a continuation
%%% label, so every call gets one:
%%%  - no result moves, no continuation: a fresh label right after
%%%    the call;
%%%  - no result moves, continuation given: continue there directly
%%%    (avoids consing up a dummy basic block, as is typical for
%%%    calls to suspend/0);
%%%  - result moves: continue at a fresh block holding the moves,
%%%    which then falls through (no continuation) or jumps to the
%%%    original continuation.

mk_general_call(Dsts, Fun, Args, ContLab, ExnLab, Linkage) ->
  {RealContLab, Tail} =
    case {mk_call_results(Dsts), ContLab} of
      {[], []} ->
        FreshLab = hipe_gensym:get_next_label(ppc),
        {FreshLab, [hipe_ppc:mk_label(FreshLab)]};
      {[], _} ->
        {ContLab, []};
      {Moves, []} ->
        MovesLab = hipe_gensym:get_next_label(ppc),
        {MovesLab, [hipe_ppc:mk_label(MovesLab) | Moves]};
      {Moves, _} ->
        JumpLab = hipe_gensym:get_next_label(ppc),
        {JumpLab,
         [hipe_ppc:mk_label(JumpLab) |
          Moves ++ [hipe_ppc:mk_b_label(ContLab)]]}
    end,
  SDesc = hipe_ppc:mk_sdesc(ExnLab, 0, length(Args), {}),
  {CalleeSetup, Callee} = fix_func(Fun),
  CallInsn = hipe_ppc:mk_pseudo_call(Callee, SDesc, RealContLab, Linkage),
  {RegArgs, StkArgs} = split_args(Args),
  CalleeSetup ++
    mk_push_args(StkArgs, move_actuals(RegArgs, [CallInsn | Tail])).

%%% Moves that copy the return-value register into the destination.
%%% At most one destination is supported; more than one exits.

mk_call_results([]) ->
  [];
mk_call_results([Dst]) ->
  RetR = hipe_ppc:mk_temp(hipe_ppc_registers:return_value(), 'tagged'),
  [hipe_ppc:mk_pseudo_move(Dst, RetR)];
mk_call_results(Dsts) ->
  exit({?MODULE,mk_call_results,Dsts}).

%%% A callee held in a temp is first moved to CTR and then called
%%% through 'ctr'; any other callee is used as is.

fix_func(Fun) ->
  case hipe_ppc:is_temp(Fun) of
    true -> {[hipe_ppc:mk_mtspr('ctr', Fun)], 'ctr'};
    _ -> {[], Fun}
  end.
%%% Emit the code that passes the stack arguments of a call.
%%% With no stack arguments nothing is added in front of Tail.

mk_push_args([], Tail) ->
  Tail;
mk_push_args(StkArgs, Tail) ->
  NrStkArgs = length(StkArgs),
  [hipe_ppc:mk_pseudo_call_prepare(NrStkArgs) |
   mk_store_args(StkArgs, NrStkArgs * word_size(), Tail)].

%%% Store each argument relative to the stack pointer. The first
%%% argument gets the highest offset, each following one a word
%%% less. Immediates go through a fresh tagged scratch register.
%%% The stores are prepended, so they appear in reverse argument
%%% order in the result.

mk_store_args([], _PrevOffset, Tail) ->
  Tail;
mk_store_args([Arg|Args], PrevOffset, Tail) ->
  Offset = PrevOffset - word_size(),
  {ValueR, LoadImm} =
    case hipe_ppc:is_temp(Arg) of
      true ->
        {Arg, []};
      _ ->
        ScratchR = new_tagged_temp(),
        {ScratchR, mk_li(ScratchR, Arg)}
    end,
  Store = hipe_ppc:mk_store('stw', ValueR, Offset, mk_sp()),
  mk_store_args(Args, Offset, LoadImm ++ [Store | Tail]).

%%% comment: carried over unchanged.

conv_comment(I, Map, Data) ->
  Text = hipe_rtl:comment_text(I),
  {[hipe_ppc:mk_comment(Text)], Map, Data}.

%%% enter: tail call.

conv_enter(I, Map, Data) ->
  {Args, Map0} = conv_src_list(hipe_rtl:enter_arglist(I), Map),
  {Fun, Map1} = conv_fun(hipe_rtl:enter_fun(I), Map0),
  Insns = mk_enter(Fun, Args, hipe_rtl:enter_type(I)),
  {Insns, Map1, Data}.

%%% Register arguments are moved into place first; the stack
%%% arguments are handed to the pseudo_tailcall itself.

mk_enter(Fun, Args, Linkage) ->
  {CalleeSetup, Callee} = fix_func(Fun),
  Arity = length(Args),
  {RegArgs, StkArgs} = split_args(Args),
  TailCall =
    [hipe_ppc:mk_pseudo_tailcall_prepare(),
     hipe_ppc:mk_pseudo_tailcall(Callee, Arity, StkArgs, Linkage)],
  CalleeSetup ++ move_actuals(RegArgs, TailCall).

%%% goto: unconditional branch to a label.

conv_goto(I, Map, Data) ->
  Target = hipe_rtl:goto_label(I),
  {[hipe_ppc:mk_b_label(Target)], Map, Data}.

%%% label: carried over with the same name.

conv_label(I, Map, Data) ->
  Name = hipe_rtl:label_name(I),
  {[hipe_ppc:mk_label(Name)], Map, Data}.

%%% load: Dst := MEM[Base+Off], with a size and a signedness.

conv_load(I, Map, Data) ->
  {Dst, Map0} = conv_dst(hipe_rtl:load_dst(I), Map),
  {Base1, Map1} = conv_src(hipe_rtl:load_src(I), Map0),
  {Base2, Map2} = conv_src(hipe_rtl:load_offset(I), Map1),
  Insns = mk_load(Dst, Base1, Base2,
                  hipe_rtl:load_size(I), hipe_rtl:load_sign(I)),
  {Insns, Map2, Data}.
%%% Integer load. Bytes are loaded zero-extended with 'lbz'; a signed
%%% byte load gets an 'extsb' appended. Halfwords use 'lha' or 'lhz'
%%% depending on the sign; int32 and word both use 'lwz'.

mk_load(Dst, Base1, Base2, LoadSize, LoadSign) ->
  SignFix =
    case {LoadSize, LoadSign} of
      {byte, signed} -> [hipe_ppc:mk_unary('extsb', Dst, Dst)];
      {_, _} -> []
    end,
  LdOp =
    case LoadSize of
      byte -> 'lbz';
      int32 -> 'lwz';
      word -> 'lwz';
      int16 ->
        case LoadSign of
          signed -> 'lha';
          unsigned -> 'lhz'
        end
    end,
  case {hipe_ppc:is_temp(Base1), hipe_ppc:is_temp(Base2)} of
    {true, true} -> mk_load_rr(Dst, Base1, Base2, LdOp, SignFix);
    {true, _} -> mk_load_ri(Dst, Base1, Base2, LdOp, SignFix);
    {_, true} -> mk_load_ri(Dst, Base2, Base1, LdOp, SignFix);
    {_, _} -> mk_load_ii(Dst, Base1, Base2, LdOp, SignFix)
  end.

%%% Load with two immediate address operands: report it, put the
%%% first immediate in a scratch register, then use the reg+imm form.

mk_load_ii(Dst, Base1, Base2, LdOp, Rest) ->
  io:format("~w: RTL load with two immediates\n", [?MODULE]),
  BaseR = new_untagged_temp(),
  Load = mk_load_ri(Dst, BaseR, Base2, LdOp, Rest),
  mk_li(BaseR, Base1, Load).

%%% Load, register base plus immediate displacement.

mk_load_ri(Dst, Base, Disp, LdOp, Rest) ->
  hipe_ppc:mk_load(LdOp, Dst, Disp, Base, 'new', Rest).

%%% Load, register plus register: uses the indexed variant of LdOp.

mk_load_rr(Dst, Base1, Base2, LdOp, Rest) ->
  IndexedOp = hipe_ppc:ldop_to_ldxop(LdOp),
  [hipe_ppc:mk_loadx(IndexedOp, Dst, Base1, Base2) | Rest].

%%% load_address: Dst := the {Addr,Type} pair, resolved later via
%%% pseudo_li.

conv_load_address(I, Map, Data) ->
  {Dst, Map0} = conv_dst(hipe_rtl:load_address_dst(I), Map),
  Addr = hipe_rtl:load_address_addr(I),
  Type = hipe_rtl:load_address_type(I),
  {[hipe_ppc:mk_pseudo_li(Dst, {Addr,Type})], Map0, Data}.

%%% load_atom: Dst := the atom, resolved later via pseudo_li.

conv_load_atom(I, Map, Data) ->
  {Dst, Map0} = conv_dst(hipe_rtl:load_atom_dst(I), Map),
  Atom = hipe_rtl:load_atom_atom(I),
  {[hipe_ppc:mk_pseudo_li(Dst, Atom)], Map0, Data}.

%%% move: Dst := Src

conv_move(I, Map, Data) ->
  {Dst, Map0} = conv_dst(hipe_rtl:move_dst(I), Map),
  {Src, Map1} = conv_src(hipe_rtl:move_src(I), Map0),
  {mk_move(Dst, Src, []), Map1, Data}.

%%% Prepend to Tail a register move when Src is a temp, or a
%%% load-immediate otherwise.

mk_move(Dst, Src, Tail) ->
  case hipe_ppc:is_temp(Src) of
    true -> [hipe_ppc:mk_pseudo_move(Dst, Src) | Tail];
    _ -> mk_li(Dst, Src, Tail)
  end.
%%% return: move the single return value into the return-value
%%% register and emit 'blr'.
%%% TODO: multiple-value returns

conv_return(I, Map, Data) ->
  {[RetVal], Map0} = conv_src_list(hipe_rtl:return_varlist(I), Map),
  Insns = mk_move(mk_rv(), RetVal, [hipe_ppc:mk_blr()]),
  {Insns, Map0, Data}.

%%% store: MEM[Base+Off] := Src, with a size.

conv_store(I, Map, Data) ->
  {Base1, Map0} = conv_dst(hipe_rtl:store_base(I), Map),
  {Src, Map1} = conv_src(hipe_rtl:store_src(I), Map0),
  {Base2, Map2} = conv_src(hipe_rtl:store_offset(I), Map1),
  Insns = mk_store(Src, Base1, Base2, hipe_rtl:store_size(I)),
  {Insns, Map2, Data}.

%%% Select the store opcode from the size. An immediate source is
%%% first loaded into a scratch register.

mk_store(Src, Base1, Base2, StoreSize) ->
  StOp =
    case StoreSize of
      byte -> 'stb';
      int16 -> 'sth';
      int32 -> 'stw';
      word -> 'stw'
    end,
  case hipe_ppc:is_temp(Src) of
    true ->
      mk_store2(Src, Base1, Base2, StOp);
    _ ->
      ValueR = new_untagged_temp(),
      Store = mk_store2(ValueR, Base1, Base2, StOp),
      mk_li(ValueR, Src, Store)
  end.

%%% Base1 comes from conv_dst/2; only the offset operand decides
%%% between the indexed and the displacement form.

mk_store2(Src, Base1, Base2, StOp) ->
  case hipe_ppc:is_temp(Base2) of
    true -> mk_store_rr(Src, Base1, Base2, StOp);
    _ -> mk_store_ri(Src, Base1, Base2, StOp)
  end.

%%% Store, register base plus immediate displacement.

mk_store_ri(Src, Base, Disp, StOp) ->
  hipe_ppc:mk_store(StOp, Src, Disp, Base, 'new', []).

%%% Store, register plus register: uses the indexed variant of StOp.

mk_store_rr(Src, Base1, Base2, StOp) ->
  IndexedOp = hipe_ppc:stop_to_stxop(StOp),
  [hipe_ppc:mk_storex(IndexedOp, Src, Base1, Base2)].
%%% switch: jump through a table of labels.
%%% The labels are added to the constant table as a block of words
%%% (sorted when the RTL gives a sort order). The code loads the
%%% table address, scales the index by 4, fetches the target, and
%%% branches through CTR.

conv_switch(I, Map, Data) ->
  Labels = hipe_rtl:switch_labels(I),
  Entries = lists:map(fun(L) -> {label,L} end, Labels),
  {NewData, JTabLab} =
    case hipe_rtl:switch_sort_order(I) of
      [] ->
        hipe_consttab:insert_block(Data, word, Entries);
      SortOrder ->
        hipe_consttab:insert_sorted_block(Data, word, Entries, SortOrder)
    end,
  %% no immediates allowed here
  {IndexR, Map1} = conv_dst(hipe_rtl:switch_src(I), Map),
  TableR = new_untagged_temp(),
  OffsetR = new_untagged_temp(),
  TargetR = new_untagged_temp(),
  Insns =
    [hipe_ppc:mk_pseudo_li(TableR, {JTabLab,constant}),
     hipe_ppc:mk_alu('slwi', OffsetR, IndexR, hipe_ppc:mk_uimm16(2)),
     hipe_ppc:mk_loadx('lwzx', TargetR, TableR, OffsetR),
     hipe_ppc:mk_mtspr('ctr', TargetR),
     hipe_ppc:mk_bctr(Labels)],
  {Insns, Map1, NewData}.

%%% Create a conditional branch.
%%% When the condition tests CR0[SO], the path on which SO is set is
%%% routed through a new block that writes zero to XER (clearing
%%% XER[SO]) before jumping to the original target. For 'so' that is
%%% the true path, for 'ns' the false path.

mk_pseudo_bc('so', TrueLabel, FalseLabel, Pred) ->
  ClearLabel = hipe_gensym:get_next_label(ppc),
  ZeroR = new_untagged_temp(),
  [hipe_ppc:mk_pseudo_bc('so', ClearLabel, FalseLabel, Pred),
   hipe_ppc:mk_label(ClearLabel) |
   mk_li(ZeroR, 0,
         [hipe_ppc:mk_mtspr('xer', ZeroR),
          hipe_ppc:mk_b_label(TrueLabel)])];
mk_pseudo_bc('ns', TrueLabel, FalseLabel, Pred) ->
  ClearLabel = hipe_gensym:get_next_label(ppc),
  ZeroR = new_untagged_temp(),
  [hipe_ppc:mk_pseudo_bc('ns', TrueLabel, ClearLabel, Pred),
   hipe_ppc:mk_label(ClearLabel) |
   mk_li(ZeroR, 0,
         [hipe_ppc:mk_mtspr('xer', ZeroR),
          hipe_ppc:mk_b_label(FalseLabel)])];
mk_pseudo_bc(BCond, TrueLabel, FalseLabel, Pred) ->
  [hipe_ppc:mk_pseudo_bc(BCond, TrueLabel, FalseLabel, Pred)].

%%% Load an integer constant into a register.
%%% mk_li/2 returns just the load sequence; mk_li/3 puts it in front
%%% of Tail.

mk_li(Dst, Value) ->
  mk_li(Dst, Value, []).

mk_li(Dst, Value, Tail) ->
  hipe_ppc:mk_li(Dst, Value, Tail).

%%% Check if an RTL ALU or ALUB operator commutes.

rtl_aluop_commutes('add') -> true;
rtl_aluop_commutes('mul') -> true;
rtl_aluop_commutes('or') -> true;
rtl_aluop_commutes('and') -> true;
rtl_aluop_commutes('xor') -> true;
rtl_aluop_commutes(_) -> false.
%%% Split a list of formal or actual parameters into the part passed
%%% in registers and the part passed on the stack.
%%% Each register parameter is paired with a tagged temp for its
%%% argument register: {Param, RegTemp}. The register part is
%%% returned in reverse parameter order.

split_args(Args) ->
  split_args(0, hipe_ppc_registers:nr_args(), Args, []).

split_args(Index, Limit, [Arg|Args], RegArgs) when Index < Limit ->
  ArgR = hipe_ppc:mk_temp(hipe_ppc_registers:arg(Index), 'tagged'),
  split_args(Index+1, Limit, Args, [{Arg,ArgR}|RegArgs]);
split_args(_Index, _Limit, StkArgs, RegArgs) ->
  {RegArgs, StkArgs}.

%%% Convert a list of actual parameters passed in registers (from
%%% split_args/1) to moves placed in front of Rest. Each move is
%%% prepended, so the last pair in the list ends up first.

move_actuals(Actuals, Rest) ->
  lists:foldl(fun({Src,Dst}, Acc) -> mk_move(Dst, Src, Acc) end,
              Rest, Actuals).

%%% Convert a list of formal parameters passed in registers (from
%%% split_args/1) to moves placed in front of Rest. Each move is
%%% prepended, so the last pair in the list ends up first.

move_formals(Formals, Rest) ->
  lists:foldl(fun({Dst,Src}, Acc) ->
                  [hipe_ppc:mk_pseudo_move(Dst, Src) | Acc]
              end,
              Rest, Formals).

%%% Convert a 'fun' operand (MFA, prim, or temp).
%%% RTL vars and regs become temps, atoms become prims, and anything
%%% else must be an MFA.

conv_fun(Fun, Map) ->
  case hipe_rtl:is_var(Fun) orelse hipe_rtl:is_reg(Fun) of
    true ->
      conv_dst(Fun, Map);
    false ->
      if is_atom(Fun) ->
          {hipe_ppc:mk_prim(Fun), Map};
         true ->
          {conv_mfa(Fun), Map}
      end
  end.

%%% Convert an MFA operand.

conv_mfa({M,F,A}) when is_atom(M), is_atom(F), is_integer(A) ->
  hipe_ppc:mk_mfa(M, F, A).

%%% Convert an RTL source operand (imm/var/reg).
%%% Returns a temp or a naked integer. An immediate whose value is
%%% not an integer fails the 'if'.

conv_src(Opnd, Map) ->
  case hipe_rtl:is_imm(Opnd) of
    true ->
      Value = hipe_rtl:imm_value(Opnd),
      if is_integer(Value) ->
          {Value, Map}
      end;
    false ->
      conv_dst(Opnd, Map)
  end.

%%% Convert a list of source operands left to right, threading the
%%% var map through.

conv_src_list(Opnds, Map) ->
  lists:mapfoldl(fun conv_src/2, Map, Opnds).

%%% Convert an RTL destination operand (var/reg).
%%% Convert an operand that must be an RTL fpreg; anything else
%%% fails the case.

conv_fpreg(Opnd, Map) ->
  case hipe_rtl:is_fpreg(Opnd) of
    true -> conv_dst(Opnd, Map)
  end.

%%% Convert an RTL var, fpreg or reg to a PPC temp.
%%% Vars become 'tagged', fpregs 'double', and everything else is
%%% read as a reg and becomes 'untagged'. Precoloured operands keep
%%% their index. Any other operand is looked up in the var map, and
%%% bound to a fresh temp the first time it is seen.
%%% Returns {Temp, NewMap}.

conv_dst(Opnd, Map) ->
  {Index, Type} =
    case {hipe_rtl:is_var(Opnd), hipe_rtl:is_fpreg(Opnd)} of
      {true, _} -> {hipe_rtl:var_index(Opnd), 'tagged'};
      {false, true} -> {hipe_rtl:fpreg_index(Opnd), 'double'};
      {false, false} -> {hipe_rtl:reg_index(Opnd), 'untagged'}
    end,
  Precoloured =
    case Type of
      'double' -> hipe_ppc_registers:is_precoloured_fpr(Index);
      _ -> hipe_ppc_registers:is_precoloured_gpr(Index)
    end,
  case Precoloured of
    true ->
      {hipe_ppc:mk_temp(Index, Type), Map};
    false ->
      case vmap_lookup(Map, Opnd) of
        {value, Known} ->
          {Known, Map};
        _ ->
          Fresh = hipe_ppc:mk_new_temp(Type),
          {Fresh, vmap_bind(Map, Opnd, Fresh)}
      end
  end.

%%% Convert a list of destination operands left to right, threading
%%% the var map through.

conv_dst_list(Opnds, Map) ->
  lists:mapfoldl(fun conv_dst/2, Map, Opnds).

%%% Convert the formal parameters, binding each one to a new temp.
%%% The first nr_args() formals get ordinary (allocatable) temps,
%%% the remaining ones get non-allocatable temps.
%%% Returns {Temps, Map} with Temps in parameter order.

conv_formals(Os, Map) ->
  conv_formals(hipe_ppc_registers:nr_args(), Os, Map, []).

conv_formals(_RegsLeft, [], Map, Acc) ->
  {lists:reverse(Acc), Map};
conv_formals(RegsLeft, [Formal|Formals], Map, Acc) ->
  Type =
    case hipe_rtl:is_var(Formal) of
      true -> 'tagged';
      _ -> 'untagged'
    end,
  Temp =
    case RegsLeft > 0 of
      true -> hipe_ppc:mk_new_temp(Type);               % allocatable
      false -> hipe_ppc:mk_new_nonallocatable_temp(Type)
    end,
  conv_formals(RegsLeft-1, Formals, vmap_bind(Map, Formal, Temp), [Temp|Acc]).

%%% Create a temp representing the stack pointer register.

mk_sp() ->
  StackPointer = hipe_ppc_registers:stack_pointer(),
  hipe_ppc:mk_temp(StackPointer, 'untagged').

%%% Create a temp representing the return value register.

mk_rv() ->
  ReturnValue = hipe_ppc_registers:return_value(),
  hipe_ppc:mk_temp(ReturnValue, 'tagged').

%%% new_untagged_temp -- conjure up an untagged scratch reg

new_untagged_temp() ->
  hipe_ppc:mk_new_temp('untagged').

%%% new_tagged_temp -- conjure up a tagged scratch reg

new_tagged_temp() ->
  hipe_ppc:mk_new_temp('tagged').
%%% Map from RTL var/reg operands to temps, kept in a gb_tree.

%%% An empty map.

vmap_empty() ->
  gb_trees:empty().

%%% Returns {value, Temp} when Opnd is bound, otherwise none.

vmap_lookup(VMap, Opnd) ->
  gb_trees:lookup(Opnd, VMap).

%%% Bind Opnd to Temp. Opnd must not be bound already, since
%%% gb_trees:insert/3 rejects existing keys.

vmap_bind(VMap, Opnd, Temp) ->
  gb_trees:insert(Opnd, Temp, VMap).

%%% The target word size, as reported by hipe_rtl_arch.

word_size() ->
  hipe_rtl_arch:word_size().

%% -- cgit v1.2.3