diff options
author | Erlang/OTP <otp@erlang.org> | 2009-11-20 14:54:40 +0000 |
---|---|---|
committer | Erlang/OTP <otp@erlang.org> | 2009-11-20 14:54:40 +0000 |
commit | 84adefa331c4159d432d22840663c38f155cd4c1 (patch) | |
tree | bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/hipe/rtl | |
download | otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2 otp-84adefa331c4159d432d22840663c38f155cd4c1.zip |
The R13B03 release. OTP_R13B03
Diffstat (limited to 'lib/hipe/rtl')
25 files changed, 15210 insertions, 0 deletions
diff --git a/lib/hipe/rtl/Makefile b/lib/hipe/rtl/Makefile new file mode 100644 index 0000000000..beab8da547 --- /dev/null +++ b/lib/hipe/rtl/Makefile @@ -0,0 +1,142 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 2001-2009. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# + +ifndef EBIN +EBIN = ../ebin +endif + +include $(ERL_TOP)/make/target.mk +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Application version +# ---------------------------------------------------- +include ../vsn.mk +VSN=$(HIPE_VSN) + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/lib/hipe-$(VSN) + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- +ifdef HIPE_ENABLED +HIPE_MODULES = hipe_rtl hipe_rtl_cfg \ + hipe_rtl_liveness \ + hipe_icode2rtl hipe_rtl_mk_switch \ + hipe_rtl_primops \ + hipe_rtl_varmap hipe_rtl_exceptions \ + hipe_rtl_binary_match hipe_rtl_binary_construct \ + hipe_rtl_arith_32 hipe_rtl_arith_64 \ + hipe_rtl_ssa hipe_rtl_ssa_const_prop \ + hipe_rtl_cleanup_const hipe_rtl_symbolic hipe_rtl_lcm \ + hipe_rtl_ssapre hipe_rtl_binary hipe_rtl_ssa_avail_expr \ + hipe_rtl_arch hipe_tagscheme +else +HIPE_MODULES = +endif + +MODULES = $(HIPE_MODULES) + +HRL_FILES= 
hipe_literals.hrl +ERL_FILES= $(MODULES:%=%.erl) +TARGET_FILES= $(MODULES:%=$(EBIN)/%.$(EMULATOR)) + +# APP_FILE= +# App_SRC= $(APP_FILE).src +# APP_TARGET= $(EBIN)/$(APP_FILE) +# +# APPUP_FILE= +# APPUP_SRC= $(APPUP_FILE).src +# APPUP_TARGET= $(EBIN)/$(APPUP_FILE) + +# ---------------------------------------------------- +# FLAGS: Please keep +inline below +# ---------------------------------------------------- + +include ../native.mk + +ERL_COMPILE_FLAGS += +inline + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +debug opt: $(TARGET_FILES) + +docs: + +clean: + rm -f hipe_literals.hrl + rm -f $(TARGET_FILES) + rm -f core erl_crash.dump + +# ---------------------------------------------------- +# Special Build Targets +# ---------------------------------------------------- + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: opt + $(INSTALL_DIR) $(RELSYSDIR)/rtl + $(INSTALL_DATA) $(ERL_FILES) $(HRL_FILES) $(RELSYSDIR)/rtl + $(INSTALL_DIR) $(RELSYSDIR)/ebin + $(INSTALL_DATA) $(TARGET_FILES) $(RELSYSDIR)/ebin + +release_docs_spec: + + +HIPE_MKLITERALS=$(ERL_TOP)/bin/$(TARGET)/hipe_mkliterals + +hipe_literals.hrl: $(HIPE_MKLITERALS) + $(HIPE_MKLITERALS) -e > hipe_literals.hrl + +../main/hipe.hrl: ../vsn.mk ../main/hipe.hrl.src + sed -e "s;%VSN%;$(HIPE_VSN);" ../main/hipe.hrl.src > ../main/hipe.hrl + +$(EBIN)/hipe_rtl.beam: hipe_rtl.hrl ../main/hipe.hrl +$(EBIN)/hipe_rtl_arch.beam: hipe_rtl.hrl hipe_literals.hrl +$(EBIN)/hipe_rtl_binary.beam: hipe_rtl.hrl hipe_literals.hrl +$(EBIN)/hipe_rtl_bin_util.beam: hipe_rtl.hrl hipe_literals.hrl +$(EBIN)/hipe_rtl_cfg.beam: hipe_rtl.hrl ../flow/cfg.hrl ../flow/cfg.inc ../main/hipe.hrl +$(EBIN)/hipe_rtl_cleanup_const.beam: hipe_rtl.hrl +$(EBIN)/hipe_rtl_liveness.beam: hipe_rtl.hrl ../flow/cfg.hrl 
../flow/liveness.inc +$(EBIN)/hipe_icode2rtl.beam: hipe_literals.hrl ../main/hipe.hrl ../icode/hipe_icode.hrl +$(EBIN)/hipe_tagscheme.beam: hipe_rtl.hrl hipe_literals.hrl +$(EBIN)/hipe_rtl_primops.beam: hipe_rtl.hrl ../icode/hipe_icode_primops.hrl hipe_literals.hrl ../main/hipe.hrl +$(EBIN)/hipe_rtl_arith_32.beam: ../main/hipe.hrl hipe_rtl_arith.inc +$(EBIN)/hipe_rtl_arith_64.beam: ../main/hipe.hrl hipe_rtl_arith.inc +$(EBIN)/hipe_rtl_bs_ops.beam: hipe_literals.hrl ../main/hipe.hrl +$(EBIN)/hipe_rtl_cerl_bs_ops.beam: ../main/hipe.hrl hipe_literals.hrl hipe_rtl.hrl +$(EBIN)/hipe_rtl_exceptions.beam: hipe_literals.hrl ../main/hipe.hrl +$(EBIN)/hipe_rtl_inline_bs_ops.beam: hipe_rtl.hrl hipe_literals.hrl ../main/hipe.hrl +$(EBIN)/hipe_rtl_mk_switch.beam: ../main/hipe.hrl +$(EBIN)/hipe_rtl_lcm.beam: ../flow/cfg.hrl hipe_rtl.hrl +$(EBIN)/hipe_rtl_symbolic.beam: hipe_rtl.hrl hipe_literals.hrl ../flow/cfg.hrl ../icode/hipe_icode_primops.hrl +$(EBIN)/hipe_rtl_varmap.beam: ../main/hipe.hrl ../icode/hipe_icode.hrl + +$(EBIN)/hipe_rtl_ssa.beam: ../ssa/hipe_ssa.inc ../main/hipe.hrl ../ssa/hipe_ssa_liveness.inc hipe_rtl.hrl +$(EBIN)/hipe_rtl_ssa_const_prop.beam: hipe_rtl.hrl ../main/hipe.hrl ../flow/cfg.hrl ../ssa/hipe_ssa_const_prop.inc +$(EBIN)/hipe_rtl_ssapre.beam: ../main/hipe.hrl ../flow/cfg.hrl hipe_rtl.hrl diff --git a/lib/hipe/rtl/hipe_icode2rtl.erl b/lib/hipe/rtl/hipe_icode2rtl.erl new file mode 100644 index 0000000000..034153a3cb --- /dev/null +++ b/lib/hipe/rtl/hipe_icode2rtl.erl @@ -0,0 +1,727 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. 
+%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%======================================================================= +%% File : hipe_icode2rtl.erl +%% Author(s) : Erik Johansson +%% Description : Translates Icode to RTL +%%======================================================================= +%% +%% $Id$ +%% +%% TODO: Better handling of switches... + +-module(hipe_icode2rtl). + +-export([translate/2]). +-export([translate_instrs/4]). %% used in hipe_rtl_mk_switch + +%%------------------------------------------------------------------------- + +%% -define(DEBUG,1). % used by hipe.hrl below + +-include("../main/hipe.hrl"). +-include("../icode/hipe_icode.hrl"). +-include("hipe_literals.hrl"). + +%%------------------------------------------------------------------------- + +%% @spec translate(IcodeRecord::#icode{}, Options::options()) -> term() +%% +%% options() = [option()] +%% option() = term() +%% +%% @doc Translates a linear form of Icode for a single function to a +%% linear form of RTL-code. +%% The translation starts from a fresh constant table (hipe_consttab:new/0) +%% and the Icode info is copied onto the resulting RTL structure. +%% +translate(IcodeRecord = #icode{}, Options) -> + ?IF_DEBUG_LEVEL(2, put(hipe_mfa, hipe_icode:icode_fun(IcodeRecord)), ok), + %% hipe_icode_pp:pp(Fun), + + %% Initialize gensym and varmap + {Args, VarMap} = hipe_rtl_varmap:init(IcodeRecord), + %% Get the name and other info of the function to translate.
+ MFA = hipe_icode:icode_fun(IcodeRecord), + ConstTab = hipe_consttab:new(), % hipe_icode:icode_data(IcodeRecord), + %% io:format("~w\n", [ConstTab]), + Icode = hipe_icode:icode_code(IcodeRecord), + IsClosure = hipe_icode:icode_is_closure(IcodeRecord), + IsLeaf = hipe_icode:icode_is_leaf(IcodeRecord), + IcodeInfo = hipe_icode:icode_info(IcodeRecord), + + %% Translate Icode instructions to RTL instructions + ?opt_start_timer("Icode to nested RTL"), + {Code, _VarMap1, ConstTab1} = + translate_instrs(Icode, VarMap, ConstTab, Options), + ?opt_stop_timer("Icode to nested RTL"), + %% We build the code as list of lists of... + %% in order to avoid appends. + ?opt_start_timer("Flatten RTL"), + Code1 = lists:flatten(Code), + ?opt_stop_timer("Flatten RTL"), + %% Build the RTL structure. + Rtl = hipe_rtl:mk_rtl(MFA, + Args, + IsClosure, + IsLeaf, + Code1, + ConstTab1, + {1, hipe_gensym:get_var(rtl)}, + {1, hipe_gensym:get_label(rtl)}), + %% hipe_rtl:pp(Rtl), + %% Propagate info from Icode to RTL. + hipe_rtl:rtl_info_update(Rtl, IcodeInfo). + +%%------------------------------------------------------------------------- + +%% +%% @doc Translates a list of Icode instructions to a list of RTL instructions. +%% Returns {Code, VarMap, ConstTab}. Code is a nested list: each step wraps +%% the accumulator as [AccCode,Code] instead of appending, so callers that +%% need a flat list must flatten it themselves (translate/2 does). +%% +translate_instrs(Is, VarMap, ConstTab, Options) -> + translate_instrs(Is, VarMap, [], ConstTab, Options). + +translate_instrs([], VarMap, RTL_Code, ConstTab, _Options) -> + {RTL_Code, VarMap, ConstTab}; +translate_instrs([I|Is], VarMap, AccCode, ConstTab, Options) -> + %% Translate one instruction. 
+ {Code, VarMap0, ConstTab0} = + translate_instruction(I, VarMap, ConstTab, Options), + %% ?IF_DEBUG_LEVEL(3,?msg(" To Instr: ~w~n",[Code]),no_debug), + ?IF_DEBUG(?when_option(rtl_show_translation, Options, + ?msg(" To Instr: ~w~n", [Code])), ok), + translate_instrs(Is, VarMap0, [AccCode,Code], ConstTab0, Options). + +%% +%% @doc Translates an Icode instruction to one or more RTL instructions.
+%% Dispatches on the Icode record type; any other term makes the process +%% exit with {?MODULE,{"unknown Icode instruction",X}}. +%% + +translate_instruction(I, VarMap, ConstTab, Options) -> + %% ?IF_DEBUG_LEVEL(3,?msg("From Instr: ~w~n",[I]),no_debug), + ?IF_DEBUG(?when_option(rtl_show_translation, Options, + ?msg("From Instr: ~w~n", [I])), ok), + case I of + #icode_call{} -> + gen_call(I, VarMap, ConstTab); + #icode_comment{} -> + {hipe_rtl:mk_comment(hipe_icode:comment_text(I)), VarMap, ConstTab}; + #icode_enter{} -> + gen_enter(I, VarMap, ConstTab); + #icode_fail{} -> + gen_fail(I, VarMap, ConstTab); + #icode_goto{} -> + gen_goto(I, VarMap, ConstTab); + #icode_if{} -> + gen_if(I, VarMap, ConstTab); + #icode_label{} -> + gen_label(I, VarMap, ConstTab); + #icode_move{} -> + gen_move(I, VarMap, ConstTab); + #icode_begin_handler{} -> + hipe_rtl_exceptions:gen_begin_handler(I, VarMap, ConstTab); + #icode_return{} -> + gen_return(I, VarMap, ConstTab); + #icode_switch_val{} -> + gen_switch_val(I, VarMap, ConstTab, Options); + #icode_switch_tuple_arity{} -> + gen_switch_tuple(I, VarMap, ConstTab, Options); + #icode_type{} -> + gen_type(I, VarMap, ConstTab); + X -> + exit({?MODULE,{"unknown Icode instruction",X}}) + end. + +%%------------------------------------------------------------------------- + +%% +%% CALL +%% +%% Translates an Icode call. Destinations, arguments, the fail label and +%% the continuation label are mapped to their RTL counterparts. When the +%% call has no continuation ([]) a fresh label is created and emitted +%% directly after the call code; otherwise nothing extra is emitted. +%% Primops are generated by hipe_rtl_primops:gen_primop/3, every other +%% call type by gen_call_1/7. +%% Returns {[InitCode,Code,ContLbl], VarMap4, ConstTab2}. 
+ +gen_call(I, VarMap, ConstTab) -> + Fun = hipe_icode:call_fun(I), + {Dst, VarMap0} = hipe_rtl_varmap:ivs2rvs(hipe_icode:call_dstlist(I), VarMap), + Fail = hipe_icode:call_fail_label(I), + + {Args, VarMap1, ConstTab1, InitCode} = + args_to_vars(hipe_icode:call_args(I), VarMap0, ConstTab), + + IsGuard = hipe_icode:call_in_guard(I), + + {FailLblName, VarMap3} = + case Fail of + [] -> %% Not in a catch + {[], VarMap1}; + _ -> + {FLbl, VarMap2} = + hipe_rtl_varmap:icode_label2rtl_label(Fail, VarMap1), + {hipe_rtl:label_name(FLbl), VarMap2} + end, + + {ContLblName, ContLbl, VarMap4} = + case hipe_icode:call_continuation(I) of + [] -> %% This call does not end a BB.
+ CLbl = hipe_rtl:mk_new_label(), + {hipe_rtl:label_name(CLbl), CLbl, VarMap3}; + Cont -> + {CLbl, NewVarMap} = + hipe_rtl_varmap:icode_label2rtl_label(Cont, VarMap3), + {hipe_rtl:label_name(CLbl), [], NewVarMap} + end, + + {Code, ConstTab2} = + case hipe_icode:call_type(I) of + primop -> + hipe_rtl_primops:gen_primop( + {Fun, Dst, Args, ContLblName, FailLblName}, + IsGuard, ConstTab1); + Type -> + Call = gen_call_1(Fun, Dst, Args, IsGuard, ContLblName, + FailLblName, Type), + {Call, ConstTab1} + end, + {[InitCode,Code,ContLbl], VarMap4, ConstTab2}. + +%% This catches those standard functions that we inline expand +%% hipe_rtl_primops:gen_call_builtin/6 returning [] means there is no +%% inline expansion, in which case an ordinary RTL call is emitted. + +gen_call_1(Fun={_M,_F,_A}, Dst, Args, IsGuard, Cont, Fail, Type) -> + case hipe_rtl_primops:gen_call_builtin(Fun, Dst, Args, IsGuard, Cont, + Fail) of + [] -> + hipe_rtl:mk_call(Dst, Fun, Args, Cont, Fail, conv_call_type(Type)); + Code -> + Code + end. + +%% Maps the Icode call type to the RTL call type: remote stays remote, +%% local becomes not_remote. Any other type has no clause. +conv_call_type(remote) -> remote; +conv_call_type(local) -> not_remote. + +%% -------------------------------------------------------------------- + +%% +%% ENTER +%% +%% Translates an Icode enter. Primops are generated by +%% hipe_rtl_primops:gen_enter_primop/3 (with IsGuard fixed to false), +%% every other type by gen_enter_1/3. +%% Returns {[InitCode,Code1], VarMap1, ConstTab2}. 
+ +gen_enter(I, VarMap, ConstTab) -> + Fun = hipe_icode:enter_fun(I), + {Args, VarMap1, ConstTab1, InitCode} = + args_to_vars(hipe_icode:enter_args(I), VarMap, ConstTab), + {Code1, ConstTab2} = + case hipe_icode:enter_type(I) of + primop -> + IsGuard = false, % enter can not happen in a guard + hipe_rtl_primops:gen_enter_primop({Fun, Args}, IsGuard, ConstTab1); + Type -> + Call = gen_enter_1(Fun, Args, Type), + {Call, ConstTab1} + end, + {[InitCode,Code1], VarMap1, ConstTab2}. + +%% This catches those standard functions that we inline expand +%% hipe_rtl_primops:gen_enter_builtin/2 returning [] means there is no +%% inline expansion, in which case an ordinary RTL enter is emitted. + +gen_enter_1(Fun, Args, Type) -> + case hipe_rtl_primops:gen_enter_builtin(Fun, Args) of + [] -> + hipe_rtl:mk_enter(Fun, Args, conv_call_type(Type)); + Code -> + Code + end.
+ +%% -------------------------------------------------------------------- + +%% +%% FAIL +%% +%% Translates an Icode fail instruction. An empty fail label ([]) means +%% the failure is not inside a catch and is passed on as []; otherwise the +%% label is mapped to an RTL label name. The fail arguments go through +%% args_to_vars/3 and hipe_rtl_exceptions:gen_fail/3 emits the failure code. +%% Returns {[InitCode, FailCode], VarMap1, ConstTab1}. + +gen_fail(I, VarMap, ConstTab) -> + Fail = hipe_icode:fail_label(I), + {Label, VarMap0} = + if Fail =:= [] -> + %% not in a catch + {[], VarMap}; + true -> + {Lbl, Map} = hipe_rtl_varmap:icode_label2rtl_label(Fail, VarMap), + {hipe_rtl:label_name(Lbl), Map} + end, + {Args, VarMap1, ConstTab1, InitCode} = + args_to_vars(hipe_icode:fail_args(I), VarMap0, ConstTab), + Class = hipe_icode:fail_class(I), + FailCode = hipe_rtl_exceptions:gen_fail(Class, Args, Label), + {[InitCode, FailCode], VarMap1, ConstTab1}. + +%% -------------------------------------------------------------------- + +%% +%% GOTO +%% +%% Emits an RTL goto to the RTL label that the varmap associates with the +%% Icode target label. ConstTab is returned unchanged. + +gen_goto(I, VarMap, ConstTab) -> + {Label, Map0} = + hipe_rtl_varmap:icode_label2rtl_label(hipe_icode:goto_label(I), VarMap), + {hipe_rtl:mk_goto(hipe_rtl:label_name(Label)), Map0, ConstTab}. + +%% -------------------------------------------------------------------- + +%% +%% IF +%% +%% Translates an Icode if: converts the arguments with args_to_vars/3, +%% maps the true and false labels to RTL labels and lets gen_cond/5 emit +%% the test for if_op(I), passing if_pred(I) along as Pred. +%% Returns {[InitCode,CondCode], VarMap3, ConstTab1}. 
+ +gen_if(I, VarMap, ConstTab) -> + {Args, VarMap1, ConstTab1, InitCode} = + args_to_vars(hipe_icode:if_args(I), VarMap, ConstTab), + {TrueLbl, VarMap2} = + hipe_rtl_varmap:icode_label2rtl_label(hipe_icode:if_true_label(I), VarMap1), + {FalseLbl, VarMap3} = + hipe_rtl_varmap:icode_label2rtl_label(hipe_icode:if_false_label(I),VarMap2), + CondCode = + gen_cond(hipe_icode:if_op(I), + Args, + hipe_rtl:label_name(TrueLbl), + hipe_rtl:label_name(FalseLbl), + hipe_icode:if_pred(I)), + {[InitCode,CondCode], VarMap3, ConstTab1}. + + +%% -------------------------------------------------------------------- + +%% +%% LABEL +%% +%% Maps the Icode label name to an RTL label via the varmap; that RTL +%% label is itself the generated code. ConstTab is returned unchanged. + +gen_label(I, VarMap, ConstTab) -> + LabelName = hipe_icode:label_name(I), + {NewLabel,Map0} = hipe_rtl_varmap:icode_label2rtl_label(LabelName, VarMap), + {NewLabel,Map0,ConstTab}.
+ +%% -------------------------------------------------------------------- + +%% +%% MOVE +%% +%% Translates an Icode move. The destination is mapped to an RTL +%% variable first. A constant source is handled by gen_const_move/3 +%% (which may return an extended constant table); otherwise the source is +%% mapped too and an fmove is emitted when hipe_icode:is_fvar/1 holds for +%% it, a plain move if not. + +gen_move(I, VarMap, ConstTab) -> + MovedSrc = hipe_icode:move_src(I), + {Dst, VarMap0} = + hipe_rtl_varmap:icode_var2rtl_var(hipe_icode:move_dst(I), VarMap), + case hipe_icode:is_const(MovedSrc) of + true -> + {Code, NewConstMap} = gen_const_move(Dst, MovedSrc, ConstTab), + {[Code], VarMap0, NewConstMap}; + false -> + {Src, VarMap1} = hipe_rtl_varmap:icode_var2rtl_var(MovedSrc, VarMap0), + Code = + case hipe_icode:is_fvar(MovedSrc) of + true -> + hipe_rtl:mk_fmove(Dst, Src); + false -> % It is a var or reg + hipe_rtl:mk_move(Dst, Src) + end, + {[Code], VarMap1, ConstTab} + end. + +%% -------------------------------------------------------------------- + +%% +%% RETURN +%% +%% Converts the returned Icode values with args_to_vars/3 and appends an +%% RTL return instruction to the code that conversion produced. + +gen_return(I, VarMap, ConstTab) -> + {RetVars, VarMap0, ConstTab0, Code} = + args_to_vars(hipe_icode:return_vars(I), VarMap, ConstTab), + {Code ++ [hipe_rtl:mk_return(RetVars)], VarMap0, ConstTab0}. + +%% -------------------------------------------------------------------- + +%% +%% SWITCH +%% + +%% +%% Rewrite switch_val to the equivalent Icode if-then-else sequence, +%% then translate that sequence instead. +%% Doing this at the RTL level would generate the exact same code, +%% but would also require _a_lot_ more work. +%% (Don't believe me? Try it. I did, and threw the code away in disgust. 
+%% The main ugliness comes from (1) maintaining ConstTab for the constants +%% that may be added there [switch_val is not limited to immediates!], +%% (2) maintaining Map for the translated labels, and (3) expanding +%% equality tests to eq-or-call-primop-exact_eqeq_2.) +%% +%% TODO: +%% - separate immediate and non-immediate cases, +%% and translate each list separately +%% +%% ?uumess expands to a ?msg call reporting Options and the use_jumptable +%% setting when usesjumptable is defined, and to the atom ok otherwise. +%% It refers to a variable named Options at the point of use. +-ifdef(usesjumptable). +-define(uumess,?msg("~w Use jtab: ~w\n", + [Options,proplists:get_bool(use_jumptable, Options)])). +-else. +-define(uumess,ok). +-endif.
+ +%% Evaluates ?uumess (see its definition) and then delegates the whole +%% translation to hipe_rtl_mk_switch:gen_switch_val/4. +gen_switch_val(I, VarMap, ConstTab, Options) -> + %% If you want to see whether jumptables are used or not... + ?uumess, + hipe_rtl_mk_switch:gen_switch_val(I, VarMap, ConstTab, Options). + +%% Delegates to hipe_rtl_mk_switch:gen_switch_tuple/4. +gen_switch_tuple(I, Map, ConstTab, Options) -> + hipe_rtl_mk_switch:gen_switch_tuple(I, Map, ConstTab, Options). + +%% -------------------------------------------------------------------- + +%% +%% TYPE +%% +%% Translates an Icode type test: converts the arguments with +%% args_to_vars/3, maps the true and false labels to RTL labels, and lets +%% gen_type_test/6 emit the test itself. The resulting code is the +%% argument set-up code followed by the test code. +%% NOTE(review): Code2 is not wrapped in a list by every gen_type_test/6 +%% case (the fixnum branch of {integer, N} returns a bare mk_branch result, +%% presumably a single instruction term), so Code1 ++ Code2 would build an +%% improper list whenever Code1 is non-empty -- confirm this cannot happen, +%% or nest the two as [Code1, Code2] like the other generators do. + +gen_type(I, VarMap, ConstTab) -> + {Vars, Map0, NewConstTab, Code1} = + args_to_vars(hipe_icode:type_args(I), VarMap, ConstTab), + {TrueLbl, Map1} = + hipe_rtl_varmap:icode_label2rtl_label(hipe_icode:type_true_label(I), Map0), + {FalseLbl, Map2} = + hipe_rtl_varmap:icode_label2rtl_label(hipe_icode:type_false_label(I), Map1), + {Code2, NewConstTab1} = gen_type_test(Vars, hipe_icode:type_test(I), + hipe_rtl:label_name(TrueLbl), + hipe_rtl:label_name(FalseLbl), + hipe_icode:type_pred(I), + NewConstTab), + {Code1 ++ Code2, Map2, NewConstTab1}. + +%% -------------------------------------------------------------------- + +%% +%% Generate code for a type test. If X is not of type Type then goto Label.
+%% +%% Returns {Code, ConstTab}. The first clause handles a single argument, +%% the second exactly two arguments (function2 and fixnum only), and the +%% last any other argument list (fixnum only). An unsupported Type makes +%% the process exit with a {?MODULE, {Message, Type}} reason. + +gen_type_test([X], Type, TrueLbl, FalseLbl, Pred, ConstTab) -> + case Type of + atom -> + {hipe_tagscheme:test_atom(X, TrueLbl, FalseLbl, Pred), ConstTab}; + bignum -> + {hipe_tagscheme:test_bignum(X, TrueLbl, FalseLbl, Pred), ConstTab}; + binary -> + {hipe_tagscheme:test_binary(X, TrueLbl, FalseLbl, Pred), ConstTab}; + bitstr -> + {hipe_tagscheme:test_bitstr(X, TrueLbl, FalseLbl, Pred), ConstTab}; + boolean -> + TmpT = hipe_rtl:mk_new_var(), + TmpF = hipe_rtl:mk_new_var(), + Lbl = hipe_rtl:mk_new_label(), + {[hipe_rtl:mk_load_atom(TmpT, true), + hipe_rtl:mk_branch(X, eq, TmpT, TrueLbl,hipe_rtl:label_name(Lbl),Pred), + Lbl, + hipe_rtl:mk_load_atom(TmpF, false), + hipe_rtl:mk_branch(X, eq, TmpF, TrueLbl, FalseLbl, Pred)], ConstTab}; + cons -> + {hipe_tagscheme:test_cons(X, TrueLbl, FalseLbl, Pred), ConstTab}; + constant -> + {hipe_tagscheme:test_constant(X, TrueLbl, FalseLbl, Pred), ConstTab}; + fixnum -> + {hipe_tagscheme:test_fixnum(X, TrueLbl, FalseLbl, Pred), ConstTab}; + float -> + {hipe_tagscheme:test_flonum(X, TrueLbl, FalseLbl, Pred), ConstTab}; + function -> + {hipe_tagscheme:test_fun(X, TrueLbl, FalseLbl, Pred), ConstTab}; + integer -> + {hipe_tagscheme:test_integer(X, TrueLbl, FalseLbl, Pred), ConstTab}; + list -> + {hipe_tagscheme:test_list(X, TrueLbl, FalseLbl, Pred), ConstTab}; + nil -> + {hipe_tagscheme:test_nil(X, TrueLbl, FalseLbl, Pred), ConstTab}; + number -> + {hipe_tagscheme:test_number(X, TrueLbl, FalseLbl, Pred), ConstTab}; + pid -> + {hipe_tagscheme:test_any_pid(X, TrueLbl, FalseLbl, Pred), ConstTab}; + port -> + {hipe_tagscheme:test_any_port(X, TrueLbl, FalseLbl, Pred), ConstTab}; + reference -> + {hipe_tagscheme:test_ref(X, TrueLbl, FalseLbl, Pred), 
ConstTab}; + tuple -> + {hipe_tagscheme:test_tuple(X, TrueLbl, FalseLbl, Pred), ConstTab}; + {atom, Atom} -> + Tmp = hipe_rtl:mk_new_var(), + {[hipe_rtl:mk_load_atom(Tmp, Atom), + hipe_rtl:mk_branch(X, eq, Tmp, TrueLbl, FalseLbl, Pred)], ConstTab}; + {integer, N} when is_integer(N) -> + 
%% XXX: warning, does not work for bignums + %% NOTE(review): the false branch below does handle an N that is not a + %% fixnum (it is placed in the constant table and compared through + %% op_exact_eqeq_2), so the XXX above looks stale -- confirm. + case hipe_tagscheme:is_fixnum(N) of + true -> + Int = hipe_tagscheme:mk_fixnum(N), + {hipe_rtl:mk_branch(X, eq, hipe_rtl:mk_imm(Int), + TrueLbl, FalseLbl, Pred), + ConstTab}; + false -> + BignumLbl = hipe_rtl:mk_new_label(), + RetLbl = hipe_rtl:mk_new_label(), + BigN = hipe_rtl:mk_new_var(), + Tmp = hipe_rtl:mk_new_var(), + {BigCode,NewConstTab} = gen_big_move(BigN, N, ConstTab), + {[hipe_tagscheme:test_fixnum(X, FalseLbl, + hipe_rtl:label_name(BignumLbl),1-Pred), + BignumLbl, BigCode] + ++ + [hipe_rtl:mk_call([Tmp], op_exact_eqeq_2 , [X,BigN], + hipe_rtl:label_name(RetLbl),[],not_remote), + RetLbl, + hipe_rtl:mk_branch(Tmp, ne, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)], + NewConstTab} + end; + {record, A, S} -> + %% First test for a tuple of size S, then fetch element 1 without + %% further checks and compare it with the atom A. + TupleLbl = hipe_rtl:mk_new_label(), + TupleLblName = hipe_rtl:label_name(TupleLbl), + AtomLab = hipe_rtl:mk_new_label(), + AtomLabName = hipe_rtl:label_name(AtomLab), + TagVar = hipe_rtl:mk_new_var(), + TmpAtomVar = hipe_rtl:mk_new_var(), + {UntagCode, ConstTab1} = + hipe_rtl_primops:gen_primop({{unsafe_element,1},[TagVar],[X], + AtomLabName,[]}, + false, ConstTab), + Code = + hipe_tagscheme:test_tuple_N(X, S, TupleLblName, FalseLbl, Pred) ++ + [TupleLbl|UntagCode] ++ + [AtomLab, + hipe_rtl:mk_load_atom(TmpAtomVar, A), + hipe_rtl:mk_branch(TagVar, eq, TmpAtomVar, TrueLbl, FalseLbl, Pred)], + {Code, + ConstTab1}; + {tuple, N} -> + {hipe_tagscheme:test_tuple_N(X, N, TrueLbl, FalseLbl, Pred), ConstTab}; + Other -> + exit({?MODULE,{"unknown type",Other}}) + end; +gen_type_test(Z = [X,Y], Type, TrueLbl, FalseLbl, Pred, ConstTab) -> + case Type of + function2 -> + {hipe_tagscheme:test_fun2(X, 
Y, TrueLbl, FalseLbl, Pred), ConstTab}; + fixnum -> + {hipe_tagscheme:test_fixnums(Z, TrueLbl, FalseLbl, Pred), ConstTab}; + Other -> + exit({?MODULE,{"unknown type",Other}}) + end; +gen_type_test(X, Type, TrueLbl, FalseLbl, Pred, ConstTab) -> + case Type of + fixnum -> + {hipe_tagscheme:test_fixnums(X, TrueLbl, FalseLbl, 
Pred), ConstTab}; + Other -> + exit({?MODULE,{"type cannot have several arguments",Other}}) + end. + + +%% -------------------------------------------------------------------- +%% +%% Generate code for the if-conditional. +%% +%% Returns a list of RTL code for the comparison CondOp on Args that +%% branches to TrueLbl or FalseLbl. Tmp, GenLbl and TestRetLbl are +%% allocated up front for every operator, although the fixnum_* clauses do +%% not use them. The equality operators first try a plain eq branch on the +%% two arguments and only then call op_exact_eqeq_2 or cmp_2; the ordering +%% operators use the fixnum comparison when test_two_fixnums succeeds and +%% cmp_2 otherwise. Any operator without a dedicated clause is emitted as +%% a call to CondOp whose result is compared against 0. +%% + +gen_cond(CondOp, Args, TrueLbl, FalseLbl, Pred) -> + Tmp = hipe_rtl:mk_new_reg_gcsafe(), + GenLbl = hipe_rtl:mk_new_label(), + TestRetLbl = hipe_rtl:mk_new_label(), + TestRetName = hipe_rtl:label_name(TestRetLbl), + + case CondOp of + 'fixnum_eq' -> + [Arg1, Arg2] = Args, + [hipe_rtl:mk_branch(Arg1, eq, Arg2, TrueLbl, + FalseLbl, Pred)]; + '=:=' -> + [Arg1, Arg2] = Args, + [hipe_rtl:mk_branch(Arg1, eq, Arg2, TrueLbl, + hipe_rtl:label_name(GenLbl), Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], op_exact_eqeq_2, Args, + TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, ne, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + 'fixnum_neq' -> + [Arg1, Arg2] = Args, + [hipe_rtl:mk_branch(Arg1, eq, Arg2, FalseLbl, + TrueLbl, 1-Pred)]; + '=/=' -> + [Arg1, Arg2] = Args, + [hipe_rtl:mk_branch(Arg1, eq, Arg2, FalseLbl, + hipe_rtl:label_name(GenLbl), 1-Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], op_exact_eqeq_2, Args, + TestRetName, [], not_remote), + TestRetLbl, + %% NOTE(review): the labels are swapped here but Pred is passed + %% unchanged, unlike the first branch of this clause and the + %% fixnum_neq clause, which use 1-Pred -- confirm whether 1-Pred + %% was intended. 
+ hipe_rtl:mk_branch(Tmp, ne, hipe_rtl:mk_imm(0), + FalseLbl, TrueLbl, Pred)]; + '==' -> + [Arg1, Arg2] = Args, + [hipe_rtl:mk_branch(Arg1, eq, Arg2, + TrueLbl, hipe_rtl:label_name(GenLbl), Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], cmp_2, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, eq, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + '/=' -> + [Arg1, Arg2] = Args, + [hipe_rtl:mk_branch(Arg1, eq, Arg2, + FalseLbl, hipe_rtl:label_name(GenLbl), 1-Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], cmp_2, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, ne, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + 'fixnum_gt' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:fixnum_gt(Arg1, Arg2, TrueLbl, FalseLbl, Pred)]; + 'fixnum_ge' -> + 
[Arg1, Arg2] = Args, + [hipe_tagscheme:fixnum_ge(Arg1, Arg2, TrueLbl, FalseLbl, Pred)]; + 'fixnum_lt' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:fixnum_lt(Arg1, Arg2, TrueLbl, FalseLbl, Pred)]; + 'fixnum_le' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:fixnum_le(Arg1, Arg2, TrueLbl, FalseLbl, Pred)]; + '>' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, + hipe_rtl:label_name(GenLbl)), + hipe_tagscheme:fixnum_gt(Arg1, Arg2, TrueLbl, FalseLbl, Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], cmp_2, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, gt, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + '<' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, + hipe_rtl:label_name(GenLbl)), + hipe_tagscheme:fixnum_lt(Arg1, Arg2, TrueLbl, FalseLbl, Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], cmp_2, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, lt, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + '>=' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, + hipe_rtl:label_name(GenLbl)), + hipe_tagscheme:fixnum_ge(Arg1, Arg2, TrueLbl, FalseLbl, Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], cmp_2, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, ge, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + '=<' -> + [Arg1, Arg2] = Args, + [hipe_tagscheme:test_two_fixnums(Arg1, Arg2, + hipe_rtl:label_name(GenLbl)), + hipe_tagscheme:fixnum_le(Arg1, Arg2, TrueLbl, FalseLbl, Pred), + GenLbl, + hipe_rtl:mk_call([Tmp], cmp_2, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, le, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)]; + _Other -> + [hipe_rtl:mk_call([Tmp], CondOp, Args, TestRetName, [], not_remote), + TestRetLbl, + hipe_rtl:mk_branch(Tmp, ne, hipe_rtl:mk_imm(0), + TrueLbl, FalseLbl, Pred)] + end.
+ +%% -------------------------------------------------------------------- +%% +%% Translate an argument list of icode vars to rtl vars. Also +%% handles constants in arguments. +%% +%% Returns {RtlArgs, VarMap, ConstTab, Code}. Variables are mapped through +%% the varmap; constants classified as big by type_of_const/1 become +%% constant-table labels; fixnums and nil become tagged immediates; any +%% other constant is loaded into a fresh variable by gen_const_move/3, +%% whose code is added to Code. The tail of the list is processed before +%% its head, and Code is either [] or a nested list. +%% + +args_to_vars([Arg|Args],VarMap, ConstTab) -> + {Vars, VarMap1, ConstTab1, Code} = + args_to_vars(Args, VarMap, ConstTab), + case hipe_icode:is_variable(Arg) of + true -> + {Var, VarMap2} = hipe_rtl_varmap:icode_var2rtl_var(Arg, VarMap1), + {[Var|Vars], VarMap2, ConstTab1, Code}; + false -> + case type_of_const(Arg) of + big -> + ConstVal = hipe_icode:const_value(Arg), + {ConstTab2, Label} = hipe_consttab:insert_term(ConstTab1, ConstVal), + NewArg = hipe_rtl:mk_const_label(Label), + {[NewArg|Vars], VarMap1, ConstTab2, Code}; + fixnum -> + ConstVal = hipe_icode:const_value(Arg), + NewArg = hipe_rtl:mk_imm(tagged_val_of(ConstVal)), + {[NewArg|Vars], VarMap1, ConstTab1, Code}; + nil -> + NewArg = hipe_rtl:mk_imm(tagged_val_of([])), + {[NewArg|Vars], VarMap1, ConstTab1, Code}; + _ -> + Var = hipe_rtl:mk_new_var(), + {Code2, ConstTab2} = gen_const_move(Var, Arg, ConstTab1), + {[Var|Vars], VarMap1, ConstTab2, [Code2,Code]} + end + end; +args_to_vars([], VarMap, ConstTab) -> + {[], VarMap, ConstTab, []}. + +%% -------------------------------------------------------------------- + +%% +%% Translate a move where the source is a constant +%% +%% Returns {Instruction, ConstTab}. nil and fixnum constants are moved as +%% tagged immediates, atoms are loaded with mk_load_atom, and big constants +%% go through gen_big_move/3, the only case that can extend ConstTab. 
+%% + +gen_const_move(Dst, Const, ConstTab) -> + ConstVal = hipe_icode:const_value(Const), + case type_of_const(Const) of + %% const_fun -> + %% gen_fun_move(Dst, ConstVal, ConstTab); + nil -> + Src = hipe_rtl:mk_imm(tagged_val_of([])), + {hipe_rtl:mk_move(Dst, Src), ConstTab}; + fixnum -> + Src = hipe_rtl:mk_imm(tagged_val_of(ConstVal)), + {hipe_rtl:mk_move(Dst, Src), ConstTab}; + atom -> + {hipe_rtl:mk_load_atom(Dst, ConstVal), ConstTab}; + big -> + gen_big_move(Dst, ConstVal, ConstTab) + end. + +%% gen_fun_move(Dst, Fun, ConstTab) -> +%% ?WARNING_MSG("Funmove ~w!
-- NYI\n", [Fun]), +%% {NewTab, Label} = hipe_consttab:insert_fun(ConstTab, Fun), +%% {hipe_rtl:mk_load_address(Dst, Label, constant), NewTab}. + +%% Inserts the term Big into the constant table and returns +%% {MoveInstruction, NewTab}, where the move loads the constant's label +%% into Dst. +gen_big_move(Dst, Big, ConstTab) -> + {NewTab, Label} = hipe_consttab:insert_term(ConstTab, Big), + {hipe_rtl:mk_move(Dst, hipe_rtl:mk_const_label(Label)), + NewTab}. + +%% Classifies an Icode constant by its value: [] is nil, an integer is +%% fixnum or big depending on hipe_tagscheme:is_fixnum/1, an atom is atom, +%% and every other term is big. +type_of_const(Const) -> + case hipe_icode:const_value(Const) of + [] -> + nil; + X when is_integer(X) -> + case hipe_tagscheme:is_fixnum(X) of + true -> fixnum; + false -> big + end; + A when is_atom(A) -> + atom; + _ -> + big + end. + +%% Returns the tagged representation of [] or of an integer, as produced by +%% hipe_tagscheme:mk_nil/0 and hipe_tagscheme:mk_fixnum/1. There is no +%% clause for other terms. +tagged_val_of([]) -> hipe_tagscheme:mk_nil(); +tagged_val_of(X) when is_integer(X) -> hipe_tagscheme:mk_fixnum(X). diff --git a/lib/hipe/rtl/hipe_rtl.erl b/lib/hipe/rtl/hipe_rtl.erl new file mode 100644 index 0000000000..ef06b2abf8 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl.erl @@ -0,0 +1,1655 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% @doc +%% +%% Provides an abstract datatype for HiPE's RTL (Register Transfer Language).
+%% +%% <h3> RTL - Register Transfer Language </h3> +%% +%% Consists of the instructions: +%% <ul> +%% <li> {alu, Dst, Src1, Op, Src2} </li> +%% <li> {alub, Dst, Src1, Op, Src2, RelOp, TrueLabel, FalseLabel, P} </li> +%% <li> {branch, Src1, Src2, RelOp, TrueLabel, FalseLabel, P} </li> +%% <li> {call, DstList, Fun, ArgList, Type, Continuation, FailContinuation} +%% Type is one of {local, remote, primop, closure} </li> +%% <li> {comment, Text} </li> +%% <li> {enter, Fun, ArgList, Type} +%% Type is one of {local, remote, primop, closure} </li> +%% <li> {fconv, Dst, Src} </li> +%% <li> {fload, Dst, Src, Offset} </li> +%% <li> {fmove, Dst, Src} </li> +%% <li> {fp, Dst, Src1, Op, Src2} </li> +%% <li> {fp_unop, Dst, Src, Op} </li> +%% <li> {fstore, Base, Offset, Src} </li> +%% <li> {gctest, Words} </li> +%% <li> {goto, Label} </li> +%% <li> {goto_index, Block, Index, LabelList} </li> +%% <li> {label, Name} </li> +%% <li> {load, Dst, Src, Offset, Size, Sign} </li> +%% <li> {load_address, Dst, Addr, Type} </li> +%% <li> {load_atom, Dst, Atom} </li> +%% <li> {load_word_index, Dst, Block, Index} </li> +%% <li> {move, Dst, Src} </li> +%% <li> {multimove, [Dst1, ..., DstN], [Src1, ..., SrcN]} </li> +%% <li> {phi, Dst, Id, [Src1, ..., SrcN]} </li> +%% <li> {return, VarList} </li> +%% <li> {store, Base, Offset, Src, Size} </li> +%% <li> {switch, Src1, Labels, SortedBy} </li> +%% </ul> +%% +%% There are three kinds of 'registers' in RTL. +%% <ol> +%% <li> Variables containing tagged data that are traced by the GC. </li> +%% <li> Registers that are ignored by the GC. </li> +%% <li> Floating point registers. </li> +%% </ol> +%% These registers all share the same namespace. +%% +%% IMPORTANT: +%% +%% The variables contain tagged Erlang terms, the registers +%% contain untagged values (that can be all sorts of things) and +%% the floating point registers contain untagged floating point +%% values.
This means that the different kinds of 'registers' are +%% incompatible and CANNOT be assigned to each other unless the +%% proper conversions are made. +%% +%% When performing optimizations, it is reasonably safe to move +%% values stored in variables. However, when moving around untagged +%% values from either registers or floating point registers make +%% sure you know what you are doing. +%% +%% Example 1: A register might contain the untagged pointer to +%% something on the heap. If this value is moved across +%% a program point where a garbage collection might +%% occur, the pointer can be invalid. If you are lucky +%% you will end up with a segmentation fault; if unlucky, +%% you will be stuck on a wild goose chase. +%% +%% Example 2: Floating point arithmetic instructions must occur in +%% a floating point block. Otherwise, exceptions can be +%% masked. +%% +%% @end +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl). +-include("../main/hipe.hrl"). + +-export([mk_rtl/8, + rtl_fun/1, + rtl_params/1, + rtl_is_closure/1, + rtl_is_leaf/1, + rtl_code/1, + rtl_code_update/2, + rtl_data/1, + %% rtl_data_update/2, + %% rtl_var_range/1, + %% rtl_var_range_update/2, + %% rtl_label_range/1, + %% rtl_label_range_update/2, + rtl_info/1, + rtl_info_update/2]). 
+ +-export([mk_move/2, + move_dst/1, + move_src/1, + %% move_src_update/2, + %% is_move/1, + + mk_multimove/2, + multimove_dstlist/1, + multimove_srclist/1, + %% multimove_srclist_update/2, + %% is_multimove/1, + + mk_phi/1, + phi_dst/1, + phi_id/1, + phi_arg/2, + phi_arglist/1, + is_phi/1, + phi_enter_pred/3, + phi_remove_pred/2, + + mk_alu/4, + alu_dst/1, + alu_src1/1, + alu_src1_update/2, + alu_src2/1, + alu_src2_update/2, + alu_op/1, + %% is_alu_op/1, + is_shift_op/1, + + mk_load/3, + mk_load/5, + load_dst/1, + load_src/1, + load_offset/1, + load_size/1, + load_sign/1, + + mk_load_atom/2, + load_atom_dst/1, + load_atom_atom/1, + + mk_load_word_index/3, + load_word_index_dst/1, + %% load_word_index_index/1, + %% load_word_index_block/1, + + mk_goto_index/3, + goto_index_index/1, + %% goto_index_block/1, + goto_index_labels/1, + + mk_load_address/3, + load_address_dst/1, + %% load_address_dst_update/2, + load_address_addr/1, + load_address_addr_update/2, + load_address_type/1, + %% load_address_type_update/2, + + mk_store/3, + mk_store/4, + store_base/1, + store_src/1, + store_offset/1, + store_size/1, + + mk_label/1, + mk_new_label/0, + label_name/1, + is_label/1, + + mk_branch/5, + mk_branch/6, + branch_src1/1, + branch_src2/1, + branch_cond/1, + branch_true_label/1, + branch_false_label/1, + branch_pred/1, + %% is_branch/1, + %% branch_true_label_update/2, + %% branch_false_label_update/2, + + mk_alub/7, + mk_alub/8, + alub_dst/1, + alub_src1/1, + alub_op/1, + alub_src2/1, + alub_cond/1, + alub_true_label/1, + %% alub_true_label_update/2, + alub_false_label/1, + %% alub_false_label_update/2, + alub_pred/1, + %% is_alub/1, + + mk_switch/2, + %% mk_switch/3, + mk_sorted_switch/3, + switch_src/1, + %% switch_src_update/2, + switch_labels/1, + %% switch_labels_update/2, + switch_sort_order/1, + %% switch_sort_order_update/2, + + mk_goto/1, + goto_label/1, + is_goto/1, + %% goto_label_update/2, + + mk_call/6, + call_fun/1, + call_dstlist/1, + call_dstlist_update/2, 
+ call_arglist/1, + call_continuation/1, + call_fail/1, + call_type/1, + %% call_continuation_update/2, + %% call_fail_update/2, + is_call/1, + + mk_enter/3, + enter_fun/1, + enter_arglist/1, + enter_type/1, + + mk_return/1, + return_varlist/1, + + mk_gctest/1, + gctest_words/1, + + mk_comment/1, + comment_text/1, + is_comment/1, + + mk_fload/3, + fload_dst/1, + fload_src/1, + %% fload_src_update/2, + fload_offset/1, + %% fload_offset_update/2, + + mk_fstore/3, + fstore_base/1, + fstore_src/1, + fstore_offset/1, + + mk_fp/4, + fp_dst/1, + fp_src1/1, + %% fp_src1_update/2, + fp_src2/1, + %% fp_src2_update/2, + fp_op/1, + + mk_fp_unop/3, + fp_unop_dst/1, + fp_unop_src/1, + %% fp_unop_src_update/2, + fp_unop_op/1, + + mk_fmove/2, + fmove_dst/1, + fmove_src/1, + %% fmove_src_update/2, + %% is_fmove/1, + + mk_fconv/2, + fconv_dst/1, + fconv_src/1, + %% fconv_src_update/2, + %% is_fconv/1, + + %% mk_var/1, + mk_new_var/0, + is_var/1, + var_index/1, + + %% change_vars_to_regs/1, + + mk_fixnumop/3, + fixnumop_dst/1, + fixnumop_src/1, + fixnumop_type/1, + + mk_reg/1, % assumes non gc-safe + mk_reg_gcsafe/1, + mk_new_reg/0, % assumes non gc-safe + mk_new_reg_gcsafe/0, + is_reg/1, + reg_index/1, + reg_is_gcsafe/1, + + %% mk_fpreg/1, + mk_new_fpreg/0, + is_fpreg/1, + fpreg_index/1, + + mk_imm/1, + is_imm/1, + imm_value/1, + + mk_const_label/1, + const_label_label/1, + is_const_label/1, + + args/1, + uses/1, + %% subst/2, + subst_uses/2, + subst_defines/2, + defines/1, + redirect_jmp/3, + is_safe/1, + %% highest_var/1, + pp/1, + pp/2, + pp_block/1, + + %% FIXME _dst_update command. Ok to export these? + alu_dst_update/2, + fconv_dst_update/2, + fload_dst_update/2, + %% fmove_dst_update/2, + fp_dst_update/2, + fp_unop_dst_update/2, + load_dst_update/2, + load_address_dst_update/2, + load_atom_dst_update/2, + load_word_index_dst_update/2, + %% move_dst_update/2, + fixnumop_dst_update/2, + pp_instr/2, + %% pp_arg/2, + phi_arglist_update/2, + phi_redirect_pred/3]). 

%%
%% RTL
%%

-record(rtl, {'fun',        %% Name of the function (MFA)
              arglist,      %% List of argument names (formals)
              is_closure,   %% True if this is code for a closure.
              is_leaf,      %% True if this is a leaf function.
              code,         %% Linear list of RTL-instructions.
              data,         %% Data segment
              var_range,    %% {Min,Max} First and last name used for
                            %%           regs, fpregs, or vars.
                            %%           (they use a common namespace)
              label_range,  %% {Min,Max} First and last name used for labels
              info=[]       %% A keylist with arbitrary information.
             }).

%% Builds the #rtl{} container for one function; `info' keeps its
%% record default ([]).
mk_rtl(Fun, ArgList, Closure, Leaf, Code, Data, VarRange, LabelRange) ->
  #rtl{'fun' = Fun,
       arglist = ArgList,
       is_closure = Closure,
       is_leaf = Leaf,
       code = Code,
       data = Data,
       var_range = VarRange,
       label_range = LabelRange}.

%% Field readers and writers for #rtl{}.
rtl_fun(#rtl{'fun' = MFA}) -> MFA.
rtl_params(#rtl{arglist = Formals}) -> Formals.
rtl_is_closure(#rtl{is_closure = IsClosure}) -> IsClosure.
rtl_is_leaf(#rtl{is_leaf = IsLeaf}) -> IsLeaf.
rtl_code(#rtl{code = Instrs}) -> Instrs.
rtl_code_update(Rtl, NewCode) -> Rtl#rtl{code = NewCode}.
rtl_data(#rtl{data = DataSeg}) -> DataSeg.
%% rtl_data_update(Rtl, Data) -> Rtl#rtl{data=Data}.
%% rtl_var_range(#rtl{var_range=VarRange}) -> VarRange.
%% rtl_var_range_update(Rtl, VarRange) -> Rtl#rtl{var_range=VarRange}.
%% rtl_label_range(#rtl{label_range=LabelRange}) -> LabelRange.
%% rtl_label_range_update(Rtl, LabelRange) -> Rtl#rtl{label_range=LabelRange}.
rtl_info(#rtl{info = KeyList}) -> KeyList.
rtl_info_update(Rtl, NewInfo) -> Rtl#rtl{info = NewInfo}.

%%-----------------------------------------------------------------------------

-include("hipe_rtl.hrl").

%%-----------------------------------------------------------------------------

%%
%% move
%%

%% Constructor and field accessors for the #move{} instruction.
mk_move(Dst, Src) -> #move{dst = Dst, src = Src}.
move_dst(#move{dst = D}) -> D.
move_dst_update(Move, NewDst) -> Move#move{dst = NewDst}.
move_src(#move{src = S}) -> S.
move_src_update(Move, NewSrc) -> Move#move{src = NewSrc}.
%% is_move(#move{}) -> true;
%% is_move(_) -> false.

%%
%% multimove
%%

%% Builds a parallel move. Exits with a descriptive reason when the
%% destination and source lists differ in length.
mk_multimove(DstList, SrcList) ->
  case length(DstList) =:= length(SrcList) of
    true -> true;
    false ->
      exit({?MODULE,mk_multimove,
            {"different arities",{dstlist,DstList},{srclist,SrcList}}})
  end,
  #multimove{dstlist=DstList, srclist=SrcList}.
multimove_dstlist(#multimove{dstlist=DstList}) -> DstList.
multimove_dstlist_update(M, NewDstList) -> M#multimove{dstlist=NewDstList}.
multimove_srclist(#multimove{srclist=SrcList}) -> SrcList.
multimove_srclist_update(M, NewSrcList) -> M#multimove{srclist=NewSrcList}.
%% is_multimove(#multimove{}) -> true;
%% is_multimove(_) -> false.

%%
%% phi
%%

%% The id field is not entirely redundant. It is used in mappings
%% in the SSA pass since the dst field can change.
mk_phi(Var) -> #phi{dst = Var, id = Var, arglist = []}.
%% mk_phi(Var, ArgList) -> #phi{dst = Var, id = Var, arglist = ArgList}.
phi_dst(#phi{dst=Dst}) -> Dst.
phi_dst_update(Phi, NewDst) -> Phi#phi{dst = NewDst}.
phi_id(#phi{id=Id}) -> Id.
%% The argument variables only, without their predecessor labels.
phi_args(Phi) -> [X || {_,X} <- phi_arglist(Phi)].
%% Returns the variable that flows in from predecessor Pred; exits if
%% Pred is not among the phi's predecessors.
phi_arg(Phi, Pred) ->
  case lists:keyfind(Pred, 1, phi_arglist(Phi)) of
    false ->
      exit({?MODULE,phi_arg,{"Unknown Phi predecessor",Phi,{pred,Pred}}});
    {_, Var} -> Var
  end.
phi_arglist(#phi{arglist=ArgList}) -> ArgList.
phi_arglist_update(P,NewArgList) ->P#phi{arglist=NewArgList}.
is_phi(#phi{}) -> true;
is_phi(_) -> false.
%% Adds (or replaces) the {Pred,Var} entry of the phi.
phi_enter_pred(Phi, Pred, Var) ->
  Phi#phi{arglist=[{Pred,Var}|lists:keydelete(Pred, 1, phi_arglist(Phi))]}.
%% Drops the entry for Pred. When exactly one argument remains the phi
%% is replaced by a plain move from that argument.
phi_remove_pred(Phi, Pred) ->
  NewArgList = lists:keydelete(Pred, 1, phi_arglist(Phi)),
  case NewArgList of
    [Arg] -> %% the phi should be turned into a move instruction
      {_Label,Var} = Arg,
      mk_move(phi_dst(Phi), Var);
    %% io:format("~nPhi (~w) turned into move (~w) when removing pred ~w~n",[Phi,Move,Pred]),
    [_|_] ->
      Phi#phi{arglist=NewArgList}
  end.
%% Applies Subst to the variable of every {Pred,Var} phi argument.
phi_argvar_subst(Phi, Subst) ->
  Renamed = [{Pred, subst1(Subst, Var)} || {Pred, Var} <- phi_arglist(Phi)],
  Phi#phi{arglist = Renamed}.

%% Replaces predecessor label OldPred by NewPred in the phi arguments.
phi_redirect_pred(P, OldPred, NewPred)->
  Redirected =
    [{case Pred of
        OldPred -> NewPred;
        _ -> Pred
      end, Var} || {Pred, Var} <- phi_arglist(P)],
  P#phi{arglist = Redirected}.


%%
%% alu
%%

mk_alu(Dst, Src1, Op, Src2) ->
  #alu{dst = Dst, src1 = Src1, op = Op, src2 = Src2}.
alu_dst(#alu{dst = D}) -> D.
alu_dst_update(Alu, NewDst) -> Alu#alu{dst = NewDst}.
alu_src1(#alu{src1 = S1}) -> S1.
alu_src1_update(Alu, NewSrc) -> Alu#alu{src1 = NewSrc}.
alu_src2(#alu{src2 = S2}) -> S2.
alu_src2_update(Alu, NewSrc) -> Alu#alu{src2 = NewSrc}.
alu_op(#alu{op = Operator}) -> Operator.

%%
%% load
%%

%% mk_load/3 is mk_load/5 with size `word' and sign `unsigned'.
mk_load(Dst, Src, Offset) -> mk_load(Dst, Src, Offset, word, unsigned).
mk_load(Dst, Src, Offset, Size, Sign) ->
  ?ASSERT((Sign =:= unsigned) orelse (Sign =:= signed)),
  ?ASSERT((Size =:= word) orelse (Size =:= int32) orelse
          (Size =:= int16) orelse (Size =:= byte)),
  #load{dst = Dst, src = Src, offset = Offset, size = Size, sign = Sign}.
load_dst(#load{dst = D}) -> D.
load_dst_update(Load, NewDst) -> Load#load{dst = NewDst}.
load_src(#load{src = S}) -> S.
load_src_update(Load, NewSrc) -> Load#load{src = NewSrc}.
load_offset(#load{offset = Off}) -> Off.
load_offset_update(Load, NewOffset) -> Load#load{offset = NewOffset}.
load_size(#load{size = Sz}) -> Sz.
load_sign(#load{sign = Sg}) -> Sg.

%%
%% load_atom
%%

mk_load_atom(Dst, Atom) -> #load_atom{dst = Dst, atom = Atom}.
load_atom_dst(#load_atom{dst = D}) -> D.
load_atom_dst_update(Load, NewDst) -> Load#load_atom{dst = NewDst}.
load_atom_atom(#load_atom{atom = A}) -> A.

mk_load_word_index(Dst, Block, Index) ->
  #load_word_index{dst = Dst, block = Block, index = Index}.
load_word_index_dst(#load_word_index{dst = D}) -> D.
load_word_index_dst_update(Load, NewDst) -> Load#load_word_index{dst = NewDst}.
load_word_index_block(#load_word_index{block = B}) -> B.
load_word_index_index(#load_word_index{index = Ix}) -> Ix.

mk_goto_index(Block, Index, Labels) ->
  #goto_index{block = Block, index = Index, labels = Labels}.
goto_index_block(#goto_index{block = B}) -> B.
goto_index_index(#goto_index{index = Ix}) -> Ix.
goto_index_labels(#goto_index{labels = Lbls}) -> Lbls.

%%
%% load_address
%%

mk_load_address(Dst, Addr, Type) ->
  #load_address{dst = Dst, addr = Addr, type = Type}.
load_address_dst(#load_address{dst = D}) -> D.
load_address_dst_update(LA, NewDst) -> LA#load_address{dst = NewDst}.
load_address_addr(#load_address{addr = A}) -> A.
load_address_addr_update(LoadAddress, NewAdr) ->
  LoadAddress#load_address{addr = NewAdr}.
load_address_type(#load_address{type = T}) -> T.
%% load_address_type_update(LA, NewType) -> LA#load_address{type=NewType}.

%%
%% store
%%

%% mk_store/3 is mk_store/4 with size `word'.
mk_store(Base, Offset, Src) -> mk_store(Base, Offset, Src, word).
mk_store(Base, Offset, Src, Size) ->
  ?ASSERT((Size =:= word) orelse (Size =:= int32) orelse
          (Size =:= int16) orelse (Size =:= byte)),
  #store{base = Base, src = Src, offset = Offset, size = Size}.
store_base(#store{base = B}) -> B.
store_base_update(Store, NewBase) -> Store#store{base = NewBase}.
store_offset(#store{offset = Off}) -> Off.
store_offset_update(Store, NewOffset) -> Store#store{offset = NewOffset}.
store_src(#store{src = S}) -> S.
store_src_update(Store, NewSrc) -> Store#store{src = NewSrc}.
store_size(#store{size = Sz}) -> Sz.

%%
%% label
%%

mk_label(Name) -> #label{name = Name}.
%% Creates a label named by the next free RTL label from hipe_gensym.
mk_new_label() -> mk_label(hipe_gensym:get_next_label(rtl)).
label_name(#label{name = N}) -> N.
is_label(#label{}) -> true;
is_label(_) -> false.

%%
%% branch
%%

%% mk_branch/5 is mk_branch/6 with probability 0.5.
mk_branch(Src1, Op, Src2, True, False) ->
  mk_branch(Src1, Op, Src2, True, False, 0.5).
mk_branch(Src1, Op, Src2, True, False, P) ->
  #branch{src1 = Src1, 'cond' = Op, src2 = Src2,
          true_label = True, false_label = False, p = P}.
branch_src1(#branch{src1 = S1}) -> S1.
branch_src1_update(Branch, NewSrc) -> Branch#branch{src1 = NewSrc}.
branch_src2(#branch{src2 = S2}) -> S2.
branch_src2_update(Branch, NewSrc) -> Branch#branch{src2 = NewSrc}.
branch_cond(#branch{'cond' = C}) -> C.
branch_true_label(#branch{true_label = T}) -> T.
branch_true_label_update(Branch, NewTrue) -> Branch#branch{true_label = NewTrue}.
branch_false_label(#branch{false_label = F}) -> F.
branch_false_label_update(Branch, NewFalse) -> Branch#branch{false_label = NewFalse}.
branch_pred(#branch{p = Prob}) -> Prob.

%%
%% alub
%%

%% mk_alub/7 is mk_alub/8 with probability 0.5.
mk_alub(Dst, Src1, Op, Src2, Cond, True, False) ->
  mk_alub(Dst, Src1, Op, Src2, Cond, True, False, 0.5).
mk_alub(Dst, Src1, Op, Src2, Cond, True, False, P) ->
  #alub{dst = Dst, src1 = Src1, op = Op, src2 = Src2, 'cond' = Cond,
        true_label = True, false_label = False, p = P}.
alub_dst(#alub{dst = D}) -> D.
alub_dst_update(Alub, NewDst) -> Alub#alub{dst = NewDst}.
alub_src1(#alub{src1 = S1}) -> S1.
alub_src1_update(Alub, NewSrc) -> Alub#alub{src1 = NewSrc}.
alub_op(#alub{op = Operator}) -> Operator.
alub_src2(#alub{src2 = S2}) -> S2.
alub_src2_update(Alub, NewSrc) -> Alub#alub{src2 = NewSrc}.
alub_cond(#alub{'cond' = C}) -> C.
alub_true_label(#alub{true_label = T}) -> T.
alub_true_label_update(Alub, NewTrue) -> Alub#alub{true_label = NewTrue}.
alub_false_label(#alub{false_label = F}) -> F.
alub_false_label_update(Alub, NewFalse) -> Alub#alub{false_label = NewFalse}.
alub_pred(#alub{p = Prob}) -> Prob.

%%
%% switch
%%

%% mk_switch/2 leaves `sorted_by' at its record default.
mk_switch(Src, Labels) -> #switch{src = Src, labels = Labels}.
mk_sorted_switch(Src, Labels, Order) ->
  #switch{src = Src, labels = Labels, sorted_by = Order}.
switch_src(#switch{src = S}) -> S.
switch_src_update(Switch, NewSrc) -> Switch#switch{src = NewSrc}.
switch_labels(#switch{labels = Lbls}) -> Lbls.
switch_labels_update(Switch, NewLabels) -> Switch#switch{labels = NewLabels}.
switch_sort_order(#switch{sorted_by = SortedBy}) -> SortedBy.
%% switch_sort_order_update(I,N) -> I#switch{sorted_by=N}.

%%
%% goto
%%

mk_goto(Label) -> #goto{label = Label}.
goto_label(#goto{label = L}) -> L.
goto_label_update(Goto, NewLabel) ->
  Goto#goto{label = NewLabel}.
is_goto(#goto{}) -> true;
is_goto(_) -> false.

%%
%% call
%%

%% Builds a #call{}. Type must be `remote' or `not_remote'; any other
%% value fails in the case expression below.
mk_call(DstList, Fun, ArgList, Continuation, FailContinuation, Type) ->
  case Type of
    remote -> ok;
    not_remote -> ok
  end,
  #call{dstlist = DstList,
        'fun' = Fun,
        arglist = ArgList,
        type = Type,
        continuation = Continuation,
        failcontinuation = FailContinuation}.
call_dstlist(#call{dstlist = Dsts}) -> Dsts.
call_dstlist_update(Call, NewDstList) -> Call#call{dstlist = NewDstList}.
call_fun(#call{'fun' = Callee}) -> Callee.
call_fun_update(Call, NewFun) -> Call#call{'fun' = NewFun}.
call_arglist(#call{arglist = Args}) -> Args.
call_arglist_update(Call, NewArgList) -> Call#call{arglist = NewArgList}.
call_continuation(#call{continuation = Cont}) -> Cont.
call_fail(#call{failcontinuation = Fail}) -> Fail.
call_type(#call{type = T}) -> T.
call_continuation_update(Call, NewCont) -> Call#call{continuation = NewCont}.
call_fail_update(Call, NewCont) -> Call#call{failcontinuation = NewCont}.
is_call(#call{}) -> true;
is_call(_) -> false.
call_is_known(Call) ->
  call_or_enter_fun_is_known(call_fun(Call)).

%% True when the callee is given statically (an atom, {M,F,A} or {F,A});
%% false when it is held in an RTL register or variable, or is anything
%% else. The reg/var test must come before the tuple patterns.
call_or_enter_fun_is_known(Fun) when is_atom(Fun) ->
  true; %% make the expected common case fast
call_or_enter_fun_is_known(Fun) ->
  case is_reg(Fun) orelse is_var(Fun) of
    true -> false;
    false ->
      case Fun of
        {M,F,A} when is_atom(M), is_atom(F), is_integer(A), A >= 0 ->
          true;
        {F,A} when is_atom(F), is_integer(A), A >= 0 ->
          true;
        _ -> %% colored versions of rtl_reg or rtl_var (used in SSA)
          false
      end
  end.

%%
%% enter
%%

%% Builds an #enter{}. Type must be `remote' or `not_remote'.
mk_enter(Fun, ArgList, Type) ->
  case Type of
    remote -> ok;
    not_remote -> ok % {local,primop,closure,pointer}
  end,
  #enter{'fun' = Fun, arglist = ArgList, type = Type}.
enter_fun(#enter{'fun' = Callee}) -> Callee.
enter_fun_update(Enter, NewFun) -> Enter#enter{'fun' = NewFun}.
enter_arglist(#enter{arglist = Args}) -> Args.
enter_arglist_update(Enter, NewArgList) -> Enter#enter{arglist = NewArgList}.
enter_type(#enter{type = T}) -> T.
enter_is_known(Enter) ->
  call_or_enter_fun_is_known(enter_fun(Enter)).

%%
%% return
%%

mk_return(VarList) -> #return{varlist = VarList}.
return_varlist(#return{varlist = Vars}) -> Vars.
return_varlist_update(Return, NewVarList) -> Return#return{varlist = NewVarList}.

%%
%% gctests
%%

%% An integer word count is wrapped as an immediate; anything else
%% (rtl_regs and rtl_vars) is stored as given.
mk_gctest(Words) ->
  Amount = case is_integer(Words) of
             true -> mk_imm(Words);
             false -> Words
           end,
  #gctest{words = Amount}.
gctest_words(#gctest{words = W}) -> W.
gctest_words_update(GcTest, NewWords) -> GcTest#gctest{words = NewWords}.


%%
%% fixnumop
%%

mk_fixnumop(Dst, Src, Type) ->
  #fixnumop{dst = Dst, src = Src, type = Type}.
fixnumop_dst(#fixnumop{dst = D}) -> D.
fixnumop_dst_update(Op, NewDst) -> Op#fixnumop{dst = NewDst}.
fixnumop_src(#fixnumop{src = S}) -> S.
fixnumop_src_update(Op, NewSrc) -> Op#fixnumop{src = NewSrc}.
fixnumop_type(#fixnumop{type = T}) -> T.

%%
%% comments
%%

mk_comment(Text) -> #comment{text = Text}.
comment_text(#comment{text = Txt}) -> Txt.
is_comment(#comment{}) -> true;
is_comment(_) -> false.

%%-------------------------------------------------------------------------
%% Floating point stuff.
%%-------------------------------------------------------------------------

%%
%% fload
%%

mk_fload(Dst, Src, Offset) -> #fload{dst = Dst, src = Src, offset = Offset}.
fload_dst(#fload{dst = D}) -> D.
fload_dst_update(FLoad, NewDst) -> FLoad#fload{dst = NewDst}.
fload_src(#fload{src = S}) -> S.
fload_src_update(FLoad, NewSrc) -> FLoad#fload{src = NewSrc}.
fload_offset(#fload{offset = Off}) -> Off.
fload_offset_update(FLoad, NewOffset) -> FLoad#fload{offset = NewOffset}.

%%
%% fstore
%%

mk_fstore(Base, Offset, Src) ->
  #fstore{base = Base, offset = Offset, src = Src}.
fstore_base(#fstore{base = B}) -> B.
fstore_base_update(FStore, NewBase) -> FStore#fstore{base = NewBase}.
fstore_offset(#fstore{offset = Off}) -> Off.
fstore_offset_update(FStore, NewOff) -> FStore#fstore{offset = NewOff}.
fstore_src(#fstore{src = S}) -> S.
fstore_src_update(FStore, NewSrc) -> FStore#fstore{src = NewSrc}.

%%
%% fp
%%

mk_fp(Dst, Src1, Op, Src2) ->
  #fp{dst = Dst, src1 = Src1, op = Op, src2 = Src2}.
fp_dst(#fp{dst = D}) -> D.
fp_dst_update(Fp, NewDst) -> Fp#fp{dst = NewDst}.
fp_src1(#fp{src1 = S1}) -> S1.
fp_src1_update(Fp, NewSrc) -> Fp#fp{src1 = NewSrc}.
fp_src2(#fp{src2 = S2}) -> S2.
fp_src2_update(Fp, NewSrc) -> Fp#fp{src2 = NewSrc}.
fp_op(#fp{op = Operator}) -> Operator.

%%
%% fp_unop
%%

mk_fp_unop(Dst, Src, Op) ->
  #fp_unop{dst = Dst, src = Src, op = Op}.
fp_unop_dst(#fp_unop{dst = D}) -> D.
fp_unop_dst_update(Fp, NewDst) -> Fp#fp_unop{dst = NewDst}.
fp_unop_src(#fp_unop{src = S}) -> S.
fp_unop_src_update(Fp, NewSrc) -> Fp#fp_unop{src = NewSrc}.
fp_unop_op(#fp_unop{op = Operator}) -> Operator.

%%
%% fmove
%%

mk_fmove(X, Y) -> #fmove{dst = X, src = Y}.
fmove_dst(#fmove{dst = D}) -> D.
fmove_dst_update(FMove, NewDst) -> FMove#fmove{dst = NewDst}.
fmove_src(#fmove{src = S}) -> S.
fmove_src_update(FMove, NewSrc) -> FMove#fmove{src = NewSrc}.

%%
%% fconv
%%

mk_fconv(X, Y) -> #fconv{dst = X, src = Y}.
fconv_dst(#fconv{dst = D}) -> D.
fconv_dst_update(FConv, NewDst) -> FConv#fconv{dst = NewDst}.
fconv_src(#fconv{src = S}) -> S.
fconv_src_update(FConv, NewSrc) -> FConv#fconv{src = NewSrc}.

%%
%% The values
%%
%% change_vars_to_regs(Vars) ->
%%   change_vars_to_regs(Vars, []).
%% change_vars_to_regs([Var|Rest], Acc) ->
%%   change_vars_to_regs(Rest,[change_var_to_reg(Var)|Acc]);
%% change_vars_to_regs([], Acc) ->
%%   lists:reverse(Acc).
%%
%% change_var_to_reg(Var) ->
%%   mk_reg(var_index(Var)).

-record(rtl_reg, {index      :: integer(),
                  is_gc_safe :: boolean()}).

%% Registers are identified by a non-negative integer index and carry
%% a flag telling whether they are gc-safe.
mk_reg(Num, IsGcSafe) when is_integer(Num), Num >= 0 ->
  #rtl_reg{index = Num, is_gc_safe = IsGcSafe}.
mk_reg(Num) -> mk_reg(Num, false).
mk_reg_gcsafe(Num) -> mk_reg(Num, true).
%% Fresh non-gc-safe register, numbered by hipe_gensym.
mk_new_reg() -> mk_reg(hipe_gensym:get_next_var(rtl)).
%% Fresh gc-safe register, numbered by hipe_gensym.
mk_new_reg_gcsafe() -> mk_reg_gcsafe(hipe_gensym:get_next_var(rtl)).
reg_index(#rtl_reg{index = Ix}) -> Ix.
reg_is_gcsafe(#rtl_reg{is_gc_safe = Safe}) -> Safe.
is_reg(X) -> is_record(X, rtl_reg).

-record(rtl_var, {index :: non_neg_integer()}).

mk_var(Num) when is_integer(Num), Num >= 0 -> #rtl_var{index = Num}.
mk_new_var() -> mk_var(hipe_gensym:get_next_var(rtl)).
var_index(#rtl_var{index = Ix}) -> Ix.
is_var(X) -> is_record(X, rtl_var).

-record(rtl_fpreg, {index :: non_neg_integer()}).

mk_fpreg(Num) when is_integer(Num), Num >= 0 -> #rtl_fpreg{index = Num}.
mk_new_fpreg() -> mk_fpreg(hipe_gensym:get_next_var(rtl)).
fpreg_index(#rtl_fpreg{index = Ix}) -> Ix.
is_fpreg(X) -> is_record(X, rtl_fpreg).

-record(rtl_imm, {value}).

mk_imm(Value) -> #rtl_imm{value = Value}.
imm_value(#rtl_imm{value = V}) -> V.
is_imm(X) -> is_record(X, rtl_imm).

-record(rtl_const_lbl, {label}).

mk_const_label(Label) -> #rtl_const_lbl{label = Label}.
const_label_label(#rtl_const_lbl{label = L}) -> L.
is_const_label(X) -> is_record(X, rtl_const_lbl).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Utilities - no representation visible below this point
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%
%% @doc Returns the list of variables an RTL instruction uses, i.e.
%%      its arguments with immediates and constant labels removed.
%%

uses(Instr) ->
  AllArgs = args(Instr),
  remove_imms_and_const_lbls(AllArgs).

%%
%% @doc Returns the list of variables, constant labels and immediates
%%      an RTL instruction uses.
%%

%% For calls and tailcalls the callee itself is included in the result
%% only when it is not statically known; the architecture-specific
%% implicitly used registers are always appended to the argument list.
args(Instr) ->
  case Instr of
    #alu{} ->
      [alu_src1(Instr), alu_src2(Instr)];
    #alub{} ->
      [alub_src1(Instr), alub_src2(Instr)];
    #branch{} ->
      [branch_src1(Instr), branch_src2(Instr)];
    #call{} ->
      CallArgs = call_arglist(Instr) ++ hipe_rtl_arch:call_used(),
      case call_is_known(Instr) of
        false -> [call_fun(Instr) | CallArgs];
        true -> CallArgs
      end;
    #comment{} ->
      [];
    #enter{} ->
      EnterArgs = enter_arglist(Instr) ++ hipe_rtl_arch:tailcall_used(),
      case enter_is_known(Instr) of
        false -> [enter_fun(Instr) | EnterArgs];
        true -> EnterArgs
      end;
    #fconv{} ->
      [fconv_src(Instr)];
    #fixnumop{} ->
      [fixnumop_src(Instr)];
    #fload{} ->
      [fload_src(Instr), fload_offset(Instr)];
    #fmove{} ->
      [fmove_src(Instr)];
    #fp{} ->
      [fp_src1(Instr), fp_src2(Instr)];
    #fp_unop{} ->
      [fp_unop_src(Instr)];
    #fstore{} ->
      [fstore_base(Instr), fstore_offset(Instr), fstore_src(Instr)];
    #goto{} ->
      [];
    #goto_index{} ->
      [];
    #gctest{} ->
      [gctest_words(Instr)];
    #label{} ->
      [];
    #load{} ->
      [load_src(Instr), load_offset(Instr)];
    #load_address{} ->
      [];
    #load_atom{} ->
      [];
    #load_word_index{} ->
      [];
    #move{} ->
      [move_src(Instr)];
    #multimove{} ->
      multimove_srclist(Instr);
    #phi{} ->
      phi_args(Instr);
    #return{} ->
      return_varlist(Instr) ++ hipe_rtl_arch:return_used();
    #store{} ->
      [store_base(Instr), store_offset(Instr), store_src(Instr)];
    #switch{} ->
      [switch_src(Instr)]
  end.

%%
%% @doc Returns a list of variables that an RTL instruction defines.
%%

%% Collects the destination operand(s) of the instruction, then drops
%% any immediates and constant labels from that list. Calls also
%% define the architecture-specific registers from
%% hipe_rtl_arch:call_defined/0.
defines(Instr) ->
  Dsts =
    case Instr of
      #alu{} -> [alu_dst(Instr)];
      #alub{} -> [alub_dst(Instr)];
      #branch{} -> [];
      #call{} -> call_dstlist(Instr) ++ hipe_rtl_arch:call_defined();
      #comment{} -> [];
      #enter{} -> [];
      #fconv{} -> [fconv_dst(Instr)];
      #fixnumop{} -> [fixnumop_dst(Instr)];
      #fload{} -> [fload_dst(Instr)];
      #fmove{} -> [fmove_dst(Instr)];
      #fp{} -> [fp_dst(Instr)];
      #fp_unop{} -> [fp_unop_dst(Instr)];
      #fstore{} -> [];
      #gctest{} -> [];
      #goto{} -> [];
      #goto_index{} -> [];
      #label{} -> [];
      #load{} -> [load_dst(Instr)];
      #load_address{} -> [load_address_dst(Instr)];
      #load_atom{} -> [load_atom_dst(Instr)];
      #load_word_index{} -> [load_word_index_dst(Instr)];
      #move{} -> [move_dst(Instr)];
      #multimove{} -> multimove_dstlist(Instr);
      #phi{} -> [phi_dst(Instr)];
      #return{} -> [];
      #store{} -> [];
      #switch{} -> []
    end,
  remove_imms_and_const_lbls(Dsts).

%% @spec remove_imms_and_const_lbls([rtl_argument()]) -> [rtl_argument()]
%%
%% @doc Removes all RTL immediates and constant labels from a list of
%%      arguments, keeping the remaining elements in their original order.

remove_imms_and_const_lbls(Args) ->
  [Arg || Arg <- Args, not (is_imm(Arg) orelse is_const_label(Arg))].

%%
%% Substitution: replace occurrences of X by Y if {X,Y} is in Subst.
%%
%% subst(Subst, X) ->
%%   subst_defines(Subst, subst_uses(Subst,X)).

%% Rewrites every used (source) operand of Instr through Subst. For
%% calls and tailcalls the callee is rewritten only when it is not
%% statically known. Instructions without source operands are returned
%% unchanged.
subst_uses(Subst, Instr) ->
  case Instr of
    #alu{} ->
      A1 = alu_src1_update(Instr, subst1(Subst, alu_src1(Instr))),
      alu_src2_update(A1, subst1(Subst, alu_src2(Instr)));
    #alub{} ->
      A1 = alub_src1_update(Instr, subst1(Subst, alub_src1(Instr))),
      alub_src2_update(A1, subst1(Subst, alub_src2(Instr)));
    #branch{} ->
      B1 = branch_src1_update(Instr, subst1(Subst, branch_src1(Instr))),
      branch_src2_update(B1, subst1(Subst, branch_src2(Instr)));
    #call{} ->
      case call_is_known(Instr) of
        false ->
          C1 = call_fun_update(Instr, subst1(Subst, call_fun(Instr))),
          call_arglist_update(C1, subst_list(Subst, call_arglist(C1)));
        true ->
          call_arglist_update(Instr, subst_list(Subst, call_arglist(Instr)))
      end;
    #comment{} ->
      Instr;
    #enter{} ->
      case enter_is_known(Instr) of
        false ->
          E1 = enter_fun_update(Instr, subst1(Subst, enter_fun(Instr))),
          enter_arglist_update(E1, subst_list(Subst, enter_arglist(E1)));
        true ->
          enter_arglist_update(Instr, subst_list(Subst, enter_arglist(Instr)))
      end;
    #fconv{} ->
      fconv_src_update(Instr, subst1(Subst, fconv_src(Instr)));
    #fixnumop{} ->
      fixnumop_src_update(Instr, subst1(Subst, fixnumop_src(Instr)));
    #fload{} ->
      F1 = fload_src_update(Instr, subst1(Subst, fload_src(Instr))),
      fload_offset_update(F1, subst1(Subst, fload_offset(Instr)));
    #fmove{} ->
      fmove_src_update(Instr, subst1(Subst, fmove_src(Instr)));
    #fp{} ->
      F1 = fp_src1_update(Instr, subst1(Subst, fp_src1(Instr))),
      fp_src2_update(F1, subst1(Subst, fp_src2(Instr)));
    #fp_unop{} ->
      fp_unop_src_update(Instr, subst1(Subst, fp_unop_src(Instr)));
    #fstore{} ->
      F1 = fstore_src_update(Instr, subst1(Subst, fstore_src(Instr))),
      F2 = fstore_base_update(F1, subst1(Subst, fstore_base(Instr))),
      fstore_offset_update(F2, subst1(Subst, fstore_offset(Instr)));
    #goto{} ->
      Instr;
    #goto_index{} ->
      Instr;
    #gctest{} ->
      gctest_words_update(Instr, subst1(Subst, gctest_words(Instr)));
    #label{} ->
      Instr;
    #load{} ->
      L1 = load_src_update(Instr, subst1(Subst, load_src(Instr))),
      load_offset_update(L1, subst1(Subst, load_offset(Instr)));
    #load_address{} ->
      Instr;
    #load_atom{} ->
      Instr;
    #load_word_index{} ->
      Instr;
    #move{} ->
      move_src_update(Instr, subst1(Subst, move_src(Instr)));
    #multimove{} ->
      multimove_srclist_update(Instr,
                               subst_list(Subst, multimove_srclist(Instr)));
    #phi{} ->
      phi_argvar_subst(Instr, Subst);
    #return{} ->
      return_varlist_update(Instr, subst_list(Subst, return_varlist(Instr)));
    #store{} ->
      S1 = store_src_update(Instr, subst1(Subst, store_src(Instr))),
      S2 = store_base_update(S1, subst1(Subst, store_base(Instr))),
      store_offset_update(S2, subst1(Subst, store_offset(Instr)));
    #switch{} ->
      switch_src_update(Instr, subst1(Subst, switch_src(Instr)))
  end.

%% Rewrites every defined (destination) operand of Instr through Subst.
%% Instructions without a destination are returned unchanged.
subst_defines(Subst, Instr)->
  case Instr of
    #alu{} ->
      alu_dst_update(Instr, subst1(Subst, alu_dst(Instr)));
    #alub{} ->
      alub_dst_update(Instr, subst1(Subst, alub_dst(Instr)));
    #branch{} ->
      Instr;
    #call{} ->
      call_dstlist_update(Instr, subst_list(Subst, call_dstlist(Instr)));
    #comment{} ->
      Instr;
    #enter{} ->
      Instr;
    #fconv{} ->
      fconv_dst_update(Instr, subst1(Subst, fconv_dst(Instr)));
    #fixnumop{} ->
      fixnumop_dst_update(Instr, subst1(Subst, fixnumop_dst(Instr)));
    #fload{} ->
      fload_dst_update(Instr, subst1(Subst, fload_dst(Instr)));
    #fmove{} ->
      fmove_dst_update(Instr, subst1(Subst, fmove_dst(Instr)));
    #fp{} ->
      fp_dst_update(Instr, subst1(Subst, fp_dst(Instr)));
    #fp_unop{} ->
      fp_unop_dst_update(Instr, subst1(Subst, fp_unop_dst(Instr)));
    #fstore{} ->
      Instr;
    #gctest{} ->
      Instr;
    #goto{} ->
      Instr;
    #goto_index{} ->
      Instr;
    #label{} ->
      Instr;
    #load{} ->
      load_dst_update(Instr, subst1(Subst, load_dst(Instr)));
    #load_address{} ->
      load_address_dst_update(Instr, subst1(Subst, load_address_dst(Instr)));
    #load_atom{} ->
      load_atom_dst_update(Instr, subst1(Subst, load_atom_dst(Instr)));
    #load_word_index{} ->
      load_word_index_dst_update(Instr,
                                 subst1(Subst, load_word_index_dst(Instr)));
    #move{} ->
      move_dst_update(Instr, subst1(Subst, move_dst(Instr)));
    #multimove{} ->
      multimove_dstlist_update(Instr,
                               subst_list(Subst, multimove_dstlist(Instr)));
    #phi{} ->
      phi_dst_update(Instr, subst1(Subst, phi_dst(Instr)));
    #return{} ->
      Instr;
    #store{} ->
      Instr;
    #switch{} ->
      Instr
  end.

%% Applies subst1/2 to each element of the list.
subst_list(S, Xs) ->
  lists:map(fun(X) -> subst1(S, X) end, Xs).

%% Looks X up in the substitution list: the first pair whose key is
%% exactly X gives the replacement; X is returned when none matches.
subst1([], X) ->
  X;
subst1([Pair|Rest], X) ->
  case Pair of
    {X, Y} -> Y;
    _ -> subst1(Rest, X)
  end.

%% @spec is_safe(rtl_instruction()) -> boolean()
%%
%% @doc Succeeds if an RTL instruction is safe and can be deleted if the
%%      result is not used.

is_safe(I) ->
  case I of
    #alu{} -> true;
    #alub{} -> false;
    #branch{} -> false;
    #call{} -> false;
    #comment{} -> false;
    #enter{} -> false;
    #fconv{} -> true;
    #fixnumop{} -> true;
    #fload{} -> true;
    #fmove{} -> true;
    #fp{} -> false;
    #fp_unop{} -> false;
    #fstore{} -> false;
    #gctest{} -> false;
    #goto{} -> false;
    #goto_index{} -> false; % ???
    #label{} -> true;
    #load{} -> true;
    #load_address{} -> true;
    #load_atom{} -> true;
    #load_word_index{} -> true;
    #move{} -> true;
    #multimove{} -> true;
    #phi{} -> true;
    #return{} -> false;
    #store{} -> false;
    #switch{} -> false %% Maybe this is safe...
  end.

%%
%% True if argument is an alu-operator
%%

%% is_alu_op(add) -> true;
%% is_alu_op(sub) -> true;
%% is_alu_op('or') -> true;
%% is_alu_op('and') -> true;
%% is_alu_op('xor') -> true;
%% is_alu_op(andnot) -> true;
%% is_alu_op(sll) -> true;
%% is_alu_op(srl) -> true;
%% is_alu_op(sra) -> true;
%% is_alu_op(_) -> false.

%% @spec is_shift_op(rtl_operator()) -> boolean()
%%
%% @doc Succeeds if its argument is an RTL shift operator
%%      (`sll', `srl' or `sra').
is_shift_op(Op) ->
  (Op =:= sll) orelse (Op =:= srl) orelse (Op =:= sra).


%%
%% True if argument is an relational operator
%%

%% is_rel_op(eq) -> true;
%% is_rel_op(ne) -> true;
%% is_rel_op(gt) -> true;
%% is_rel_op(gtu) -> true;
%% is_rel_op(ge) -> true;
%% is_rel_op(geu) -> true;
%% is_rel_op(lt) -> true;
%% is_rel_op(ltu) -> true;
%% is_rel_op(le) -> true;
%% is_rel_op(leu) -> true;
%% is_rel_op(overflow) -> true;
%% is_rel_op(not_overflow) -> true;
%% is_rel_op(_) -> false.

%% Replaces every occurrence of label ToOld by ToNew in the targets of
%% a branch, switch, alub, goto or call (continuation and fail label).
%% Any other instruction is returned unchanged.
redirect_jmp(Jmp, ToOld, ToNew) ->
  %% OBS: In a jmp instruction more than one labels may be identical
  %%      and thus need redirection!
  case Jmp of
    #branch{} ->
      Jmp1 =
        case branch_true_label(Jmp) of
          ToOld -> branch_true_label_update(Jmp, ToNew);
          _ -> Jmp
        end,
      case branch_false_label(Jmp1) of
        ToOld -> branch_false_label_update(Jmp1, ToNew);
        _ -> Jmp1
      end;
    #switch{} ->
      Redirected =
        [case Lbl of
           ToOld -> ToNew;
           _ -> Lbl
         end || Lbl <- switch_labels(Jmp)],
      switch_labels_update(Jmp, Redirected);
    #alub{} ->
      Jmp1 =
        case alub_true_label(Jmp) of
          ToOld -> alub_true_label_update(Jmp, ToNew);
          _ -> Jmp
        end,
      case alub_false_label(Jmp1) of
        ToOld -> alub_false_label_update(Jmp1, ToNew);
        _ -> Jmp1
      end;
    #goto{} ->
      case goto_label(Jmp) of
        ToOld -> goto_label_update(Jmp, ToNew);
        _ -> Jmp
      end;
    #call{} ->
      Jmp1 =
        case call_continuation(Jmp) of
          ToOld -> call_continuation_update(Jmp, ToNew);
          _ -> Jmp
        end,
      case call_fail(Jmp1) of
        ToOld -> call_fail_update(Jmp1, ToNew);
        _ -> Jmp1
      end;
    _ ->
      Jmp
  end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% highest_var(Code) ->
%%   highest_var(Code,0).
%%
%% highest_var([I|Is],Max) ->
%%   Defs = defines(I),
%%   Uses = uses(I),
%%   highest_var(Is,new_max(Defs++Uses,Max));
%% highest_var([],Max) ->
%%   Max.
%%
%% new_max([V|Vs],Max) ->
%%   VName =
%%     case is_var(V) of
%%       true ->
%%         var_index(V);
%%       false ->
%%         case is_fpreg(V) of
%%           true ->
%%             fpreg_index(V);
%%           _ ->
%%             reg_index(V)
%%         end
%%     end,
%%   if VName > Max ->
%%       new_max(Vs, VName);
%%      true ->
%%       new_max(Vs, Max)
%%   end;
%% new_max([],Max) ->
%%   Max.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% @doc Pretty-printer for RTL.
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Pretty-prints a whole RTL function to standard output.
pp(Rtl) ->
  Dev = standard_io,
  pp(Dev, Rtl).

%% Pretty-prints a list of RTL instructions on standard output.
pp_block(Instrs) ->
  pp_instrs(standard_io, Instrs).

%% Pretty-prints a whole RTL function on the I/O device Dev: a header
%% line with the function name and parameters, optional closure/leaf
%% markers, the info field, the data segment and finally the code.
pp(Dev, Rtl) ->
  io:format(Dev, "~w(", [rtl_fun(Rtl)]),
  pp_args(Dev, rtl_params(Rtl)),
  io:format(Dev, ") ->~n", []),
  case rtl_is_closure(Rtl) of
    true ->
      io:format(Dev, ";; Closure\n", []);
    false -> ok
  end,
  case rtl_is_leaf(Rtl) of
    true ->
      io:format(Dev, ";; Leaf function\n", []);
    false -> ok
  end,
  io:format(Dev, ";; Info: ~w\n", [rtl_info(Rtl)]),
  io:format(Dev, ".DataSegment\n", []),
  hipe_data_pp:pp(Dev, rtl_data(Rtl), rtl, ""),
  io:format(Dev, ".CodeSegment\n", []),
  pp_instrs(Dev, rtl_code(Rtl)).

%% Pretty-prints each instruction of the list on Dev, in order.
%% An instruction that cannot be printed does not abort the listing;
%% it is replaced by a "*** Term ***" marker. The marker is written to
%% Dev (not to standard output) so that it appears in the listing at
%% the position of the offending instruction.
pp_instrs(_Dev, []) ->
  ok;
pp_instrs(Dev, [I|Is]) ->
  try pp_instr(Dev, I)
  catch _:_ -> io:format(Dev, "*** ~w ***\n", [I])
  end,
  pp_instrs(Dev, Is).

%% Pretty-prints the single RTL instruction I on Dev.
%% Exits with {?MODULE,pp_instr,{"unknown RTL instruction",I}} when I
%% is not one of the RTL instruction records.
pp_instr(Dev, I) ->
  case I of
    #phi{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, phi_dst(I)),
      io:format(Dev, " <- phi(", []),
      pp_phi_args(Dev, phi_arglist(I)),
      io:format(Dev, ")~n", []);
    #move{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, move_dst(I)),
      io:format(Dev, " <- ", []),
      pp_arg(Dev, move_src(I)),
      io:format(Dev, "~n", []);
    #multimove{} ->
      io:format(Dev, " ", []),
      pp_args(Dev, multimove_dstlist(I)),
      io:format(Dev, " <= ", []),
      pp_args(Dev, multimove_srclist(I)),
      io:format(Dev, "~n", []);
    #alu{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, alu_dst(I)),
      io:format(Dev, " <- ", []),
      pp_arg(Dev, alu_src1(I)),
      io:format(Dev, " ~w ", [alu_op(I)]),
      pp_arg(Dev, alu_src2(I)),
      io:format(Dev, "~n", []);
    #load{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, load_dst(I)),
      io:format(Dev, " <- [", []),
      pp_arg(Dev, load_src(I)),
      io:format(Dev, "+", []),
      pp_arg(Dev, load_offset(I)),
      io:format(Dev, "]", []),
      %% Only non-default sign/size attributes are shown.
      case load_sign(I) of
        signed -> io:format(Dev, " -signed",[]);
        _ -> ok
      end,
      case load_size(I) of
        byte -> io:format(Dev, " -byte",[]);
        int16 -> io:format(Dev, " -int16",[]);
        int32 -> io:format(Dev, " -int32",[]);
        _ -> ok
      end,
      io:format(Dev, "~n", []);
    #load_atom{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, load_atom_dst(I)),
      io:format(Dev, " <- atom_no(\'~s\')~n", [load_atom_atom(I)]);
    #load_word_index{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, load_word_index_dst(I)),
      io:format(Dev, " <- word_index_no( DL~p[~p] )~n",
                [load_word_index_block(I),load_word_index_index(I)]);
    #goto_index{} ->
      io:format(Dev, " ", []),
      io:format(Dev, "goto_index DL~p[~p]~n",
                [goto_index_block(I), goto_index_index(I)]);
    #load_address{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, load_address_dst(I)),
      case load_address_type(I) of
        constant ->
          io:format(Dev, " <- DL~p~n", [load_address_addr(I)]);
        closure ->
          io:format(Dev, " <- L~p [closure]~n", [load_address_addr(I)]);
        Type ->
          io:format(Dev, " <- L~p [~p]~n", [load_address_addr(I),Type])
      end;
    #store{} ->
      io:format(Dev, " [", []),
      pp_arg(Dev, store_base(I)),
      io:format(Dev, "+", []),
      pp_arg(Dev, store_offset(I)),
      io:format(Dev, "] <- ", []),
      pp_arg(Dev, store_src(I)),
      case store_size(I) of
        byte -> io:format(Dev, " -byte",[]);
        int16 -> io:format(Dev, " -int16",[]);
        int32 -> io:format(Dev, " -int32",[]);
        _ -> ok
      end,
      io:format(Dev, "~n", []);
    #label{} ->
      io:format(Dev, "L~w:~n", [label_name(I)]);
    #branch{} ->
      io:format(Dev, " if (", []),
      pp_arg(Dev, branch_src1(I)),
      io:format(Dev, " ~w ", [branch_cond(I)]),
      pp_arg(Dev, branch_src2(I)),
      io:format(Dev, ") then L~w (~.2f) else L~w~n",
                [branch_true_label(I), branch_pred(I), branch_false_label(I)]);
    #switch{} ->
      io:format(Dev, " switch (", []),
      pp_arg(Dev, switch_src(I)),
      io:format(Dev, ") <", []),
      pp_switch_labels(Dev, switch_labels(I)),
      io:format(Dev, ">\n", []);
    #alub{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, alub_dst(I)),
      io:format(Dev, " <- ", []),
      pp_arg(Dev, alub_src1(I)),
      io:format(Dev, " ~w ", [alub_op(I)]),
      pp_arg(Dev, alub_src2(I)),
      io:format(Dev, " if",[]),
      io:format(Dev, " ~w ", [alub_cond(I)]),
      io:format(Dev, "then L~w (~.2f) else L~w~n",
                [alub_true_label(I), alub_pred(I), alub_false_label(I)]);
    #goto{} ->
      io:format(Dev, " goto L~w~n", [goto_label(I)]);
    #call{} ->
      io:format(Dev, " ", []),
      pp_args(Dev, call_dstlist(I)),
      io:format(Dev, " <- ", []),
      case call_is_known(I) of
        true ->
          %% Statically known callee: primop atom, MFA or {F,A}.
          case call_fun(I) of
            F when is_atom(F) ->
              io:format(Dev, "~w(", [F]);
            {M,F,A} when is_atom(M), is_atom(F), is_integer(A), A >= 0 ->
              io:format(Dev, "~w:~w(", [M, F]);
            {F,A} when is_atom(F), is_integer(A), A >=0 ->
              io:format(Dev, "~w(", [F])
          end;
        false ->
          %% Callee is held in an RTL argument.
          io:format(Dev, "(",[]),
          pp_arg(Dev, call_fun(I)),
          io:format(Dev, ")(",[])
      end,
      pp_args(Dev, call_arglist(I)),
      io:format(Dev, ")", []),
      %% [] in a continuation slot means "no label".
      case call_continuation(I) of
        [] -> true;
        CC ->
          io:format(Dev, " then L~w", [CC])
      end,
      case call_fail(I) of
        [] -> true;
        L ->
          io:format(Dev, " fail to L~w", [L])
      end,
      io:format(Dev, "~n", []);
    #enter{} ->
      io:format(Dev, " ", []),
      case enter_is_known(I) of
        true ->
          case enter_fun(I) of
            F when is_atom(F) ->
              io:format(Dev, "~w(", [F]);
            {M,F,A} when is_atom(M), is_atom(F), is_integer(A), A >= 0 ->
              io:format(Dev, "~w:~w(", [M, F]);
            {F,A} when is_atom(F), is_integer(A), A >= 0 ->
              io:format(Dev, "~w(", [F])
          end;
        false ->
          io:format(Dev, "(",[]),
          pp_arg(Dev, enter_fun(I)),
          io:format(Dev, ")(",[])
      end,
      pp_args(Dev, enter_arglist(I)),
      io:format(Dev, ")~n", []);
    #return{} ->
      io:format(Dev, " return(", []),
      pp_args(Dev, return_varlist(I)),
      io:format(Dev, ")~n", []);
    #comment{} ->
      io:format(Dev, " ;; ~p~n", [comment_text(I)]);
    #fixnumop{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, fixnumop_dst(I)),
      io:format(Dev, " <- ", []),
      case fixnumop_type(I) of
        tag ->
          io:format(Dev, "fixnum_tag(", []);
        untag ->
          io:format(Dev, "fixnum_untag(", [])
      end,
      pp_arg(Dev, fixnumop_src(I)),
      io:format(Dev, ")~n", []);
    #gctest{} ->
      io:format(Dev, " gctest(", []),
      pp_arg(Dev, gctest_words(I)),
      io:format(Dev, ")~n", []);
    %% Floating point handling instructions below
    #fload{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, fload_dst(I)),
      io:format(Dev, " <-f [", []),
      pp_arg(Dev, fload_src(I)),
      io:format(Dev, "+", []),
      pp_arg(Dev, fload_offset(I)),
      io:format(Dev, "]~n", []);
    #fstore{} ->
      io:format(Dev, " [", []),
      pp_arg(Dev, fstore_base(I)),
      io:format(Dev, "+", []),
      pp_arg(Dev, fstore_offset(I)),
      io:format(Dev, "] <- ", []),
      pp_arg(Dev, fstore_src(I)),
      io:format(Dev, "~n", []);
    #fp{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, fp_dst(I)),
      io:format(Dev, " <- ", []),
      pp_arg(Dev, fp_src1(I)),
      io:format(Dev, " ~w ", [fp_op(I)]),
      pp_arg(Dev, fp_src2(I)),
      io:format(Dev, "~n", []);
    #fp_unop{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, fp_unop_dst(I)),
      io:format(Dev, " <- ", []),
      io:format(Dev, " ~w ", [fp_unop_op(I)]),
      pp_arg(Dev, fp_unop_src(I)),
      io:format(Dev, "~n", []);
    #fmove{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, fmove_dst(I)),
      io:format(Dev, " <- ", []),
      pp_arg(Dev, fmove_src(I)),
      io:format(Dev, "~n", []);
    #fconv{} ->
      io:format(Dev, " ", []),
      pp_arg(Dev, fconv_dst(I)),
      io:format(Dev, " <-fconv ", []),
      pp_arg(Dev, fconv_src(I)),
      io:format(Dev, "~n", []);
    Other ->
      exit({?MODULE,pp_instr,{"unknown RTL instruction",Other}})
  end.

%% Prints a list of RTL arguments separated by ", ".
pp_args(_Dev, []) ->
  ok;
pp_args(Dev, [A]) ->
  pp_arg(Dev, A);
pp_args(Dev, [A|As]) ->
  pp_arg(Dev, A),
  io:format(Dev, ", ", []),
  pp_args(Dev, As).

%% Prints the argument list of a phi instruction. Elements of the form
%% {Pred,Arg} are printed as "{Pred, Arg}"; a list whose head is not
%% such a pair is printed as an ordinary argument list.
pp_phi_args(_Dev, []) -> ok;
pp_phi_args(Dev, [{Pred,A}]) ->
  io:format(Dev, "{~w, ", [Pred]),
  pp_arg(Dev, A),
  io:format(Dev, "}", []);
pp_phi_args(Dev, [{Pred,A}|Args]) ->
  io:format(Dev, "{~w, ", [Pred]),
  pp_arg(Dev, A),
  io:format(Dev, "}, ", []),
  pp_phi_args(Dev, Args);
pp_phi_args(Dev, Args) ->
  pp_args(Dev, Args).

%% Prints the target-specific name of register number N.
pp_hard_reg(Dev, N) ->
  io:format(Dev, "~s", [hipe_rtl_arch:reg_name(N)]).

%% Prints an RTL register: precoloured registers by their target
%% name, all others as "r<Index>".
pp_reg(Dev, Arg) ->
  Precoloured = hipe_rtl_arch:is_precoloured(Arg),
  if
    Precoloured ->
      pp_hard_reg(Dev, reg_index(Arg));
    not Precoloured ->
      io:format(Dev, "r~w", [reg_index(Arg)])
  end.

%% Prints an RTL variable: precoloured variables by their target
%% register name, all others as "v<Index>".
pp_var(Dev, Arg) ->
  Precoloured = hipe_rtl_arch:is_precoloured(Arg),
  if
    Precoloured ->
      pp_hard_reg(Dev, var_index(Arg));
    not Precoloured ->
      io:format(Dev, "v~w", [var_index(Arg)])
  end.

%% Prints a single RTL argument (variable, register, immediate,
%% floating-point register or constant label). Exits with
%% {?MODULE,pp_arg,{"bad RTL arg",A}} for anything else.
pp_arg(Dev, A) ->
  case pp_arg_kind(A) of
    var ->
      pp_var(Dev, A);
    reg ->
      pp_reg(Dev, A);
    imm ->
      io:format(Dev, "~w", [imm_value(A)]);
    fpreg ->
      io:format(Dev, "f~w", [fpreg_index(A)]);
    const_label ->
      io:format(Dev, "DL~w", [const_label_label(A)]);
    unknown ->
      exit({?MODULE,pp_arg,{"bad RTL arg",A}})
  end.

%% Classifies A by trying the argument predicates in the fixed order
%% var, reg, imm, fpreg, const_label; the first one that holds wins.
pp_arg_kind(A) ->
  pp_arg_kind(A, [{var, fun is_var/1},
                  {reg, fun is_reg/1},
                  {imm, fun is_imm/1},
                  {fpreg, fun is_fpreg/1},
                  {const_label, fun is_const_label/1}]).

pp_arg_kind(_A, []) ->
  unknown;
pp_arg_kind(A, [{Kind, Pred}|Rest]) ->
  case Pred(A) of
    true -> Kind;
    false -> pp_arg_kind(A, Rest)
  end.

%% Prints the label table of a switch, comma separated, starting the
%% position counter at 1.
pp_switch_labels(Dev,Lbls) ->
  pp_switch_labels(Dev,Lbls,1).

%% Pos counts labels printed on the current output line; when it has
%% reached 5 a line break is emitted and the counter restarts at 0.
pp_switch_labels(_Dev, [], _) ->
  ok;
pp_switch_labels(Dev, [L], _Pos) ->
  io:format(Dev, "L~w", [L]);
pp_switch_labels(Dev, [L|Ls], Pos) ->
  io:format(Dev, "L~w, ", [L]),
  NextPos =
    if
      Pos =:= 5 ->
        io:format(Dev, "\n ",[]),
        0;
      true ->
        Pos + 1
    end,
  pp_switch_labels(Dev, Ls, NextPos).
diff --git a/lib/hipe/rtl/hipe_rtl.hrl b/lib/hipe/rtl/hipe_rtl.hrl new file mode 100644 index 0000000000..974e40f830 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl.hrl @@ -0,0 +1,61 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% Provides abstract datatypes for HiPE's RTL (Register Transfer Language). +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%--------------------------------------------------------------------- + +-record(alu, {dst, src1, op, src2}). +-record(alub, {dst, src1, op, src2, 'cond', true_label, false_label, p}). +-record(branch, {src1, src2, 'cond', true_label, false_label, p}). +-record(call, {dstlist, 'fun', arglist, type, continuation, failcontinuation}). +-record(comment, {text}). +-record(enter, {'fun', arglist, type}). +-record(fconv, {dst, src}). +-record(fixnumop, {dst, src, type}). +-record(fload, {dst, src, offset}). +-record(fmove, {dst, src}). +-record(fp, {dst, src1, op, src2}). +-record(fp_unop, {dst, src, op}). +-record(fstore, {base, offset, src}). +-record(gctest, {words}). +-record(goto, {label}). +-record(goto_index, {block, index, labels}). +-record(label, {name}). +-record(load, {dst, src, offset, size, sign}). +-record(load_address, {dst, addr, type}). +-record(load_atom, {dst, atom}). +-record(load_word_index, {dst, block, index}). +-record(move, {dst, src}). +-record(multimove, {dstlist, srclist}). +-record(phi, {dst, id, arglist}). +-record(return, {varlist}). +-record(store, {base, offset, src, size}). +-record(switch, {src, labels, sorted_by=[]}). + +%%--------------------------------------------------------------------- + +%% An efficient macro to convert byte sizes to bit sizes +-define(bytes_to_bits(Bytes), ((Bytes) bsl 3)). 
% (N * 8) + +%%--------------------------------------------------------------------- diff --git a/lib/hipe/rtl/hipe_rtl_arch.erl b/lib/hipe/rtl/hipe_rtl_arch.erl new file mode 100644 index 0000000000..2afdf4eb6b --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_arch.erl @@ -0,0 +1,612 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. +%%===================================================================== +%% Filename : hipe_rtl_arch.erl +%% History : * 2001-04-10 Erik Johansson (happi@it.uu.se): Created. +%%===================================================================== +%% @doc +%% +%% This module contains interface functions whose semantics and +%% implementation depend on the target architecture. +%% +%% @end +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_arch). 

-export([first_virtual_reg/0,
         heap_pointer/0,
         heap_limit/0,
         fcalls/0,
         reg_name/1,
         is_precoloured/1,
         call_defined/0,
         call_used/0,
         tailcall_used/0,
         return_used/0,
         live_at_return/0,
         endianess/0,
         load_big_2/4,
         load_little_2/4,
         load_big_4/4,
         load_little_4/4,
         %% store_4/3,
         eval_alu/3,
         %% eval_alub/4,
         eval_cond/3,
         eval_cond_bits/5,
         fwait/0,
         handle_fp_exception/0,
         pcb_load/2,
         pcb_load/3,
         pcb_store/2,
         pcb_store/3,
         pcb_address/2,
         call_bif/5,
         %% alignment/0,
         nr_of_return_regs/0,
         log2_word_size/0,
         word_size/0
        ]).

-include("hipe_literals.hrl").

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% ____________________________________________________________________
%%
%% ARCH-specific stuff
%% ____________________________________________________________________
%%
%%
%% XXX: x86 might not _have_ real registers for some of these things
%%

%% Number of the first virtual (non-machine) register, as reported by
%% the register module of the architecture stored under the process
%% dictionary key hipe_target_arch.
first_virtual_reg() ->
  Arch = get(hipe_target_arch),
  case Arch of
    amd64 ->
      hipe_amd64_registers:first_virtual();
    x86 ->
      hipe_x86_registers:first_virtual();
    arm ->
      hipe_arm_registers:first_virtual();
    powerpc ->
      hipe_ppc_registers:first_virtual();
    ultrasparc ->
      hipe_sparc_registers:first_virtual()
  end.

%% Returns {GetHPInsn, HPReg, PutHPInsn} for the current target.
heap_pointer() ->
  Arch = get(hipe_target_arch),
  case Arch of
    amd64 ->
      amd64_heap_pointer();
    x86 ->
      x86_heap_pointer();
    arm ->
      heap_pointer_from_reg(hipe_arm_registers:heap_pointer());
    powerpc ->
      heap_pointer_from_reg(hipe_ppc_registers:heap_pointer());
    ultrasparc ->
      heap_pointer_from_reg(hipe_sparc_registers:heap_pointer())
  end.

%% Heap-pointer triple for the case where the heap pointer is register
%% number Reg: the get and put "instructions" are just RTL comments.
heap_pointer_from_reg(Reg) ->
  GetInsn = hipe_rtl:mk_comment('get_heap_pointer'),
  HPReg = hipe_rtl:mk_reg(Reg),
  PutInsn = hipe_rtl:mk_comment('put_heap_pointer'),
  {GetInsn, HPReg, PutInsn}.

-ifdef(AMD64_HP_IN_REGISTER).
amd64_heap_pointer() ->
  heap_pointer_from_reg(hipe_amd64_registers:heap_pointer()).
-else.
-define(HEAP_POINTER_FROM_PCB_NEEDED,1).
amd64_heap_pointer() ->
  heap_pointer_from_pcb().
-endif.

-ifdef(X86_HP_IN_ESI).
x86_heap_pointer() ->
  heap_pointer_from_reg(hipe_x86_registers:heap_pointer()).
-else.
-define(HEAP_POINTER_FROM_PCB_NEEDED,1).
x86_heap_pointer() ->
  heap_pointer_from_pcb().
-endif.

-ifdef(HEAP_POINTER_FROM_PCB_NEEDED).
%% Heap-pointer triple for targets that keep the heap pointer in the
%% PCB: a fresh register plus the load/store that move the value
%% between the PCB slot ?P_HP and that register.
heap_pointer_from_pcb() ->
  HP = hipe_rtl:mk_new_reg(),
  GetInsn = pcb_load(HP, ?P_HP),
  PutInsn = pcb_store(?P_HP, HP),
  {GetInsn, HP, PutInsn}.
-endif.

%% Returns {GetHLIMITInsn, HLIMITReg} for the current target.
heap_limit() ->
  case get(hipe_target_arch) of
    amd64 ->
      heap_limit_from_reg(hipe_amd64_registers:heap_limit());
    x86 ->
      heap_limit_from_reg(hipe_x86_registers:heap_limit());
    Arch when Arch =:= ultrasparc; Arch =:= powerpc; Arch =:= arm ->
      heap_limit_from_pcb()
  end.

%% Heap-limit pair when the limit is register number Reg.
heap_limit_from_reg(Reg) ->
  GetInsn = hipe_rtl:mk_comment('get_heap_limit'),
  {GetInsn, hipe_rtl:mk_reg(Reg)}.

%% Heap-limit pair when the limit is read from PCB slot ?P_HP_LIMIT
%% into a fresh register.
heap_limit_from_pcb() ->
  Limit = hipe_rtl:mk_new_reg(),
  {pcb_load(Limit, ?P_HP_LIMIT), Limit}.

%% Returns {GetFCallsInsn, FCallsReg, PutFCallsInsn} for the target.
fcalls() ->
  case get(hipe_target_arch) of
    amd64 ->
      fcalls_from_reg(hipe_amd64_registers:fcalls());
    x86 ->
      fcalls_from_reg(hipe_x86_registers:fcalls());
    Arch when Arch =:= ultrasparc; Arch =:= powerpc; Arch =:= arm ->
      fcalls_from_pcb()
  end.

%% FCalls triple when the counter is register number Reg.
fcalls_from_reg(Reg) ->
  GetInsn = hipe_rtl:mk_comment('get_fcalls'),
  FCalls = hipe_rtl:mk_reg(Reg),
  PutInsn = hipe_rtl:mk_comment('put_fcalls'),
  {GetInsn, FCalls, PutInsn}.

%% FCalls triple when the counter is kept in PCB slot ?P_FCALLS.
fcalls_from_pcb() ->
  FCalls = hipe_rtl:mk_new_reg(),
  GetInsn = pcb_load(FCalls, ?P_FCALLS),
  PutInsn = pcb_store(?P_FCALLS, FCalls),
  {GetInsn, FCalls, PutInsn}.

%% Target-specific printable name of general-purpose register Reg.
reg_name(Reg) ->
  Arch = get(hipe_target_arch),
  case Arch of
    amd64 ->
      hipe_amd64_registers:reg_name(Reg);
    x86 ->
      hipe_x86_registers:reg_name(Reg);
    arm ->
      hipe_arm_registers:reg_name_gpr(Reg);
    powerpc ->
      hipe_ppc_registers:reg_name_gpr(Reg);
    ultrasparc ->
      hipe_sparc_registers:reg_name_gpr(Reg)
  end.

%% @spec is_precoloured(rtl_arg()) -> boolean()
%%
%% @doc Succeeds if Arg is mapped to a precoloured register in the target.
%%
is_precoloured(Arg) ->
  case hipe_rtl:is_reg(Arg) of
    false ->
      hipe_rtl:is_var(Arg) andalso
        is_precolored_regnum(hipe_rtl:var_index(Arg));
    true ->
      is_precolored_regnum(hipe_rtl:reg_index(Arg))
  end.

%% Asks the register module of the current target whether register
%% number RegNum is precoloured.
is_precolored_regnum(RegNum) ->
  Arch = get(hipe_target_arch),
  case Arch of
    amd64 ->
      hipe_amd64_registers:is_precoloured(RegNum);
    x86 ->
      hipe_x86_registers:is_precoloured(RegNum);
    arm ->
      hipe_arm_registers:is_precoloured_gpr(RegNum);
    powerpc ->
      hipe_ppc_registers:is_precoloured_gpr(RegNum);
    ultrasparc ->
      hipe_sparc_registers:is_precoloured_gpr(RegNum)
  end.

%% The four register sets below are all defined in terms of
%% live_at_return/0.
call_defined() -> call_used().

call_used() -> live_at_return().

tailcall_used() -> call_used().

return_used() -> tailcall_used().

%% Ordered set of RTL registers built from the first component of
%% every pair returned by the target's live_at_return/0.
live_at_return() ->
  Pairs =
    case get(hipe_target_arch) of
      amd64 -> hipe_amd64_registers:live_at_return();
      x86 -> hipe_x86_registers:live_at_return();
      arm -> hipe_arm_registers:live_at_return();
      powerpc -> hipe_ppc_registers:live_at_return();
      ultrasparc -> hipe_sparc_registers:live_at_return()
    end,
  ordsets:from_list([hipe_rtl:mk_reg(R) || {R,_} <- Pairs]).

%% @spec word_size() -> integer()
%%
%% @doc Returns the target's word size.
%%
word_size() ->
  case get(hipe_target_arch) of
    amd64 -> 8;
    Arch when Arch =:= ultrasparc; Arch =:= powerpc;
              Arch =:= arm; Arch =:= x86 -> 4
  end.

%% alignment() ->
%%   case get(hipe_target_arch) of
%%     ultrasparc -> 4;
%%     powerpc -> 4;
%%     arm -> 4;
%%     x86 -> 4;
%%     amd64 -> 8
%%   end.

%% @spec log2_word_size() -> integer()
%%
%% @doc Returns log2 of the target's word size.
%%
log2_word_size() ->
  case get(hipe_target_arch) of
    amd64 -> 3;
    Arch when Arch =:= ultrasparc; Arch =:= powerpc;
              Arch =:= arm; Arch =:= x86 -> 2
  end.

%% @spec endianess() -> big | little
%%
%% @doc Returns the target's endianess.
%%
endianess() ->
  case get(hipe_target_arch) of
    x86 -> little;
    amd64 -> little;
    ultrasparc -> big;
    powerpc -> big;
    arm -> ?ARM_ENDIANESS
  end.

%%%------------------------------------------------------------------------
%%% Reading integers from binaries, in various sizes and endianesses.
%%% Operand-sized alignment is NOT guaranteed, only byte alignment.
%%%------------------------------------------------------------------------

%%% Load a 2-byte big-endian integer from a binary.
%%% Increment Offset by 2.
load_big_2(Dst, Base, Offset, Signedness) ->
  Arch = get(hipe_target_arch),
  case Arch of
    powerpc ->
      load_2_directly(Dst, Base, Offset, Signedness);
    %% Note: x86 could use a "load;xchgb" or "load;rol $8,<16-bit reg>"
    %% sequence here. This has been implemented, but unfortunately didn't
    %% make consistent improvements to our benchmarks.
    _ ->
      load_big_2_in_pieces(Dst, Base, Offset, Signedness)
  end.

%%% Load a 2-byte little-endian integer from a binary.
%%% Increment Offset by 2.
load_little_2(Dst, Base, Offset, Signedness) ->
  Arch = get(hipe_target_arch),
  case Arch of
    x86 ->
      load_2_directly(Dst, Base, Offset, Signedness);
    powerpc ->
      %% Byte-reversed halfword load, followed by an explicit sign
      %% extension when a signed result is requested.
      Load = hipe_rtl:mk_call([Dst], 'lhbrx', [Base,Offset], [], [], not_remote),
      Bump = hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(2)),
      Extend =
        case Signedness of
          unsigned -> [];
          signed -> [hipe_rtl:mk_call([Dst], 'extsh', [Dst], [], [], not_remote)]
        end,
      [Load, Bump | Extend];
    _ ->
      load_little_2_in_pieces(Dst, Base, Offset, Signedness)
  end.

%% One int16 load followed by advancing Offset by 2.
load_2_directly(Dst, Base, Offset, Signedness) ->
  Load = hipe_rtl:mk_load(Dst, Base, Offset, int16, Signedness),
  Bump = hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(2)),
  [Load, Bump].

%% Big-endian 2-byte load assembled from two byte loads: the first
%% (high) byte carries the requested signedness, the second is always
%% unsigned. Offset is advanced by one after each byte.
load_big_2_in_pieces(Dst, Base, Offset, Signedness) ->
  Lo = hipe_rtl:mk_new_reg(),
  Bump = hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)),
  [hipe_rtl:mk_load(Dst, Base, Offset, byte, Signedness),
   Bump,
   hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)),
   hipe_rtl:mk_load(Lo, Base, Offset, byte, unsigned),
   hipe_rtl:mk_alu(Dst, Dst, 'or', Lo),
   Bump].

%% Little-endian 2-byte load assembled from two byte loads: the first
%% (low) byte is unsigned, the second (high) byte carries the
%% requested signedness and is shifted into place before merging.
load_little_2_in_pieces(Dst, Base, Offset, Signedness) ->
  Hi = hipe_rtl:mk_new_reg(),
  Bump = hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)),
  [hipe_rtl:mk_load(Dst, Base, Offset, byte, unsigned),
   Bump,
   hipe_rtl:mk_load(Hi, Base, Offset, byte, Signedness),
   hipe_rtl:mk_alu(Hi, Hi, sll, hipe_rtl:mk_imm(8)),
   hipe_rtl:mk_alu(Dst, Dst, 'or', Hi),
   Bump].

%%% Load a 4-byte big-endian integer from a binary.
%%% Increment Offset by 4.
load_big_4(Dst, Base, Offset, Signedness) ->
  case get(hipe_target_arch) of
    powerpc ->
      load_4_directly(Dst, Base, Offset, Signedness);
    %% Note: x86 could use a "load;bswap" sequence here.
    %% This has been implemented, but unfortunately didn't
    %% make any noticeable improvements in our benchmarks.
    Arch ->
      ByteSign =
        case Arch of
          %% When loading 4 bytes into a 32-bit register, the
          %% signedness of the high-order byte doesn't matter.
          %% ARM prefers unsigned byte loads so we'll use that.
          arm -> unsigned;
          _ -> Signedness
        end,
      load_big_4_in_pieces(Dst, Base, Offset, ByteSign)
  end.

%%% Load a 4-byte little-endian integer from a binary.
%%% Increment Offset by 4.
load_little_4(Dst, Base, Offset, Signedness) ->
  case get(hipe_target_arch) of
    x86 ->
      load_4_directly(Dst, Base, Offset, Signedness);
    powerpc ->
      %% Byte-reversed word load.
      Load = hipe_rtl:mk_call([Dst], 'lwbrx', [Base,Offset], [], [], not_remote),
      Bump = hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(4)),
      [Load, Bump];
    Arch ->
      ByteSign =
        case Arch of
          %% When loading 4 bytes into a 32-bit register, the
          %% signedness of the high-order byte doesn't matter.
          %% ARM prefers unsigned byte loads so we'll use that.
          arm -> unsigned;
          _ -> Signedness
        end,
      load_little_4_in_pieces(Dst, Base, Offset, ByteSign)
  end.

%% One word-sized load followed by advancing Offset by 4.
load_4_directly(Dst, Base, Offset, Signedness) ->
  Load = hipe_rtl:mk_load(Dst, Base, Offset, word, Signedness),
  Bump = hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(4)),
  [Load, Bump].

%% Big-endian 4-byte load assembled from four byte loads. The first
%% (most significant) byte carries the requested signedness; each of
%% the three following bytes is loaded unsigned and merged in after
%% shifting the accumulated value left by 8.
load_big_4_in_pieces(Dst, Base, Offset, Signedness) ->
  Byte = hipe_rtl:mk_new_reg(),
  Bump = hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)),
  NextByte =
    [hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)),
     hipe_rtl:mk_load(Byte, Base, Offset, byte, unsigned),
     hipe_rtl:mk_alu(Dst, Dst, 'or', Byte),
     Bump],
  [hipe_rtl:mk_load(Dst, Base, Offset, byte, Signedness), Bump]
    ++ NextByte ++ NextByte ++ NextByte.

%% Little-endian 4-byte load assembled from four byte loads. The low
%% byte goes straight into Dst; each later byte is loaded into a
%% temporary, shifted to its position (8, 16, 24) and or-ed in. Only
%% the last (most significant) byte carries the requested signedness.
load_little_4_in_pieces(Dst, Base, Offset, Signedness) ->
  Byte = hipe_rtl:mk_new_reg(),
  Bump = hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)),
  Piece =
    fun(Sign, Shift) ->
        [hipe_rtl:mk_load(Byte, Base, Offset, byte, Sign),
         hipe_rtl:mk_alu(Byte, Byte, sll, hipe_rtl:mk_imm(Shift)),
         hipe_rtl:mk_alu(Dst, Dst, 'or', Byte),
         Bump]
    end,
  [hipe_rtl:mk_load(Dst, Base, Offset, byte, unsigned), Bump]
    ++ Piece(unsigned, 8)
    ++ Piece(unsigned, 16)
    ++ Piece(Signedness, 24).

-ifdef(STORE_4_NEEDED).
%% Store the 4-byte value Src at Base+Offset and advance Offset by 4,
%% either with one int32 store or byte by byte, depending on target.
store_4(Base, Offset, Src) ->
  case get(hipe_target_arch) of
    Arch when Arch =:= x86; Arch =:= powerpc; Arch =:= amd64 ->
      store_4_directly(Base, Offset, Src);
    Arch when Arch =:= arm; Arch =:= ultrasparc ->
      store_big_4_in_pieces(Base, Offset, Src)
  end.

store_4_directly(Base, Offset, Src) ->
  Store = hipe_rtl:mk_store(Base, Offset, Src, int32),
  Bump = hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(4)),
  [Store, Bump].

%% Big-endian 4-byte store done one byte at a time, starting with the
%% least significant byte at Offset+3 and walking downwards while Src
%% is shifted right by 8 between stores. Offset ends up advanced by 4.
%% Note that Src itself is shifted by the generated code.
store_big_4_in_pieces(Base, Offset, Src) ->
  StoreByte = hipe_rtl:mk_store(Base, Offset, Src, byte),
  PrevByte =
    [hipe_rtl:mk_alu(Offset, Offset, sub, hipe_rtl:mk_imm(1)),
     hipe_rtl:mk_alu(Src, Src, srl, hipe_rtl:mk_imm(8)),
     StoreByte],
  [hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(3)), StoreByte]
    ++ PrevByte ++ PrevByte ++ PrevByte
    ++ [hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(4))].
-endif.

%%----------------------------------------------------------------------
%% Handling of arithmetic -- depends on the size of word.
%%----------------------------------------------------------------------

%% Evaluates the ALU operation Op on Arg1 and Arg2 using the 32-bit or
%% 64-bit arithmetic module that matches the target's word size.
eval_alu(Op, Arg1, Arg2) ->
  case word_size() of
    8 -> hipe_rtl_arith_64:eval_alu(Op, Arg1, Arg2);
    4 -> hipe_rtl_arith_32:eval_alu(Op, Arg1, Arg2)
  end.

-ifdef(EVAL_ALUB_NEEDED).
%% As eval_alu/3, but also evaluates the branch condition Cond.
eval_alub(Op, Cond, Arg1, Arg2) ->
  case word_size() of
    8 -> hipe_rtl_arith_64:eval_alub(Op, Cond, Arg1, Arg2);
    4 -> hipe_rtl_arith_32:eval_alub(Op, Cond, Arg1, Arg2)
  end.
-endif.

%% Evaluates the comparison Cond between Arg1 and Arg2 with the
%% arithmetic module that matches the target's word size.
eval_cond(Cond, Arg1, Arg2) ->
  case word_size() of
    8 -> hipe_rtl_arith_64:eval_cond(Cond, Arg1, Arg2);
    4 -> hipe_rtl_arith_32:eval_cond(Cond, Arg1, Arg2)
  end.

%% Evaluates condition Cond from the four flag values N, Z, V and C
%% with the arithmetic module that matches the target's word size.
eval_cond_bits(Cond, N, Z, V, C) ->
  case word_size() of
    8 -> hipe_rtl_arith_64:eval_cond_bits(Cond, N, Z, V, C);
    4 -> hipe_rtl_arith_32:eval_cond_bits(Cond, N, Z, V, C)
  end.

%%----------------------------------------------------------------------

%% RTL code for an 'fwait' primop call on x86/amd64; no code on the
%% other targets.
fwait() ->
  case get(hipe_target_arch) of
    Arch when Arch =:= x86; Arch =:= amd64 ->
      [hipe_rtl:mk_call([], 'fwait', [], [], [], not_remote)];
    Arch when Arch =:= arm; Arch =:= powerpc; Arch =:= ultrasparc ->
      []
  end.

%% @spec handle_fp_exception() -> [term()]
%%
%% @doc
%%   Returns RTL code to restore the FPU after a floating-point exception.
%% @end
handle_fp_exception() ->
  case get(hipe_target_arch) of
    Arch when Arch =:= x86; Arch =:= amd64 ->
      %% Call the primop and continue at a fresh label placed right
      %% after the call.
      Cont = hipe_rtl:mk_new_label(),
      Call = hipe_rtl:mk_call([], handle_fp_exception, [],
                              hipe_rtl:label_name(Cont), [], not_remote),
      [Call, Cont];
    Arch when Arch =:= arm; Arch =:= powerpc; Arch =:= ultrasparc ->
      []
  end.

%%
%% PCB accesses.
%% Wrapped to avoid leaking the PCB pointer to the wrong places.
%%

%% Word-sized load from PCB offset Off into Dst.
pcb_load(Dst, Off) ->
  pcb_load(Dst, Off, word).

%% Unsigned load of the given Size from PCB offset Off into Dst.
pcb_load(Dst, Off, Size) ->
  PCB = proc_pointer(),
  hipe_rtl:mk_load(Dst, PCB, hipe_rtl:mk_imm(Off), Size, unsigned).

%% Word-sized store of Src to PCB offset Off.
pcb_store(Off, Src) ->
  pcb_store(Off, Src, word).

%% Store of the given Size of Src to PCB offset Off.
pcb_store(Off, Src, Size) ->
  PCB = proc_pointer(),
  hipe_rtl:mk_store(PCB, hipe_rtl:mk_imm(Off), Src, Size).

%% Computes the address of PCB offset Off into Dst.
pcb_address(Dst, Off) ->
  PCB = proc_pointer(),
  hipe_rtl:mk_alu(Dst, PCB, 'add', hipe_rtl:mk_imm(Off)).

%% The gc-safe RTL register holding the process (PCB) pointer on the
%% current target. Must not be exported.
proc_pointer() ->
  PReg =
    case get(hipe_target_arch) of
      amd64 -> hipe_amd64_registers:proc_pointer();
      x86 -> hipe_x86_registers:proc_pointer();
      arm -> hipe_arm_registers:proc_pointer();
      powerpc -> hipe_ppc_registers:proc_pointer();
      ultrasparc -> hipe_sparc_registers:proc_pointer()
    end,
  hipe_rtl:mk_reg_gcsafe(PReg).

%%
%% Special BIF calls.
%% Wrapped to avoid leaking the PCB pointer to the wrong places,
%% and to allow ARCH-specific expansion.
%%

%% A not_remote RTL call to the BIF Name with the given destinations,
%% arguments, continuation and fail labels.
call_bif(Dst, Name, Args, Cont, Fail) ->
  hipe_rtl:mk_call(Dst, Name, Args, Cont, Fail, not_remote).

%% Number of registers used for return values on the current target.
%% Only x86 asks its register module; the others are fixed at 1
%% (the corresponding hipe_*_registers:nr_rets() calls are disabled
%% for sparc, ppc and amd64).
nr_of_return_regs() ->
  case get(hipe_target_arch) of
    x86 ->
      hipe_x86_registers:nr_rets();
    Arch when Arch =:= ultrasparc; Arch =:= powerpc;
              Arch =:= arm; Arch =:= amd64 ->
      1
  end.
diff --git a/lib/hipe/rtl/hipe_rtl_arith.inc b/lib/hipe/rtl/hipe_rtl_arith.inc new file mode 100644 index 0000000000..31fedd927e --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_arith.inc @@ -0,0 +1,177 @@ +%% -*- Erlang -*- +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. 
%%
%% %CopyrightEnd%
%%
%%----------------------------------------------------------------------
%% File    : hipe_rtl_arith.inc
%% Created : Feb 2004
%% Purpose : Implements arithmetic which is parameterized by the size
%%           of the word of the target architecture (given as defines).
%%----------------------------------------------------------------------


%% Returns a tuple
%%  {Res, Sign, Zero, Overflow, Carry}
%% Res will be a number in the range
%%  MAX_SIGNED_INT >= Res >= MIN_SIGNED_INT
%% The other four values are flags that are either true or false
%%
%% Both arguments must lie in the signed word range, otherwise the
%% function exits with {argument_overflow,Op,Arg1,Arg2}. An unknown
%% Op exits with {"unknown alu op", Op}.
%%
%% For the shift and bitwise operations the overflow and carry flags
%% are reported as 'false'. They must be booleans (not the integer 0)
%% because eval_cond_bits/5 applies 'not', 'or' and 'xor' to them and
%% returns them directly for the overflow and ltu conditions.
eval_alu(Op, Arg1, Arg2)
  when Arg1 =< ?MAX_SIGNED_INT,
       Arg1 >= ?MIN_SIGNED_INT,
       Arg2 =< ?MAX_SIGNED_INT,
       Arg2 >= ?MIN_SIGNED_INT ->

  Sign1 = sign_bit(Arg1),
  Sign2 = sign_bit(Arg2),

  case Op of
    'sub' ->
      Res = (Arg1 - Arg2) band ?WORDMASK,
      N = sign_bit(Res),
      Z = zero(Res),
      V = (Sign1 and (not Sign2) and (not N))
        or
          ((not Sign1) and Sign2 and N),
      C = ((not Sign1) and Sign2)
        or
          (N and ((not Sign1) or Sign2));
    'add' ->
      Res = (Arg1 + Arg2) band ?WORDMASK,
      N = sign_bit(Res),
      Z = zero(Res),
      V = (Sign1 and Sign2 and (not N))
        or
          ((not Sign1) and (not Sign2) and N),
      C = (Sign1 and Sign2)
        or
          ((not N) and (Sign1 or Sign2));
    'mul' ->
      FullRes = Arg1 * Arg2,
      Res = FullRes band ?WORDMASK,
      ResHi = FullRes bsr ?BITS,
      N = sign_bit(Res),
      Z = zero(Res),
      %% Overflow unless the high part is the sign extension of Res.
      V = (N and (ResHi =/= -1)) or ((not N) and (ResHi =/= 0)),
      C = V;
    'sra' ->
      Res = (Arg1 bsr Arg2) band ?WORDMASK,
      N = sign_bit(Res),
      Z = zero(Res),
      V = false,
      C = false;
    'srl' ->
      Res = (Arg1 bsr Arg2) band shiftmask(Arg2),
      N = sign_bit(Res),
      Z = zero(Res),
      V = false,
      C = false;
    'sll' ->
      Res = (Arg1 bsl Arg2) band ?WORDMASK,
      N = sign_bit(Res),
      Z = zero(Res),
      V = false,
      C = false;
    'or' ->
      Res = (Arg1 bor Arg2) band ?WORDMASK,
      N = sign_bit(Res),
      Z = zero(Res),
      V = false,
      C = false;
    'and' ->
      Res = (Arg1 band Arg2) band ?WORDMASK,
      N = sign_bit(Res),
      Z = zero(Res),
      V = false,
      C = false;
    'xor' ->
      Res = (Arg1 bxor Arg2) band ?WORDMASK,
      N = sign_bit(Res),
      Z = zero(Res),
      V = false,
      C = false;
    Op ->
      %% The bindings only keep the variables defined in every clause;
      %% ?EXIT does not return.
      Res = N = Z = V = C = 0,
      ?EXIT({"unknown alu op", Op})
  end,
  {two_comp_to_erl(Res), N, Z, V, C};
eval_alu(Op, Arg1, Arg2) ->
  ?EXIT({argument_overflow,Op,Arg1,Arg2}).

%% Björn & Bjarni:
%% We need to be able to do evaluations based only on the bits, since
%% there are cases where we can evaluate a subset of the bits, but can
%% not do a full eval-alub call (eg. a + 0 gives no carry)
%%
-spec eval_cond_bits(atom(), boolean(), boolean(), boolean(), boolean()) -> boolean().

eval_cond_bits(Cond, N, Z, V, C) ->
  case Cond of
    'eq' ->
      Z;
    'ne' ->
      not Z;
    'gt' ->
      not (Z or (N xor V));
    'gtu' ->
      not (C or Z);
    'ge' ->
      not (N xor V);
    'geu'->
      not C;
    'lt' ->
      N xor V;
    'ltu'->
      C;
    'le' ->
      Z or (N xor V);
    'leu'->
      C or Z;
    'overflow' ->
      V;
    'not_overflow' ->
      not V;
    _ ->
      ?EXIT({'condition code not handled',Cond})
  end.

%% Evaluates Op on the arguments and the condition Cond on the
%% resulting flags; returns {Res, Bool}.
eval_alub(Op, Cond, Arg1, Arg2) ->
  {Res, N, Z, V, C} = eval_alu(Op, Arg1, Arg2),
  {Res, eval_cond_bits(Cond, N, Z, V, C)}.

%% Evaluates the comparison Cond on Arg1 and Arg2 via the flags of
%% the subtraction Arg1 - Arg2.
eval_cond(Cond, Arg1, Arg2) ->
  {_, Bool} = eval_alub('sub', Cond, Arg1, Arg2),
  Bool.

%% True if bit ?SIGN_BIT of Val is set.
sign_bit(Val) ->
  ((Val bsr ?SIGN_BIT) band 1) =:= 1.

%% Converts an unsigned word value to the signed integer with the
%% same two's complement bit pattern.
two_comp_to_erl(V) ->
  if V > ?MAX_SIGNED_INT ->
      - ((?MAX_UNSIGNED_INT + 1) - V);
     true -> V
  end.

%% Mask with the low (?BITS - Arg) bits set, used to clear the bits
%% shifted in by an arithmetic right shift by Arg.
shiftmask(Arg) ->
  Setbits = ?BITS - Arg,
  (1 bsl Setbits) - 1.

zero(Val) ->
  Val =:= 0.

diff --git a/lib/hipe/rtl/hipe_rtl_arith_32.erl b/lib/hipe/rtl/hipe_rtl_arith_32.erl new file mode 100644 index 0000000000..a8a6043cda --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_arith_32.erl @@ -0,0 +1,50 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2003-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. 
%% You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Copyright (c) 2002 by Erik Johansson.
%% ====================================================================
%%  Filename : 	hipe_rtl_arith_32.erl
%%  Module   :	hipe_rtl_arith_32
%%  Purpose  :  To implement 32-bit RTL-arithmetic
%%  Notes    :  The arithmetic works on 32-bit signed integers.
%%              The implementation is taken from the implementation
%%              of arithmetic on SPARC.
%%              XXX: This code is seldom used, and hence also
%%                   seldom tested.
%%                   Look here for strange bugs appearing when
%%                   turning on rtl_prop.
%%
%%  History  :	* 2002-10-23 Erik Stenman (happi@it.uu.se): Created.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-module(hipe_rtl_arith_32).

%% The exported functions are not defined in this file; they come from
%% the shared hipe_rtl_arith.inc included at the bottom.
-export([eval_alu/3, eval_alub/4, eval_cond/3, eval_cond_bits/5]).

%% Word-size parameters read by hipe_rtl_arith.inc (32-bit instance).
-define(BITS, 32).                        %% bits in a machine word
-define(SIGN_BIT, 31).                    %% index of the sign bit
-define(WORDMASK, 16#ffffffff).           %% mask that keeps one word
-define(MAX_SIGNED_INT, 16#7fffffff).     %% largest signed word value
-define(MIN_SIGNED_INT, -16#80000000).    %% smallest signed word value
-define(MAX_UNSIGNED_INT, 16#ffffffff).   %% largest unsigned word value

-include("../main/hipe.hrl").   %% for ?EXIT

-include("hipe_rtl_arith.inc").
%% diff --git a/lib/hipe/rtl/hipe_rtl_arith_64.erl b/lib/hipe/rtl/hipe_rtl_arith_64.erl
%% new file mode 100644
%% index 0000000000..d0d576b65e
%% --- /dev/null
%% +++ b/lib/hipe/rtl/hipe_rtl_arith_64.erl
%% @@ -0,0 +1,38 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%%----------------------------------------------------------------------
%% File    : hipe_rtl_arith_64.erl
%% Created : Feb 2004
%% Purpose : Implements arithmetic for 64-bit target architectures.
%%----------------------------------------------------------------------

-module(hipe_rtl_arith_64).
%% The exported functions are not defined in this file; they come from
%% the shared hipe_rtl_arith.inc included at the bottom.
-export([eval_alu/3, eval_alub/4, eval_cond/3, eval_cond_bits/5]).

%% Word-size parameters read by hipe_rtl_arith.inc (64-bit instance).
-define(BITS, 64).                                %% bits in a machine word
-define(SIGN_BIT, 63).                            %% index of the sign bit
-define(WORDMASK, 16#ffffffffffffffff).           %% mask that keeps one word
-define(MAX_SIGNED_INT, 16#7fffffffffffffff).     %% largest signed word value
-define(MIN_SIGNED_INT, -16#8000000000000000).    %% smallest signed word value
-define(MAX_UNSIGNED_INT,16#ffffffffffffffff).    %% largest unsigned word value

-include("../main/hipe.hrl").   %% for ?EXIT

-include("hipe_rtl_arith.inc").
%% diff --git a/lib/hipe/rtl/hipe_rtl_binary.erl b/lib/hipe/rtl/hipe_rtl_binary.erl
%% new file mode 100644
%% index 0000000000..5ea51acedb
%% --- /dev/null
%% +++ b/lib/hipe/rtl/hipe_rtl_binary.erl
%% @@ -0,0 +1,80 @@
%%%
%%% %CopyrightBegin%
%%%
%%% Copyright Ericsson AB 2006-2009. All Rights Reserved.
%%%
%%% The contents of this file are subject to the Erlang Public License,
%%% Version 1.1, (the "License"); you may not use this file except in
%%% compliance with the License. You should have received a copy of the
%%% Erlang Public License along with this software. If not, it can be
%%% retrieved online at http://www.erlang.org/.
%%%
%%% Software distributed under the License is distributed on an "AS IS"
%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%%% the License for the specific language governing rights and limitations
%%% under the License.
%%%
%%% %CopyrightEnd%
%%%
%%%-------------------------------------------------------------------
%%% File    : hipe_rtl_binary.erl
%%% Author  : Per Gustafsson <pergu@it.uu.se>
%%% Description : Classifies a binary operation as matching or
%%%               construction and forwards it to the RTL generator
%%%               of the corresponding module.
%%%
%%% Created :  5 Mar 2007 by Per Gustafsson <pergu@it.uu.se>
%%%-------------------------------------------------------------------
-module(hipe_rtl_binary).

-export([gen_rtl/7]).

%% Generates RTL code for the binary operation BsOP.
%% Returns {Code, ConstTab1}. An operation unknown to
%% type_of_operation/1 fails with function_clause.
gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SysLimName, ConstTab) ->
  gen_rtl_for(type_of_operation(BsOP), BsOP, Dst, Args, TrueLblName,
              FalseLblName, SysLimName, ConstTab).

%% The match generator takes neither the system-limit label nor the
%% constant table, so the table is paired with its code unchanged.
gen_rtl_for(match, BsOP, Dst, Args, TrueLblName, FalseLblName,
            _SysLimName, ConstTab) ->
  MatchCode = hipe_rtl_binary_match:gen_rtl(BsOP, Dst, Args,
                                            TrueLblName, FalseLblName),
  {MatchCode, ConstTab};
%% The construct generator returns the {Code, ConstTab} pair itself.
gen_rtl_for(construct, BsOP, Dst, Args, TrueLblName, FalseLblName,
            SysLimName, ConstTab) ->
  hipe_rtl_binary_construct:gen_rtl(BsOP, Dst, Args, TrueLblName,
                                    FalseLblName, SysLimName, ConstTab).

%% Classifies an operation as 'match' or 'construct'.
%% The patterns are pairwise disjoint, so clause order is irrelevant.

%% --- operations handled by hipe_rtl_binary_match ---
type_of_operation({bs_start_match,_}) -> match;
type_of_operation({{bs_start_match,_},_}) -> match;
type_of_operation({bs_get_binary,_,_}) -> match;
type_of_operation({bs_get_binary_all,_,_}) -> match;
type_of_operation({bs_get_binary_all_2,_,_}) -> match;
type_of_operation({bs_get_integer,_,_}) -> match;
type_of_operation({bs_get_float,_,_}) -> match;
type_of_operation({bs_skip_bits,_}) -> match;
type_of_operation({bs_skip_bits_all,_,_}) -> match;
type_of_operation({bs_test_tail,_}) -> match;
type_of_operation({bs_restore,_}) -> match;
type_of_operation({bs_save,_}) -> match;
type_of_operation({bs_test_unit,_}) -> match;
type_of_operation({bs_match_string,_,_}) -> match;
type_of_operation(bs_context_to_binary) -> match;
type_of_operation(bs_get_utf8) -> match;
type_of_operation({bs_get_utf16,_}) -> match;
type_of_operation(bs_validate_unicode_retract) -> match;
%% --- operations handled by hipe_rtl_binary_construct ---
type_of_operation({bs_add,_}) -> construct;
type_of_operation({bs_add,_,_}) -> construct;
type_of_operation(bs_bits_to_bytes) -> construct;
type_of_operation(bs_bits_to_bytes2) -> construct;
type_of_operation({bs_init,_}) -> construct;
type_of_operation({bs_init,_,_}) -> construct;
type_of_operation({bs_init_bits,_}) -> construct;
type_of_operation({bs_init_bits,_,_}) -> construct;
type_of_operation({bs_put_binary,_,_}) -> construct;
type_of_operation({bs_put_binary_all,_}) -> construct;
type_of_operation({bs_put_float,_,_,_}) -> construct;
type_of_operation({bs_put_integer,_,_,_}) -> construct;
type_of_operation({bs_put_string,_,_}) -> construct;
type_of_operation({unsafe_bs_put_integer,_,_,_}) -> construct;
type_of_operation(bs_utf8_size) -> construct;
type_of_operation(bs_put_utf8) -> construct;
type_of_operation(bs_utf16_size) -> construct;
type_of_operation({bs_put_utf16,_}) -> construct;
type_of_operation(bs_validate_unicode) -> construct;
type_of_operation(bs_final) -> construct;
type_of_operation({bs_append,_,_,_,_}) -> construct;
type_of_operation({bs_private_append,_,_}) -> construct;
type_of_operation(bs_init_writable) -> construct.
%% diff --git a/lib/hipe/rtl/hipe_rtl_binary_construct.erl b/lib/hipe/rtl/hipe_rtl_binary_construct.erl
%% new file mode 100644
%% index 0000000000..29993b9715
%% --- /dev/null
%% +++ b/lib/hipe/rtl/hipe_rtl_binary_construct.erl
%% @@ -0,0 +1,1363 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2007-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.
%% See the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%% ====================================================================
%%  Module   :	hipe_rtl_binary_construct
%%  Purpose  :  Generates RTL code for the binary construction
%%              operations dispatched through gen_rtl/7.
%%  Notes    :
%%  History  :	* 2001-06-14 Erik Johansson (happi@it.uu.se): Created.
%% ====================================================================
%% Exports   :  gen_rtl/7
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-module(hipe_rtl_binary_construct).
-export([gen_rtl/7]).
%% Field accessors from hipe_tagscheme that this module calls
%% unqualified.
-import(hipe_tagscheme, [set_field_from_term/3,
			 get_field_from_term/3,
			 set_field_from_pointer/3,
			 get_field_from_pointer/3]).
%%-------------------------------------------------------------------------

-include("../main/hipe.hrl").
-include("hipe_rtl.hrl").
-include("hipe_literals.hrl").

%% Note: BYTE_SHIFT and LOW_BITS expand to a hipe_rtl:mk_imm/1 call at
%% every use site; LOW_BITS_INT is the plain integer form of LOW_BITS.
-define(BYTE_SHIFT, hipe_rtl:mk_imm(3)). %% Turn bits into bytes or vice versa
-define(LOW_BITS, hipe_rtl:mk_imm(7)). %% Three lowest bits set
-define(LOW_BITS_INT, 7).
-define(BYTE_SIZE, 8).
%% 2^(number of bits in a target word - 3) - 1.
-define(MAX_BINSIZE, ((1 bsl ((hipe_rtl_arch:word_size()*?BYTE_SIZE)-3)) - 1)).


%% -------------------------------------------------------------------------
%% The code is generated as a list of lists, it will be flattened later.
%%

%% gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SystemLimitLblName,
%%         ConstTab) -> {Code, ConstTab1}
%%
%% Generates the RTL code for one binary construction operation.
%%   BsOP  - the operation: an atom or a tagged tuple, see the clauses
%%   Dst   - list of destinations; the expected shape depends on BsOP
%%   Args  - list of arguments; the expected shape depends on BsOP
%%   TrueLblName, FalseLblName, SystemLimitLblName - names of the labels
%%           that the generated code jumps to
%%   ConstTab - constant table. Only bs_put_string hands it to
%%           put_string/7 and returns the table that comes back; every
%%           other operation returns it unchanged.
%% An operation that matches none of the inner clauses raises case_clause.
gen_rtl(BsOP, Dst, Args, TrueLblName, FalseLblName, SystemLimitLblName, ConstTab) ->
  %%io:format("~w, ~w, ~w~n", [BsOP, Args, Dst]),
  case BsOP of
    {bs_put_string, String, SizeInBytes} ->
      %% put_string/7 already returns the {Code, ConstTab} pair.
      [NewOffset] = get_real(Dst),
      [Base, Offset] = Args,
      put_string(NewOffset, ConstTab, String, SizeInBytes, Base, Offset,
		 TrueLblName);
    _ ->
      Code =
	case BsOP of
	  %% Binary whose size in bytes is the constant Size.
	  {bs_init, Size, _Flags} ->
	    [] = Args,
	    [Dst0, Base, Offset] = Dst,
	    %% The size is validated as a bit count (Size * 8).
	    case is_illegal_const(Size bsl 3) of
	      true ->
		%% NOTE(review): unlike the other failure branches this
		%% goto is not wrapped in a list; confirm the caller
		%% copes with a bare instruction.
		hipe_rtl:mk_goto(SystemLimitLblName);
	      false ->
		const_init2(Size, Dst0, Base, Offset, TrueLblName)
	    end;

	  %% Binary whose size is the run-time argument Size.
	  {bs_init, _Flags} ->
	    [Size] = Args,
	    [Dst0, Base, Offset] = Dst,
	    var_init2(Size, Dst0, Base, Offset, TrueLblName,
		      SystemLimitLblName, FalseLblName);

	  %% As bs_init, but Size is passed unscaled to the check and
	  %% to const_init_bits/5.
	  {bs_init_bits, Size, _Flags} ->
	    [] = Args,
	    [Dst0, Base, Offset] = Dst,
	    case is_illegal_const(Size) of
	      true ->
		%% NOTE(review): bare goto, see the note on bs_init.
		hipe_rtl:mk_goto(SystemLimitLblName);
	      false ->
		const_init_bits(Size, Dst0, Base, Offset, TrueLblName)
	    end;

	  {bs_init_bits, _Flags} ->
	    [Size] = Args,
	    [Dst0, Base, Offset] = Dst,
	    var_init_bits(Size, Dst0, Base, Offset, TrueLblName,
			  SystemLimitLblName, FalseLblName);

	  {bs_put_binary_all, _Flags} ->
	    [Src, Base, Offset] = Args,
	    [NewOffset] = get_real(Dst),
	    put_binary_all(NewOffset, Src, Base, Offset, TrueLblName, FalseLblName);

	  {bs_put_binary, Size, _Flags} ->
	    case is_illegal_const(Size) of
	      true ->
		[hipe_rtl:mk_goto(FalseLblName)];
	      false ->
		[NewOffset] = get_real(Dst),
		%% Three arguments: Size is used directly.
		%% Four arguments: Size and Bits go through make_size/3
		%% to obtain the size register.
		case Args of
		  [Src, Base, Offset] ->
		    put_static_binary(NewOffset, Src, Size, Base, Offset,
				      TrueLblName, FalseLblName);
		  [Src, Bits, Base, Offset] ->
		    {SizeCode, SizeReg} = make_size(Size, Bits, FalseLblName),
		    InCode = put_dynamic_binary(NewOffset, Src, SizeReg, Base,
						Offset, TrueLblName, FalseLblName),
		    SizeCode ++ InCode
		end
	    end;

	  {bs_put_float, Size, Flags, ConstInfo} ->
	    [NewOffset] = get_real(Dst),
	    Aligned = aligned(Flags),
	    LittleEndian = littleendian(Flags),
	    case is_illegal_const(Size) of
	      true ->
		[hipe_rtl:mk_goto(FalseLblName)];
	      false ->
		case Args of
		  [Src, Base, Offset] ->
		    %% The C-call sequence is built first and handed to
		    %% put_float/10 as its CCode argument.
		    CCode = static_float_c_code(NewOffset, Src, Base, Offset, Size, Flags,
						TrueLblName, FalseLblName),
		    put_float(NewOffset, Src, Base, Offset, Size, CCode, Aligned,
			      LittleEndian, ConstInfo, TrueLblName);
		  [Src, Bits, Base, Offset] ->
		    {SizeCode, SizeReg} = make_size(Size, Bits, FalseLblName),
		    InCode = float_c_code(NewOffset, Src, Base, Offset, SizeReg,
					  Flags, TrueLblName, FalseLblName),
		    SizeCode ++ InCode
		end
	    end;

	  {bs_put_integer, Size, Flags, ConstInfo} ->
	    Aligned = aligned(Flags),
	    LittleEndian = littleendian(Flags),
	    [NewOffset] = get_real(Dst),
	    case is_illegal_const(Size) of
	      true ->
		[hipe_rtl:mk_goto(FalseLblName)];
	      false ->
		case ConstInfo of
		  %% ConstInfo =:= fail: jump straight to the false label.
		  fail ->
		    [hipe_rtl:mk_goto(FalseLblName)];
		  _ ->
		    case Args of
		      [Src, Base, Offset] ->
			CCode = static_int_c_code(NewOffset, Src,
						  Base, Offset, Size,
						  Flags, TrueLblName,
						  FalseLblName),
			put_static_int(NewOffset, Src, Base, Offset, Size,
				       CCode, Aligned, LittleEndian, TrueLblName);
		      [Src, Bits, Base, Offset] ->
			{SizeCode, SizeReg} = make_size(Size, Bits,
							FalseLblName),
			CCode = int_c_code(NewOffset, Src, Base,
					   Offset, SizeReg, Flags,
					   TrueLblName, FalseLblName),
			InCode =
			  put_dynamic_int(NewOffset, Src, Base, Offset,
					  SizeReg, CCode, Aligned,
					  LittleEndian, TrueLblName),
			SizeCode ++ InCode
		    end
		end
	    end;

	  %% Size 0: nothing is written, the offset is only copied.
	  {unsafe_bs_put_integer, 0, _Flags, _ConstInfo} ->
	    [NewOffset] = get_real(Dst),
	    case Args of
	      [_Src, _Base, Offset] ->
		[hipe_rtl:mk_move(NewOffset,Offset),
		 hipe_rtl:mk_goto(TrueLblName)];
	      [_Src, _Bits, _Base, Offset] ->
		[hipe_rtl:mk_move(NewOffset,Offset),
		 hipe_rtl:mk_goto(TrueLblName)]
	    end;

	  {unsafe_bs_put_integer, Size, Flags, ConstInfo} ->
	    case is_illegal_const(Size) of
	      true ->
		[hipe_rtl:mk_goto(FalseLblName)];
	      false ->
		Aligned = aligned(Flags),
		LittleEndian = littleendian(Flags),
		[NewOffset] = get_real(Dst),
		case ConstInfo of
		  fail ->
		    [hipe_rtl:mk_goto(FalseLblName)];
		  _ ->
		    case Args of
		      [Src, Base, Offset] ->
			CCode = static_int_c_code(NewOffset, Src,
						  Base, Offset, Size,
						  Flags, TrueLblName,
						  FalseLblName),
			put_unsafe_static_int(NewOffset, Src, Base,
					      Offset, Size,
					      CCode, Aligned, LittleEndian,
					      TrueLblName);
		      [Src, Bits, Base, Offset] ->
			{SizeCode, SizeReg} = make_size(Size, Bits,
							FalseLblName),
			CCode = int_c_code(NewOffset, Src, Base,
					   Offset, SizeReg, Flags,
					   TrueLblName, FalseLblName),
			InCode =
			  put_unsafe_dynamic_int(NewOffset, Src, Base,
						 Offset, SizeReg, CCode,
						 Aligned, LittleEndian,
						 TrueLblName),
			SizeCode ++ InCode
		    end
		end
	    end;

	  bs_utf8_size ->
	    %% With no destination the call is skipped altogether.
	    case Dst of
	      [_DstVar] ->
		[_Arg] = Args,
		[hipe_rtl:mk_call(Dst, bs_utf8_size, Args,
				  TrueLblName, [], not_remote)];
	      [] ->
		[hipe_rtl:mk_goto(TrueLblName)]
	    end;

	  bs_put_utf8 ->
	    [_Src, _Base, _Offset] = Args,
	    NewDsts = get_real(Dst),
	    [hipe_rtl:mk_call(NewDsts, bs_put_utf8, Args,
			      TrueLblName, FalseLblName, not_remote)];

	  bs_utf16_size ->
	    %% With no destination the call is skipped altogether.
	    case Dst of
	      [_DstVar] ->
		[_Arg] = Args,
		[hipe_rtl:mk_call(Dst, bs_utf16_size, Args,
				  TrueLblName, [], not_remote)];
	      [] ->
		[hipe_rtl:mk_goto(TrueLblName)]
	    end;

	  {bs_put_utf16, Flags} ->
	    [_Src, _Base, _Offset] = Args,
	    NewDsts = get_real(Dst),
	    PrimOp = % workaround for bif/primop arity restrictions
	      case littleendian(Flags) of
		false -> bs_put_utf16be;
		true -> bs_put_utf16le
	      end,
	    [hipe_rtl:mk_call(NewDsts, PrimOp, Args,
			      TrueLblName, FalseLblName, not_remote)];

	  bs_validate_unicode ->
	    [_Arg] = Args,
	    [hipe_rtl:mk_call([], bs_validate_unicode, Args,
			      TrueLblName, FalseLblName, not_remote)];

	  bs_final ->
	    Zero = hipe_rtl:mk_imm(0),
	    [Src, Offset] = Args,
	    [BitSize, ByteSize] = create_regs(2),
	    [ShortLbl, LongLbl] = create_lbls(2),
	    case Dst of
	      [DstVar] ->
		%% BitSize = Offset 'and' LOW_BITS. When it is zero Src
		%% is moved to DstVar as is; otherwise a sub binary
		%% with Offset srl 3 bytes plus BitSize bits is made.
		[hipe_rtl:mk_alub(BitSize, Offset, 'and', ?LOW_BITS, eq,
				  hipe_rtl:label_name(ShortLbl),
				  hipe_rtl:label_name(LongLbl)), ShortLbl,
		 hipe_rtl:mk_move(DstVar, Src),
		 hipe_rtl:mk_goto(TrueLblName),
		 LongLbl,
		 hipe_rtl:mk_alu(ByteSize, Offset, 'srl', ?BYTE_SHIFT),
		 hipe_tagscheme:mk_sub_binary(DstVar, ByteSize,
					      Zero, BitSize, Zero, Src),
		 hipe_rtl:mk_goto(TrueLblName)];
	      [] ->
		[hipe_rtl:mk_goto(TrueLblName)]
	    end;

	  bs_init_writable ->
	    Zero = hipe_rtl:mk_imm(0),
	    [Size] = Args,
	    [DstVar] = Dst,
	    [SizeReg] = create_regs(1),
	    [Base] = create_unsafe_regs(1),
	    [hipe_rtl:mk_gctest(?PROC_BIN_WORDSIZE + ?SUB_BIN_WORDSIZE),
	     check_and_untag_fixnum(Size, SizeReg, FalseLblName),
	     allocate_writable(DstVar, Base, SizeReg, Zero, Zero),
	     hipe_rtl:mk_goto(TrueLblName)];

	  {bs_private_append, _U, _F} ->
	    [Size, Bin] = Args,
	    [DstVar, Base, Offset] = Dst,
	    [ProcBin] = create_vars(1),
	    [SubSize, SizeReg, EndSubSize, EndSubBitSize] = create_regs(4),
	    SubBinSize = {sub_binary, binsize},
	    %% The size fields of Bin are overwritten in place and Bin
	    %% itself becomes the result.
	    [get_field_from_term({sub_binary, orig}, Bin, ProcBin),
	     get_field_from_term(SubBinSize, Bin, SubSize),
	     check_and_untag_fixnum(Size, SizeReg, FalseLblName),
	     realloc_binary(SizeReg, ProcBin, Base),
	     calculate_sizes(Bin, SizeReg, Offset, EndSubSize, EndSubBitSize),
	     set_field_from_term(SubBinSize, Bin, EndSubSize),
	     set_field_from_term({sub_binary, bitsize}, Bin, EndSubBitSize),
	     hipe_rtl:mk_move(DstVar, Bin),
	     hipe_rtl:mk_goto(TrueLblName)];

	  {bs_append, _U, _F, _B, _Bla} ->
	    [Size, Bin] = Args,
	    [DstVar, Base, Offset] = Dst,
	    [ProcBin] = create_vars(1),
	    [Flags, SizeReg, IsWritable, EndSubSize, EndSubBitSize] =
	      create_regs(5),
	    [ContLbl,ContLbl2,ContLbl3,WritableLbl,NotWritableLbl] = Lbls =
	      create_lbls(5),
	    [ContLblName, ContLbl2Name, ContLbl3Name, Writable, NotWritable] =
	      [hipe_rtl:label_name(Lbl) || Lbl <- Lbls],
	    Zero = hipe_rtl:mk_imm(0),
	    SubIsWritable = {sub_binary, is_writable},
	    %% Three tests guard the writable path: Bin is a sub binary,
	    %% its is_writable field is non-zero, and the flags of its
	    %% original carry PB_IS_WRITABLE. Any failure goes to
	    %% NotWritableLbl, which falls back on not_writable_code/7.
	    [hipe_rtl:mk_gctest(?SUB_BIN_WORDSIZE + ?PROC_BIN_WORDSIZE),
	     check_and_untag_fixnum(Size, SizeReg, FalseLblName),
	     hipe_tagscheme:test_bitstr(Bin, ContLblName, FalseLblName, 0.99),
	     ContLbl,
	     hipe_tagscheme:test_subbinary(Bin,ContLbl2Name, NotWritable),
	     ContLbl2,
	     get_field_from_term(SubIsWritable, Bin, IsWritable),
	     hipe_rtl:mk_branch(IsWritable, 'ne', Zero,
				ContLbl3Name, NotWritable),
	     ContLbl3,
	     get_field_from_term({sub_binary, orig}, Bin, ProcBin),
	     get_field_from_term({proc_bin, flags}, ProcBin, Flags),
	     hipe_rtl:mk_alub(Flags, Flags, 'and',
			      hipe_rtl:mk_imm(?PB_IS_WRITABLE),
			      eq, NotWritable, Writable, 0.01),
	     WritableLbl,
	     %% The old sub binary gives up its writable mark.
	     set_field_from_term(SubIsWritable, Bin, Zero),
	     realloc_binary(SizeReg, ProcBin, Base),
	     calculate_sizes(Bin, SizeReg, Offset, EndSubSize, EndSubBitSize),
	     hipe_tagscheme:mk_sub_binary(DstVar, EndSubSize, Zero,
					  EndSubBitSize, Zero,
					  hipe_rtl:mk_imm(1), ProcBin),
	     hipe_rtl:mk_goto(TrueLblName),
	     NotWritableLbl,
	     not_writable_code(Bin, SizeReg, DstVar, Base, Offset,
			       TrueLblName, FalseLblName)]
	end,
      {Code, ConstTab}
  end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Code that is used in the append and init writeable functions
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Appending to a binary that cannot be extended in place: allocates a
%% new writable binary via allocate_writable/5 and copies Bin into it
%% with put_binary_all/6, starting at offset 0.
not_writable_code(Bin, SizeReg, Dst, Base, Offset,
		  TrueLblName, FalseLblName) ->
  [SrcBase] = create_unsafe_regs(1),
  [SrcOffset, SrcSize, TotSize, TotBytes, UsedBytes] = create_regs(5),
  [IncLbl,AllLbl] = Lbls = create_lbls(2),
  [IncLblName,AllLblName] = get_label_names(Lbls),
  [get_base_offset_size(Bin, SrcBase, SrcOffset, SrcSize, FalseLblName),
   %% TotSize = SrcSize + SizeReg; TotBytes = (TotSize + 7) srl 3
   hipe_rtl:mk_alu(TotSize, SrcSize, add, SizeReg),
   hipe_rtl:mk_alu(TotBytes, TotSize, add, ?LOW_BITS),
   hipe_rtl:mk_alu(TotBytes, TotBytes, srl, ?BYTE_SHIFT),
   %% UsedBytes = 2 * TotBytes, raised to 256 when it is smaller
   hipe_rtl:mk_alu(UsedBytes, TotBytes, sll, hipe_rtl:mk_imm(1)),
   hipe_rtl:mk_branch(UsedBytes, ge, hipe_rtl:mk_imm(256),
		      AllLblName, IncLblName),
   IncLbl,
   hipe_rtl:mk_move(UsedBytes, hipe_rtl:mk_imm(256)),
   AllLbl,
   allocate_writable(Dst, Base, UsedBytes, TotBytes, TotSize),
   put_binary_all(Offset, Bin, Base, hipe_rtl:mk_imm(0),
		  TrueLblName, FalseLblName)].

%% Emits a bs_allocate call for UsedBytes whose result is Base, creates
%% a refc binary from it with the PB_IS_WRITABLE and PB_ACTIVE_WRITER
%% flags, and makes Dst a sub binary on top of it with TotSize srl 3
%% bytes and TotSize 'and' 7 bits.
allocate_writable(Dst, Base, UsedBytes, TotBytes, TotSize) ->
  Zero = hipe_rtl:mk_imm(0),
  [NextLbl] = create_lbls(1),
  [EndSubSize, EndSubBitSize, ProcBin] = create_regs(3),
  [hipe_rtl:mk_call([Base], bs_allocate, [UsedBytes],
		    hipe_rtl:label_name(NextLbl), [], not_remote),
   NextLbl,
   hipe_tagscheme:create_refc_binary(Base, TotBytes,
				     hipe_rtl:mk_imm(?PB_IS_WRITABLE bor
						     ?PB_ACTIVE_WRITER),
				     ProcBin),
   hipe_rtl:mk_alu(EndSubSize, TotSize, srl, ?BYTE_SHIFT),
   hipe_rtl:mk_alu(EndSubBitSize, TotSize, 'and', ?LOW_BITS),
   hipe_tagscheme:mk_sub_binary(Dst, EndSubSize, Zero, EndSubBitSize,
				Zero, hipe_rtl:mk_imm(1), ProcBin)].

%% Emits a fixnum test on Size, untags it into SizeReg, and branches to
%% FalseLblName unless SizeReg >= 0. The code that follows in the
%% instruction stream continues at the trailing NextLbl.
check_and_untag_fixnum(Size, SizeReg, FalseLblName) ->
  [ContLbl,NextLbl] = Lbls = create_lbls(2),
  [ContLblName,NextLblName] = get_label_names(Lbls),
  [hipe_tagscheme:test_fixnum(Size, ContLblName, FalseLblName, 0.99),
   ContLbl,
   hipe_tagscheme:untag_fixnum(SizeReg,Size),
   hipe_rtl:mk_branch(SizeReg, ge, hipe_rtl:mk_imm(0), NextLblName,
		      FalseLblName),
   NextLbl].
%% Emits code that grows the proc bin ProcBin by SizeReg bits, rounded
%% up to whole bytes, and leaves the address of its bytes in Base.
%%
%% The new byte size is stored in the proc bin and PB_ACTIVE_WRITER is
%% or-ed into its flags. If the underlying binary's orig_size is
%% smaller than the new size, bs_reallocate is called with twice the
%% new size and the proc bin's val and bytes fields are refreshed.
%% Otherwise the existing bytes pointer is simply loaded.
realloc_binary(SizeReg, ProcBin, Base) ->
  Labels = create_lbls(4),
  [KeepLbl, GrowLbl, GrownLbl, DoneLbl] = Labels,
  [KeepName, GrowName, GrownName, DoneName] = get_label_names(Labels),
  [OldBytes, RoundedBits, AddedBytes, DoubledBytes, PbFlags, WantedBytes,
   Capacity, BinPtr] = create_regs(8),
  SizeField = {proc_bin, binsize},
  FlagsField = {proc_bin, flags},
  ValField = {proc_bin, val},
  BytesField = {proc_bin, bytes},
  CapacityField = {binary, orig_size},
  [%% WantedBytes = OldBytes + ((SizeReg + 7) srl 3)
   hipe_tagscheme:get_field_from_term(SizeField, ProcBin, OldBytes),
   hipe_rtl:mk_alu(RoundedBits, SizeReg, add, ?LOW_BITS),
   hipe_rtl:mk_alu(AddedBytes, RoundedBits, srl, ?BYTE_SHIFT),
   hipe_rtl:mk_alu(WantedBytes, AddedBytes, add, OldBytes),
   hipe_tagscheme:set_field_from_term(SizeField, ProcBin, WantedBytes),
   %% or PB_ACTIVE_WRITER into the flags
   hipe_tagscheme:get_field_from_term(FlagsField, ProcBin, PbFlags),
   hipe_rtl:mk_alu(PbFlags, PbFlags, 'or',
                   hipe_rtl:mk_imm(?PB_ACTIVE_WRITER)),
   hipe_tagscheme:set_field_from_term(FlagsField, ProcBin, PbFlags),
   %% compare the current capacity with the wanted size
   hipe_tagscheme:get_field_from_term(ValField, ProcBin, BinPtr),
   hipe_tagscheme:get_field_from_pointer(CapacityField, BinPtr, Capacity),
   hipe_rtl:mk_branch(Capacity, lt, WantedBytes, GrowName, KeepName),
   KeepLbl,
   hipe_tagscheme:get_field_from_term(BytesField, ProcBin, Base),
   hipe_rtl:mk_goto(DoneName),
   GrowLbl,
   hipe_rtl:mk_alu(DoubledBytes, WantedBytes, sll, hipe_rtl:mk_imm(1)),
   hipe_rtl:mk_call([BinPtr], bs_reallocate, [BinPtr, DoubledBytes],
                    GrownName, [], not_remote),
   GrownLbl,
   hipe_tagscheme:set_field_from_pointer(CapacityField, BinPtr,
                                         DoubledBytes),
   hipe_tagscheme:set_field_from_term(ValField, ProcBin, BinPtr),
   hipe_tagscheme:extract_binary_bytes(BinPtr, Base),
   hipe_tagscheme:set_field_from_term(BytesField, ProcBin, Base),
   DoneLbl].
%% Emits code computing, from the sub binary Bin:
%%   Offset        = binsize * 8 + bitsize   (current size in bits)
%%   EndSubSize    = (Offset + SizeReg) srl 3
%%   EndSubBitSize = (Offset + SizeReg) 'and' 7
calculate_sizes(Bin, SizeReg, Offset, EndSubSize, EndSubBitSize) ->
  [CurBytes, CurBits, EndBits] = create_regs(3),
  [hipe_tagscheme:get_field_from_term({sub_binary, binsize}, Bin, CurBytes),
   hipe_tagscheme:get_field_from_term({sub_binary, bitsize}, Bin, CurBits),
   hipe_rtl:mk_alu(Offset, CurBytes, sll, ?BYTE_SHIFT),
   hipe_rtl:mk_alu(Offset, Offset, add, CurBits),
   hipe_rtl:mk_alu(EndBits, Offset, add, SizeReg),
   hipe_rtl:mk_alu(EndSubSize, EndBits, srl, ?BYTE_SHIFT),
   hipe_rtl:mk_alu(EndSubBitSize, EndBits, 'and', ?LOW_BITS)].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Code that is used to create calls to beam functions
%%
%%  X_c_code/8, used for putting terms into binaries
%%
%%  X_get_c_code/10, used for getting terms from binaries
%%
%%  - gen_test_sideffect_bs_call/4 is used to make a C-call that might
%%       fail but doesn't return an erlang value.
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Like float_c_code/8, but for a size known at compile time: the
%% constant is first loaded into a fresh register.
static_float_c_code(NewOffset, Src, Base, Offset, Size, Flags,
                    TrueLblName, FalseLblName) ->
  [SizeReg] = create_regs(1),
  LoadSize = hipe_rtl:mk_move(SizeReg, hipe_rtl:mk_imm(Size)),
  CallCode = float_c_code(NewOffset, Src, Base, Offset, SizeReg, Flags,
                          TrueLblName, FalseLblName),
  [LoadSize | CallCode].

%% Call to the bs_put_small_float primop.
float_c_code(NewOffset, Src, Base, Offset, SizeReg, Flags,
             TrueLblName, FalseLblName) ->
  PrimOp = bs_put_small_float,
  put_c_code(PrimOp, NewOffset, Src, Base, Offset, SizeReg,
             Flags, TrueLblName, FalseLblName).

%% Like int_c_code/8, but for a size known at compile time: the
%% constant is first loaded into a fresh register.
static_int_c_code(NewOffset, Src, Base, Offset, Size, Flags,
                  TrueLblName, FalseLblName) ->
  [SizeReg] = create_regs(1),
  LoadSize = hipe_rtl:mk_move(SizeReg, hipe_rtl:mk_imm(Size)),
  CallCode = int_c_code(NewOffset, Src, Base, Offset, SizeReg, Flags,
                        TrueLblName, FalseLblName),
  [LoadSize | CallCode].

%% Call to the bs_put_big_integer primop.
int_c_code(NewOffset, Src, Base, Offset, SizeReg, Flags,
           TrueLblName, FalseLblName) ->
  PrimOp = bs_put_big_integer,
  put_c_code(PrimOp, NewOffset, Src, Base, Offset, SizeReg,
             Flags, TrueLblName, FalseLblName).
%% Emits a bs_put_bits call copying Size bits of Src to Base+Offset,
%% passing 0 as flags, then sets NewOffset = Offset + Size and jumps to
%% TrueLblName.
binary_c_code(NewOffset, Src, Base, Offset, Size, TrueLblName) ->
  DoneLbl = hipe_rtl:mk_new_label(),
  [SizeReg, FlagsReg] = create_regs(2),
  ClearFlags = hipe_rtl:mk_move(FlagsReg, hipe_rtl:mk_imm(0)),
  LoadSize = hipe_rtl:mk_move(SizeReg, Size),
  Call = hipe_rtl:mk_call([], bs_put_bits,
                          [Src, SizeReg, Base, Offset, FlagsReg],
                          hipe_rtl:label_name(DoneLbl), [], not_remote),
  Advance = hipe_rtl:mk_alu(NewOffset, Offset, add, SizeReg),
  [ClearFlags, LoadSize, Call, DoneLbl, Advance,
   hipe_rtl:mk_goto(TrueLblName)].

%% Emits a call to the primop Func that may fail: failure continues at
%% FalseLblName, success sets NewOffset = Offset + SizeReg and
%% continues at TrueLblName.
put_c_code(Func, NewOffset, Src, Base, Offset, SizeReg, Flags,
           TrueLblName, FalseLblName) ->
  OkLbl = hipe_rtl:mk_new_label(),
  [FlagsReg] = create_regs(1),
  LoadFlags = hipe_rtl:mk_move(FlagsReg, hipe_rtl:mk_imm(Flags)),
  CheckedCall =
    gen_test_sideffect_bs_call(Func, [Src, SizeReg, Base, Offset, FlagsReg],
                               hipe_rtl:label_name(OkLbl), FalseLblName),
  Advance = hipe_rtl:mk_alu(NewOffset, Offset, add, SizeReg),
  [LoadFlags, CheckedCall, OkLbl, Advance, hipe_rtl:mk_goto(TrueLblName)].

%% Emits a call to Name and then branches on its result: a result equal
%% to 0 goes to FalseLblName, anything else to TrueLblName.
gen_test_sideffect_bs_call(Name, Args, TrueLblName, FalseLblName) ->
  [Result] = create_regs(1),
  AfterLbl = hipe_rtl:mk_new_label(),
  Call = hipe_rtl:mk_call([Result], Name, Args,
                          hipe_rtl:label_name(AfterLbl), [], not_remote),
  Test = hipe_rtl:mk_branch(Result, eq, hipe_rtl:mk_imm(0),
                            FalseLblName, TrueLblName, 0.01),
  [Call, AfterLbl, Test].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Small utility functions:
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% X fresh registers from hipe_rtl:mk_new_reg_gcsafe/0.
create_regs(X) when is_integer(X), X >= 0 ->
  [hipe_rtl:mk_new_reg_gcsafe() || _ <- lists:seq(1, X)].

%% X fresh registers from hipe_rtl:mk_new_reg/0.
create_unsafe_regs(X) when is_integer(X), X >= 0 ->
  [hipe_rtl:mk_new_reg() || _ <- lists:seq(1, X)].

%% X fresh variables from hipe_rtl:mk_new_var/0.
create_vars(X) when is_integer(X), X >= 0 ->
  [hipe_rtl:mk_new_var() || _ <- lists:seq(1, X)].

%% X fresh labels from hipe_rtl:mk_new_label/0.
create_lbls(X) when is_integer(X), X >= 0 ->
  [hipe_rtl:mk_new_label() || _ <- lists:seq(1, X)].

%% The names of the given labels, in the same order.
get_label_names(Lbls) ->
  lists:map(fun hipe_rtl:label_name/1, Lbls).
%% True when the ?BSF_ALIGNED bit of Flags is set.
aligned(Flags) ->
  case Flags band ?BSF_ALIGNED of
    0 -> false;
    1 -> true
  end.

%% True when bit value 2 of Flags is set.
littleendian(Flags) ->
  (Flags band 2) =:= 2.

%% True when Const is negative or does not fit in an unsigned
%% target word.
is_illegal_const(Const) ->
  Const < 0 orelse
    Const >= (1 bsl (hipe_rtl_arch:word_size() * ?BYTE_SIZE)).

%% Returns Dst when it already holds one destination; for an empty Dst
%% a fresh register is created to receive the value.
get_real([_NewOffset] = Dst) -> Dst;
get_real([]) -> create_regs(1).

%%-----------------------------------------------------------------------------
%% Help functions implementing the bs operations in rtl code.
%%
%% The following functions are called from the translation switch:
%%
%% - put_string/7 creates code to copy a string to a binary
%%   starting at base+offset and ending at base+newoffset
%%
%% - const_init2/5 initializes the creation of a binary of constant size
%%
%% - var_init2/7 initializes the creation of a binary of variable size
%%
%% - get_int_from_unaligned_bin/11 creates code to extract a fixed
%%   size integer from a binary or makes a c-call if it does not
%%   conform to some certain rules.
%%
%% - get_unknown_size_int/11 creates code to extract a variable size
%%   byte-aligned integer from a binary or makes a c-call if it
%%   does not conform to some certain rules.
%%
%% - skip_no_of_bits/5 creates code to skip a variable amount of bits
%%   in a binary.
%%
%% - load_match_buffer/7 reloads the C-matchbuffer to RTL registers.
%%
%% - expand_runtime/4 creates code that calculates a maximal heap need
%%   before a binary match
%%
%% NOTE(review): the last five entries describe matching helpers;
%% confirm that they are still defined in this module.
%%-----------------------------------------------------------------------------

%% Inserts String into the constant table as a byte block and emits
%% code that loads its address and copies SizeInBytes bytes of it.
%% Returns {Code, NewConstTab}.
put_string(NewOffset, ConstTab, String, SizeInBytes, Base, Offset, TLName) ->
  [StrAddr] = create_regs(1),
  {ConstTab1, BlockLbl} = hipe_consttab:insert_block(ConstTab, byte, String),
  LoadAddr = hipe_rtl:mk_load_address(StrAddr, BlockLbl, constant),
  CopyCode = copy_string(StrAddr, SizeInBytes, Base, Offset,
                         NewOffset, TLName),
  {[LoadAddr | CopyCode], ConstTab1}.
%% Emits code creating a binary of the constant byte size Size in Dst,
%% with Base as its data pointer and Offset set to 0.
%% Sizes up to ?MAX_HEAP_BIN_SIZE get a heap binary; larger ones are
%% allocated through a bs_allocate call and become refc binaries.
const_init2(Size, Dst, Base, Offset, TrueLblName) ->
  Log2WordSize = hipe_rtl_arch:log2_word_size(),
  WordSize = hipe_rtl_arch:word_size(),
  %% Only the refc branch uses this label.
  NextLbl = hipe_rtl:mk_new_label(),
  case Size =< ?MAX_HEAP_BIN_SIZE of
    true ->
      %% Heap need in words: (Size + 3*WordSize - 1) bsr Log2WordSize,
      %% plus ?SUB_BIN_WORDSIZE.
      [hipe_rtl:mk_gctest(((Size + 3*WordSize-1) bsr Log2WordSize)+?SUB_BIN_WORDSIZE),
       hipe_tagscheme:create_heap_binary(Base, Size, Dst),
       hipe_rtl:mk_move(Offset, hipe_rtl:mk_imm(0)),
       hipe_rtl:mk_goto(TrueLblName)];
    false ->
      ByteSize = hipe_rtl:mk_new_reg(),
      [hipe_rtl:mk_gctest(?PROC_BIN_WORDSIZE+?SUB_BIN_WORDSIZE),
       hipe_rtl:mk_move(Offset, hipe_rtl:mk_imm(0)),
       hipe_rtl:mk_move(ByteSize, hipe_rtl:mk_imm(Size)),
       hipe_rtl:mk_call([Base], bs_allocate, [ByteSize],
			hipe_rtl:label_name(NextLbl), [], not_remote),
       NextLbl,
       hipe_tagscheme:create_refc_binary(Base, ByteSize, Dst),
       hipe_rtl:mk_goto(TrueLblName)]
  end.

%% As const_init2/5, but Size is a constant number of bits.
%% The base binary holds (Size + 7) div 8 bytes and is built in a
%% temporary variable. When Size is not a multiple of ?BYTE_SIZE, Dst
%% becomes a sub binary of it with Size bsr 3 bytes and Size band 7
%% bits; otherwise the temporary is moved to Dst.
const_init_bits(Size, Dst, Base, Offset, TrueLblName) ->
  Log2WordSize = hipe_rtl_arch:log2_word_size(),
  WordSize = hipe_rtl_arch:word_size(),
  [NextLbl] = create_lbls(1),
  TmpDst = hipe_rtl:mk_new_var(),
  Zero = hipe_rtl:mk_imm(0),
  %% ExtraSpace is the additional heap need of the sub binary, if any.
  {ExtraSpace, SubBinCode} =
    if (Size rem ?BYTE_SIZE) =:= 0 ->
	{0,[hipe_rtl:mk_move(Dst, TmpDst)]};
       true ->
	{?SUB_BIN_WORDSIZE,
	 hipe_tagscheme:mk_sub_binary(Dst, hipe_rtl:mk_imm(Size bsr 3), Zero,
				      hipe_rtl:mk_imm(Size band ?LOW_BITS_INT),
				      Zero, TmpDst)}
    end,
  BaseBinCode =
    if Size =< (?MAX_HEAP_BIN_SIZE * 8) ->
	%% Here ByteSize is an integer ...
	ByteSize = (Size + 7) div 8,
	[hipe_rtl:mk_gctest(((ByteSize+ 3*WordSize-1) bsr Log2WordSize)+ ExtraSpace),
	 hipe_tagscheme:create_heap_binary(Base, ByteSize, TmpDst),
	 hipe_rtl:mk_move(Offset, Zero)];
       true ->
	%% ... while in this branch it is a register holding the size.
	ByteSize = hipe_rtl:mk_new_reg(),
	[hipe_rtl:mk_gctest(?PROC_BIN_WORDSIZE+ExtraSpace),
	 hipe_rtl:mk_move(Offset, Zero),
	 hipe_rtl:mk_move(ByteSize, hipe_rtl:mk_imm((Size+7) bsr 3)),
	 hipe_rtl:mk_call([Base], bs_allocate, [ByteSize],
			  hipe_rtl:label_name(NextLbl),[],not_remote),
	 NextLbl,
	 hipe_tagscheme:create_refc_binary(Base, ByteSize, TmpDst)]
    end,
  [BaseBinCode, SubBinCode, hipe_rtl:mk_goto(TrueLblName)].

%% Emits code creating a binary whose byte size is the run-time value
%% Size. get_32_bit_value/4 extracts the size into USize and receives
%% both failure labels. A size above ?MAX_BINSIZE branches to
%% SystemLimitLblName. Sizes up to ?MAX_HEAP_BIN_SIZE take the heap
%% binary path, larger ones the bs_allocate/refc path. Offset is set
%% to 0 on both paths.
var_init2(Size, Dst, Base, Offset, TrueLblName, SystemLimitLblName, FalseLblName) ->
  Log2WordSize = hipe_rtl_arch:log2_word_size(),
  WordSize = hipe_rtl_arch:word_size(),
  [ContLbl,HeapLbl,REFCLbl,NextLbl] = create_lbls(4),
  [USize,Tmp] = create_unsafe_regs(2),
  [get_32_bit_value(Size, USize, SystemLimitLblName, FalseLblName),
   hipe_rtl:mk_branch(USize, le, hipe_rtl:mk_imm(?MAX_BINSIZE),
		      hipe_rtl:label_name(ContLbl),
		      SystemLimitLblName),
   ContLbl,
   hipe_rtl:mk_move(Offset, hipe_rtl:mk_imm(0)),
   hipe_rtl:mk_branch(USize, le, hipe_rtl:mk_imm(?MAX_HEAP_BIN_SIZE),
		      hipe_rtl:label_name(HeapLbl),
		      hipe_rtl:label_name(REFCLbl)),
   HeapLbl,
   %% Tmp = ((USize + 3*WordSize - 1) srl Log2WordSize)
   %%       + ?SUB_BIN_WORDSIZE, the run-time heap need in words.
   hipe_rtl:mk_alu(Tmp, USize, add, hipe_rtl:mk_imm(3*WordSize-1)),
   hipe_rtl:mk_alu(Tmp, Tmp, srl, hipe_rtl:mk_imm(Log2WordSize)),
   hipe_rtl:mk_alu(Tmp, Tmp, add, hipe_rtl:mk_imm(?SUB_BIN_WORDSIZE)),
   hipe_rtl:mk_gctest(Tmp),
   hipe_tagscheme:create_heap_binary(Base, USize, Dst),
   hipe_rtl:mk_goto(TrueLblName),
   REFCLbl,
   hipe_rtl:mk_gctest(?PROC_BIN_WORDSIZE+?SUB_BIN_WORDSIZE),
   hipe_rtl:mk_call([Base], bs_allocate, [USize],
		    hipe_rtl:label_name(NextLbl), [], not_remote),
   NextLbl,
   hipe_tagscheme:create_refc_binary(Base, USize, Dst),
   hipe_rtl:mk_goto(TrueLblName)].
%% Emits code creating a binary whose size in bits is the run-time
%% value Size. get_32_bit_value/4 extracts the size into USize and
%% receives both failure labels.
%%   ByteSize    = USize srl 3
%%   OffsetBits  = USize 'and' 7
%%   TotByteSize = ByteSize, plus 1 when OffsetBits is non-zero
%% The base binary holds TotByteSize bytes and is built in a temporary
%% register. When OffsetBits is non-zero, Dst becomes a sub binary of
%% it with ByteSize bytes and OffsetBits bits; otherwise the temporary
%% is moved to Dst. Offset is set to 0.
var_init_bits(Size, Dst, Base, Offset, TrueLblName, SystemLimitLblName, FalseLblName) ->
  [HeapLbl,REFCLbl,NextLbl,NoSubLbl,SubLbl,ContLbl,
   NoCreateSubBin, CreateSubBin, JoinLbl, JoinLbl2] = create_lbls(10),
  [USize,ByteSize,TotByteSize,OffsetBits] = create_regs(4),
  [TmpDst] = create_unsafe_regs(1),
  Log2WordSize = hipe_rtl_arch:log2_word_size(),
  WordSize = hipe_rtl_arch:word_size(),
  %% A single up-front gc test for the larger of the two base binary
  %% needs, plus ?SUB_BIN_WORDSIZE.
  MaximumWords =
    erlang:max((?MAX_HEAP_BIN_SIZE + 3*WordSize) bsr Log2WordSize,
	       ?PROC_BIN_WORDSIZE) + ?SUB_BIN_WORDSIZE,
  Zero = hipe_rtl:mk_imm(0),
  [hipe_rtl:mk_gctest(MaximumWords),
   get_32_bit_value(Size, USize, SystemLimitLblName, FalseLblName),
   hipe_rtl:mk_alu(ByteSize, USize, srl, ?BYTE_SHIFT),
   hipe_rtl:mk_alub(OffsetBits, USize, 'and', ?LOW_BITS, eq,
		    hipe_rtl:label_name(NoSubLbl),
		    hipe_rtl:label_name(SubLbl)),
   NoSubLbl,
   hipe_rtl:mk_move(TotByteSize, ByteSize),
   hipe_rtl:mk_goto(hipe_rtl:label_name(JoinLbl)),
   SubLbl,
   %% Trailing bits need one more byte.
   hipe_rtl:mk_alu(TotByteSize, ByteSize, 'add', hipe_rtl:mk_imm(1)),
   JoinLbl,
   %% Note that USize, the size in bits, is what is compared with
   %% ?MAX_BINSIZE here.
   hipe_rtl:mk_branch(USize, le, hipe_rtl:mk_imm(?MAX_BINSIZE),
		      hipe_rtl:label_name(ContLbl),
		      SystemLimitLblName),
   ContLbl,
   hipe_rtl:mk_branch(TotByteSize, 'le', hipe_rtl:mk_imm(?MAX_HEAP_BIN_SIZE),
		      hipe_rtl:label_name(HeapLbl),
		      hipe_rtl:label_name(REFCLbl)),
   HeapLbl,
   hipe_tagscheme:create_heap_binary(Base, TotByteSize, TmpDst),
   hipe_rtl:mk_goto(hipe_rtl:label_name(JoinLbl2)),
   REFCLbl,
   hipe_rtl:mk_call([Base], bs_allocate, [TotByteSize],
		    hipe_rtl:label_name(NextLbl),[],not_remote),
   NextLbl,
   hipe_tagscheme:create_refc_binary(Base, TotByteSize, TmpDst),
   JoinLbl2,
   hipe_rtl:mk_move(Offset, Zero),
   hipe_rtl:mk_branch(OffsetBits, 'eq', Zero,
		      hipe_rtl:label_name(NoCreateSubBin),
		      hipe_rtl:label_name(CreateSubBin)),
   CreateSubBin,
   hipe_tagscheme:mk_sub_binary(Dst, ByteSize, Zero, OffsetBits, Zero, TmpDst),
   hipe_rtl:mk_goto(TrueLblName),
   NoCreateSubBin,
   hipe_rtl:mk_move(Dst, TmpDst),
   hipe_rtl:mk_goto(TrueLblName)].

%% put_binary_all(NewOffset, Src, Base, Offset, TLName, FLName) -> RTL code
%%
%% Emits code that copies all of the bitstring Src into the binary being
%% built at Base/Offset.  get_base_offset_size/5 unpacks Src (jumping to
%% FLName when Src is not a bitstring) and test_alignment/5 selects between
%% the inline byte copy and the code produced by binary_c_code/6.
put_binary_all(NewOffset, Src, Base, Offset, TLName, FLName) ->
  [DataPtr, DataOffs, DataBits] = create_regs(3),
  SlowPath = binary_c_code(NewOffset, Src, Base, Offset, DataBits, TLName),
  FastPath = copy_aligned_bytes(DataPtr, DataOffs, DataBits, Base, Offset,
                                NewOffset, TLName),
  Unpack = get_base_offset_size(Src, DataPtr, DataOffs, DataBits, FLName),
  Dispatch = test_alignment(DataOffs, DataBits, Offset, FastPath, SlowPath),
  Unpack ++ Dispatch.

%% test_alignment(SrcOffset, NumBits, Offset, AlignedCode, CCode) -> RTL code
%%
%% ORs the three bit quantities together and masks with ?LOW_BITS; when the
%% masked value is zero control continues into AlignedCode, otherwise it
%% branches to the label placed in front of CCode.
test_alignment(SrcOffset, NumBits, Offset, AlignedCode, CCode) ->
  [Bits] = create_regs(1),
  [FastLbl, SlowLbl] = create_lbls(2),
  FastName = hipe_rtl:label_name(FastLbl),
  SlowName = hipe_rtl:label_name(SlowLbl),
  [hipe_rtl:mk_alu(Bits, SrcOffset, 'or', NumBits),
   hipe_rtl:mk_alu(Bits, Bits, 'or', Offset),
   hipe_rtl:mk_alub(Bits, Bits, 'and', ?LOW_BITS, 'eq', FastName, SlowName),
   FastLbl,
   AlignedCode,
   SlowLbl,
   CCode].

%% put_static_binary(NewOffset, Src, Size, Base, Offset, TLName, FLName)
%%   -> RTL code
%%
%% Copies a compile-time known number of bits (Size) from Src.  For Size 0
%% only the bitstring check performed by get_base_offset_size/5 remains and
%% NewOffset is set to Offset.  Otherwise small_check/3 verifies that Src
%% holds at least Size bits before the aligned/unaligned dispatch.
put_static_binary(NewOffset, Src, Size, Base, Offset, TLName, FLName) ->
  [DataPtr] = create_unsafe_regs(1),
  [DataOffs, DataBits] = create_regs(2),
  if
    Size =:= 0 ->
      Unpack = get_base_offset_size(Src, DataPtr, DataOffs, DataBits, FLName),
      Unpack ++ [hipe_rtl:mk_move(NewOffset, Offset),
                 hipe_rtl:mk_goto(TLName)];
    true ->
      Wanted = hipe_rtl:mk_imm(Size),
      SlowPath = binary_c_code(NewOffset, Src, Base, Offset, Wanted, TLName),
      FastPath = copy_aligned_bytes(DataPtr, DataOffs, Wanted, Base,
                                    Offset, NewOffset, TLName),
      Unpack = get_base_offset_size(Src, DataPtr, DataOffs, DataBits, FLName),
      Bound = small_check(Wanted, DataBits, FLName),
      Dispatch = test_alignment(DataOffs, Wanted, Offset, FastPath, SlowPath),
      Unpack ++ Bound ++ Dispatch
  end.

%% put_dynamic_binary(NewOffset, Src, SizeReg, Base, Offset, TLName, FLName)
%%   -> RTL code
%%
%% Same scheme as the static case, but the number of bits to copy lives in
%% the register SizeReg.  small_check/3 branches to FLName when SizeReg is
%% larger than the size found in Src.
put_dynamic_binary(NewOffset, Src, SizeReg, Base, Offset, TLName, FLName) ->
  [DataPtr] = create_unsafe_regs(1),
  [DataOffs, DataBits] = create_regs(2),
  SlowPath = binary_c_code(NewOffset, Src, Base, Offset, SizeReg, TLName),
  FastPath = copy_aligned_bytes(DataPtr, DataOffs, SizeReg, Base, Offset,
                                NewOffset, TLName),
  Unpack = get_base_offset_size(Src, DataPtr, DataOffs, DataBits, FLName),
  Bound = small_check(SizeReg, DataBits, FLName),
  Dispatch = test_alignment(DataOffs, SizeReg, Offset, FastPath, SlowPath),
  Unpack ++ Bound ++ Dispatch.

%% put_float(NewOffset, Src, Base, Offset, Size, CCode, Aligned,
%%           LittleEndian, ConstInfo, TrueLblName) -> RTL code
%%
%% Only aligned floats of size 64 get inline code (big- or little-endian
%% copy followed by CCode behind a label that the copy code is given as its
%% failure target); every other combination returns CCode unchanged.
put_float(NewOffset, Src, Base, Offset, 64, CCode, Aligned, LittleEndian,
          ConstInfo, TrueLblName) ->
  [FallbackLbl] = create_lbls(1),
  case {Aligned, LittleEndian} of
    {false, _} ->
      CCode;
    {true, true} ->
      Inline = copy_float_little(Base, Offset, NewOffset, Src,
                                 hipe_rtl:label_name(FallbackLbl),
                                 TrueLblName, ConstInfo),
      Inline ++ [FallbackLbl|CCode];
    {true, false} ->
      Inline = copy_float_big(Base, Offset, NewOffset, Src,
                              hipe_rtl:label_name(FallbackLbl),
                              TrueLblName, ConstInfo),
      Inline ++ [FallbackLbl|CCode]
  end;
put_float(_NewOffset, _Src, _Base, _Offset, _Size, CCode, _Aligned,
          _LittleEndian, _ConstInfo, _TrueLblName) ->
  CCode.

%% put_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned,
%%                LittleEndian, TrueLblName) -> RTL code
%%
%% Stores an integer of compile-time known Size.  make_init_end/3 supplies
%% the fixnum test/untag prologue and the epilogue that falls back to
%% CCode.  Unaligned little-endian stores have no inline version and use
%% CCode directly.
put_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned,
               LittleEndian, TrueLblName) ->
  {Prologue, Epilogue, RawInt} = make_init_end(Src, CCode, TrueLblName),
  case {Aligned, LittleEndian} of
    {false, true} ->
      CCode;
    {false, false} ->
      Store = copy_offset_int_big(Base, Offset, NewOffset, Size, RawInt),
      Prologue ++ Store ++ Epilogue;
    {true, false} ->
      Store = copy_int_big(Base, Offset, NewOffset, Size, RawInt),
      Prologue ++ Store ++ Epilogue;
    {true, true} ->
      Store = copy_int_little(Base, Offset, NewOffset, Size, RawInt),
      Prologue ++ Store ++ Epilogue
  end.

%% put_unsafe_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned,
%%                       LittleEndian, TrueLblName) -> RTL code
%%
%% Variant of put_static_int/9 that uses make_init_end/2, i.e. Src is
%% untagged without a preceding fixnum test and there is no fallback to
%% CCode after the inline store.
put_unsafe_static_int(NewOffset, Src, Base, Offset, Size, CCode, Aligned,
                      LittleEndian, TrueLblName) ->
  {Prologue, Epilogue, RawInt} = make_init_end(Src, TrueLblName),
  case {Aligned, LittleEndian} of
    {false, true} ->
      CCode;
    {false, false} ->
      Store = copy_offset_int_big(Base, Offset, NewOffset, Size, RawInt),
      Prologue ++ Store ++ Epilogue;
    {true, false} ->
      Store = copy_int_big(Base, Offset, NewOffset, Size, RawInt),
      Prologue ++ Store ++ Epilogue;
    {true, true} ->
      Store = copy_int_little(Base, Offset, NewOffset, Size, RawInt),
      Prologue ++ Store ++ Epilogue
  end.

%% put_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned,
%%                 LittleEndian, TrueLblName) -> RTL code
%%
%% Integer store whose size is held in SizeReg.  Only aligned stores are
%% done inline; unaligned ones return CCode.
put_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned,
                LittleEndian, TrueLblName) ->
  {Prologue, Epilogue, RawInt} = make_init_end(Src, CCode, TrueLblName),
  case Aligned of
    false ->
      CCode;
    true ->
      Store =
        case LittleEndian of
          false ->
            copy_int_big(Base, Offset, NewOffset, SizeReg, RawInt);
          true ->
            copy_int_little(Base, Offset, NewOffset, SizeReg, RawInt)
        end,
      Prologue ++ Store ++ Epilogue
  end.

%% put_unsafe_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode,
%%                        Aligned, LittleEndian, TrueLblName) -> RTL code
%%
%% As put_dynamic_int/9 but built on make_init_end/2 (no fixnum test).
put_unsafe_dynamic_int(NewOffset, Src, Base, Offset, SizeReg, CCode, Aligned,
                       LittleEndian, TrueLblName) ->
  {Prologue, Epilogue, RawInt} = make_init_end(Src, TrueLblName),
  case Aligned of
    false ->
      CCode;
    true ->
      Store =
        case LittleEndian of
          false ->
            copy_int_big(Base, Offset, NewOffset, SizeReg, RawInt);
          true ->
            copy_int_little(Base, Offset, NewOffset, SizeReg, RawInt)
        end,
      Prologue ++ Store ++ Epilogue
  end.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Help functions used by the above
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% make_init_end(Src, CCode, TrueLblName) -> {Init, End, UntaggedSrc}
%%
%% Init tests that Src is a fixnum (branching to the label in front of
%% CCode otherwise) and untags it into the fresh register UntaggedSrc.
%% End jumps to TrueLblName and is followed by that label and CCode.
make_init_end(Src, CCode, TrueLblName) ->
  [FallbackLbl, FixnumLbl] = create_lbls(2),
  [RawInt] = create_regs(1),
  TypeTest = hipe_tagscheme:test_fixnum(Src, hipe_rtl:label_name(FixnumLbl),
                                        hipe_rtl:label_name(FallbackLbl), 0.99),
  Untag = hipe_tagscheme:untag_fixnum(RawInt, Src),
  Prologue = [TypeTest, FixnumLbl, Untag],
  Epilogue = [hipe_rtl:mk_goto(TrueLblName), FallbackLbl | CCode],
  {Prologue, Epilogue, RawInt}.

%% make_init_end(Src, TrueLblName) -> {Init, End, UntaggedSrc}
%%
%% Unchecked variant: Init only untags Src, End only jumps to TrueLblName.
make_init_end(Src, TrueLblName) ->
  [RawInt] = create_regs(1),
  Untag = hipe_tagscheme:untag_fixnum(RawInt, Src),
  {[Untag], [hipe_rtl:mk_goto(TrueLblName)], RawInt}.

%% get_base_offset_size(Binary, SrcBase, SrcOffset, SrcSize, FLName)
%%   -> RTL code
%%
%% Emits code that checks that Binary is a bitstring (else -> FLName) and
%% fills in SrcSize (byte size shifted by ?BYTE_SHIFT, plus the bitsize
%% field for sub binaries), SrcOffset (sub binary offset/bitoffset, or 0)
%% and SrcBase (data pointer of the underlying heap or refc binary).
get_base_offset_size(Binary, SrcBase, SrcOffset, SrcSize, FLName) ->
  [JoinLbl, DoneLbl, BitstrLbl, SubBinLbl, PlainLbl, HeapBinLbl, RefcBinLbl] =
    AllLbls = create_lbls(7),
  [JoinName, DoneName, BitstrName, SubBinName,
   PlainName, HeapBinName, RefcBinName] = get_label_names(AllLbls),
  [TailBits, TailOffs] = create_regs(2),
  [Underlying] = create_vars(1),
  TypeAndSize =
    [hipe_tagscheme:test_bitstr(Binary, BitstrName, FLName, 0.99),
     BitstrLbl,
     get_field_from_term({sub_binary,binsize}, Binary, SrcSize),
     hipe_rtl:mk_alu(SrcSize, SrcSize, sll, ?BYTE_SHIFT),
     hipe_tagscheme:test_subbinary(Binary, SubBinName, PlainName)],
  %% Sub binary: add the extra bits and compute the bit offset into the
  %% original binary.
  SubBinCase =
    [SubBinLbl,
     get_field_from_term({sub_binary,bitsize}, Binary, TailBits),
     get_field_from_term({sub_binary,offset}, Binary, SrcOffset),
     hipe_rtl:mk_alu(SrcSize, SrcSize, add, TailBits),
     get_field_from_term({sub_binary,bitoffset}, Binary, TailOffs),
     hipe_rtl:mk_alu(SrcOffset, SrcOffset, sll, ?BYTE_SHIFT),
     hipe_rtl:mk_alu(SrcOffset, SrcOffset, add, TailOffs),
     get_field_from_term({sub_binary,orig}, Binary, Underlying),
     hipe_rtl:mk_goto(JoinName)],
  PlainCase =
    [PlainLbl,
     hipe_rtl:mk_move(SrcOffset, hipe_rtl:mk_imm(0)),
     hipe_rtl:mk_move(Underlying, Binary)],
  %% Locate the data bytes of the underlying binary.
  BaseCode =
    [JoinLbl,
     hipe_tagscheme:test_heap_binary(Underlying, HeapBinName, RefcBinName),
     HeapBinLbl,
     hipe_rtl:mk_alu(SrcBase, Underlying, add,
                     hipe_rtl:mk_imm(?HEAP_BIN_DATA-2)),
     hipe_rtl:mk_goto(DoneName),
     RefcBinLbl,
     get_field_from_term({proc_bin,bytes}, Underlying, SrcBase),
     DoneLbl],
  TypeAndSize ++ SubBinCase ++ PlainCase ++ BaseCode.

%% copy_aligned_bytes(CopyBase, CopyOffset, Size, Base, Offset, NewOffset,
%%                    TrueLblName) -> RTL code
%%
%% Byte-wise copy of Size bits from CopyBase/CopyOffset to Base/Offset via
%% easy_loop/6; NewOffset becomes Offset + Size.
copy_aligned_bytes(CopyBase, CopyOffset, Size, Base, Offset, NewOffset, TrueLblName) ->
  [DstPtr, SrcPtr] = create_unsafe_regs(2),
  [NBytes, TailBits, Index] = create_regs(3),
  Setup = initializations(SrcPtr, DstPtr, Index, CopyOffset, Offset,
                          CopyBase, Base),
  Counts = [hipe_rtl:mk_alu(TailBits, Size, 'and', ?LOW_BITS),
            hipe_rtl:mk_alu(NBytes, Size, srl, ?BYTE_SHIFT),
            hipe_rtl:mk_alu(NewOffset, Offset, 'add', Size)],
  Loop = easy_loop(SrcPtr, DstPtr, Index, NBytes, TailBits, TrueLblName),
  Setup ++ Counts ++ Loop.

%% copy_string(StringBase, StringSize, BinBase, BinOffset, NewOffset,
%%             TrueLblName) -> RTL code
%%
%% Copies StringSize bytes from StringBase into the binary.  When the low
%% bits of BinOffset (mask ?LOW_BITS) are zero easy_loop/6 is used,
%% otherwise hard_loop/6 which merges bytes across the bit offset.
copy_string(StringBase, StringSize, BinBase, BinOffset, NewOffset, TrueLblName) ->
  [ByteOffs, Index, BitOffs] = create_regs(3),
  [DstPtr] = create_unsafe_regs(1),
  [AlignedLbl, UnalignedLbl] = create_lbls(2),
  Count = hipe_rtl:mk_imm(StringSize),
  Setup =
    [hipe_rtl:mk_alu(ByteOffs, BinOffset, srl, ?BYTE_SHIFT),
     hipe_rtl:mk_alu(DstPtr, BinBase, add, ByteOffs),
     hipe_rtl:mk_move(Index, hipe_rtl:mk_imm(0)),
     hipe_rtl:mk_alub(BitOffs, BinOffset, 'and', ?LOW_BITS, eq,
                      hipe_rtl:label_name(AlignedLbl),
                      hipe_rtl:label_name(UnalignedLbl)),
     AlignedLbl,
     hipe_rtl:mk_alu(NewOffset, BinOffset, add,
                     hipe_rtl:mk_imm(?bytes_to_bits(StringSize)))],
  AlignedLoop = easy_loop(StringBase, DstPtr, Index, Count,
                          hipe_rtl:mk_imm(0), TrueLblName),
  UnalignedEntry =
    [UnalignedLbl,
     hipe_rtl:mk_alu(NewOffset, BinOffset, add,
                     hipe_rtl:mk_imm(?bytes_to_bits(StringSize)))],
  UnalignedLoop = hard_loop(StringBase, DstPtr, Index, Count,
                            BitOffs, TrueLblName),
  Setup ++ AlignedLoop ++ UnalignedEntry ++ UnalignedLoop.

%% small_check(SizeVar, CopySize, FalseLblName) -> RTL code
%%
%% Branches to FalseLblName unless SizeVar =< CopySize.
small_check(SizeVar, CopySize, FalseLblName) ->
  FitsLbl = hipe_rtl:mk_new_label(),
  FitsName = hipe_rtl:label_name(FitsLbl),
  [hipe_rtl:mk_branch(SizeVar, le, CopySize, FitsName, FalseLblName),
   FitsLbl].

%% easy_loop(BaseSrc, BaseDst, BothOffset, Iterations, Extra, TrueLblName)
%%   -> RTL code
%%
%% Byte-aligned copy loop: copies bytes while BothOffset differs from
%% Iterations.  Afterwards, if Extra is non-zero, one more byte is loaded,
%% its low (?BYTE_SIZE - Extra) bits are cleared by a shift right/left
%% pair, and it is stored.  Always ends by jumping to TrueLblName.
easy_loop(BaseSrc, BaseDst, BothOffset, Iterations, Extra, TrueLblName) ->
  [Byte, ClearBits] = create_regs(2),
  [BodyLbl, HeadLbl, DoneLbl, TailLbl] = create_lbls(4),
  One = hipe_rtl:mk_imm(1),
  CopyLoop =
    [HeadLbl,
     hipe_rtl:mk_branch(BothOffset, ne, Iterations,
                        hipe_rtl:label_name(BodyLbl),
                        hipe_rtl:label_name(DoneLbl), 0.99),
     BodyLbl,
     hipe_rtl:mk_load(Byte, BaseSrc, BothOffset, byte, unsigned),
     hipe_rtl:mk_store(BaseDst, BothOffset, Byte, byte),
     hipe_rtl:mk_alu(BothOffset, BothOffset, add, One),
     hipe_rtl:mk_goto(hipe_rtl:label_name(HeadLbl))],
  Trailer =
    [DoneLbl,
     hipe_rtl:mk_branch(Extra, eq, hipe_rtl:mk_imm(0), TrueLblName,
                        hipe_rtl:label_name(TailLbl)),
     TailLbl,
     hipe_rtl:mk_load(Byte, BaseSrc, BothOffset, byte, unsigned),
     hipe_rtl:mk_alu(ClearBits, hipe_rtl:mk_imm(?BYTE_SIZE), sub, Extra),
     hipe_rtl:mk_alu(Byte, Byte, srl, ClearBits),
     hipe_rtl:mk_alu(Byte, Byte, sll, ClearBits),
     hipe_rtl:mk_store(BaseDst, BothOffset, Byte, byte),
     hipe_rtl:mk_goto(TrueLblName)],
  CopyLoop ++ Trailer.

%% hard_loop(BaseSrc, BaseDst, BothOffset, Iterations, InitOffset,
%%           TrueLblName) -> RTL code
%%
%% Copy loop for a destination that starts InitOffset bits into a byte.
%% Each source byte is split: its upper part (shifted right by InitOffset)
%% is ORed into the pending destination byte and stored, and the remainder
%% (shifted left by ?BYTE_SIZE - InitOffset) becomes the next pending byte,
%% which is stored once more after the loop.
hard_loop(BaseSrc, BaseDst, BothOffset, Iterations,
          InitOffset, TrueLblName) ->
  [CarryShift, HighPart, Pending, SrcByte, OutByte] = create_regs(5),
  [BodyLbl, DoneLbl, HeadLbl] = create_lbls(3),
  Setup =
    [hipe_rtl:mk_load(Pending, BaseDst, BothOffset, byte, unsigned),
     hipe_rtl:mk_alu(CarryShift, hipe_rtl:mk_imm(?BYTE_SIZE), sub, InitOffset)],
  Loop =
    [HeadLbl,
     hipe_rtl:mk_branch(BothOffset, ne, Iterations,
                        hipe_rtl:label_name(BodyLbl),
                        hipe_rtl:label_name(DoneLbl)),
     BodyLbl,
     hipe_rtl:mk_load(SrcByte, BaseSrc, BothOffset, byte, unsigned),
     hipe_rtl:mk_alu(HighPart, SrcByte, srl, InitOffset),
     hipe_rtl:mk_alu(OutByte, Pending, 'or', HighPart),
     hipe_rtl:mk_store(BaseDst, BothOffset, OutByte, byte),
     hipe_rtl:mk_alu(Pending, SrcByte, sll, CarryShift),
     hipe_rtl:mk_alu(BothOffset, BothOffset, 'add', hipe_rtl:mk_imm(1)),
     hipe_rtl:mk_goto(hipe_rtl:label_name(HeadLbl))],
  Flush =
    [DoneLbl,
     hipe_rtl:mk_store(BaseDst, BothOffset, Pending, byte),
     hipe_rtl:mk_goto(TrueLblName)],
  Setup ++ Loop ++ Flush.

%% initializations(BaseTmp1, BaseTmp2, BothOffset, CopyOffset, Offset,
%%                 CopyBase, Base) -> RTL code
%%
%% Sets BaseTmp1 := CopyBase + (CopyOffset srl ?BYTE_SHIFT),
%% BaseTmp2 := Base + (Offset srl ?BYTE_SHIFT) and BothOffset := 0.
initializations(BaseTmp1, BaseTmp2, BothOffset, CopyOffset, Offset, CopyBase, Base) ->
  [SrcBytes, DstBytes] = create_regs(2),
  ByteOffsets = [hipe_rtl:mk_alu(SrcBytes, CopyOffset, srl, ?BYTE_SHIFT),
                 hipe_rtl:mk_alu(DstBytes, Offset, srl, ?BYTE_SHIFT)],
  Pointers = [hipe_rtl:mk_alu(BaseTmp1, CopyBase, add, SrcBytes),
              hipe_rtl:mk_alu(BaseTmp2, Base, add, DstBytes)],
  ByteOffsets ++ Pointers ++ [hipe_rtl:mk_move(BothOffset, hipe_rtl:mk_imm(0))].

%% copy_int_little(Base, Offset, NewOffset, Size, Tmp1) -> RTL code
%%
%% Little-endian store of the untagged integer in Tmp1 (which is clobbered).
%% Whole bytes are written by little_loop/4, lowest byte first; a final
%% partial byte, if any, is shifted left so that its bits occupy the top of
%% the byte.  The first clause handles a compile-time integer Size, the
%% second a Size held in a register.
copy_int_little(Base, Offset, NewOffset, Size, Tmp1) when is_integer(Size) ->
  [StopIndex, Index] = create_regs(2),
  WholeBytes = Size div ?BYTE_SIZE,
  Setup =
    [hipe_rtl:mk_alu(Index, Offset, srl, ?BYTE_SHIFT),
     hipe_rtl:mk_alu(StopIndex, hipe_rtl:mk_imm(WholeBytes), 'add', Index)],
  Loop = little_loop(Tmp1, StopIndex, Index, Base),
  Partial =
    case Size band 7 of
      0 ->
        [];
      Bits ->
        [hipe_rtl:mk_alu(Tmp1, Tmp1, sll, hipe_rtl:mk_imm(?BYTE_SIZE-Bits)),
         hipe_rtl:mk_store(Base, Index, Tmp1, byte)]
    end,
  Advance = [hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(Size))],
  Setup ++ Loop ++ Partial ++ Advance;

copy_int_little(Base, Offset, NewOffset, Size, Tmp1) ->
  [WholeBytes, StopIndex, PadBits, Index] = create_regs(4),
  Setup =
    [hipe_rtl:mk_alu(WholeBytes, Size, srl, ?BYTE_SHIFT),
     hipe_rtl:mk_alu(Index, Offset, srl, ?BYTE_SHIFT),
     hipe_rtl:mk_alu(StopIndex, WholeBytes, 'add', Index)],
  Loop = little_loop(Tmp1, StopIndex, Index, Base),
  %% The trailing store is emitted unconditionally in the dynamic case.
  Trailer =
    [hipe_rtl:mk_alu(PadBits, Size, 'and', ?LOW_BITS),
     hipe_rtl:mk_alu(PadBits, hipe_rtl:mk_imm(?BYTE_SIZE), 'sub', PadBits),
     hipe_rtl:mk_alu(Tmp1, Tmp1, sll, PadBits),
     hipe_rtl:mk_store(Base, Index, Tmp1, byte),
     hipe_rtl:mk_alu(NewOffset, Offset, 'add', Size)],
  Setup ++ Loop ++ Trailer.

%% little_loop(Tmp1, Tmp3, TmpOffset, Base) -> RTL code
%%
%% While TmpOffset differs from Tmp3: store the low byte of Tmp1 at
%% Base+TmpOffset, shift Tmp1 right (arithmetically) by ?BYTE_SIZE and
%% increment TmpOffset.
little_loop(Tmp1, Tmp3, TmpOffset, Base) ->
  [HeadLbl, BodyLbl, ExitLbl] = create_lbls(3),
  HeadName = hipe_rtl:label_name(HeadLbl),
  [HeadLbl,
   hipe_rtl:mk_branch(TmpOffset, 'ne', Tmp3,
                      hipe_rtl:label_name(BodyLbl),
                      hipe_rtl:label_name(ExitLbl)),
   BodyLbl,
   hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte),
   hipe_rtl:mk_alu(Tmp1, Tmp1, 'sra', hipe_rtl:mk_imm(?BYTE_SIZE)),
   hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'add', hipe_rtl:mk_imm(1)),
   hipe_rtl:mk_goto(HeadName),
   ExitLbl].

%% big_loop(Tmp1, Tmp3, TmpOffset, Base) -> RTL code
%%
%% While TmpOffset differs from Tmp3: decrement TmpOffset, store the low
%% byte of Tmp1 at Base+TmpOffset and shift Tmp1 right by ?BYTE_SIZE.
big_loop(Tmp1, Tmp3, TmpOffset, Base) ->
  [HeadLbl, BodyLbl, ExitLbl] = create_lbls(3),
  HeadName = hipe_rtl:label_name(HeadLbl),
  [HeadLbl,
   hipe_rtl:mk_branch(TmpOffset, 'ne', Tmp3,
                      hipe_rtl:label_name(BodyLbl),
                      hipe_rtl:label_name(ExitLbl)),
   BodyLbl,
   hipe_rtl:mk_alu(TmpOffset, TmpOffset, 'sub', hipe_rtl:mk_imm(1)),
   hipe_rtl:mk_store(Base, TmpOffset, Tmp1, byte),
   hipe_rtl:mk_alu(Tmp1, Tmp1, 'sra', hipe_rtl:mk_imm(?BYTE_SIZE)),
   hipe_rtl:mk_goto(HeadName),
   ExitLbl].

%% copy_int_big(Base, Offset, NewOffset, Size, Tmp1) -> RTL code
%%
%% Big-endian store of the untagged integer in Tmp1 (clobbered) at the
%% byte position Offset/8.  Sizes 0, 1, 2, 3 and 4 bytes are unrolled;
%% other compile-time sizes and register sizes write a possible partial
%% last byte first and then use big_loop/4 from the last byte backwards.
copy_int_big(_Base, Offset, NewOffset, 0, _Tmp1) ->
  [hipe_rtl:mk_move(NewOffset, Offset)];
copy_int_big(Base, Offset, NewOffset, ?BYTE_SIZE, Tmp1) ->
  Index = hipe_rtl:mk_new_reg(),
  [hipe_rtl:mk_alu(Index, Offset, 'srl', hipe_rtl:mk_imm(3)),
   hipe_rtl:mk_store(Base, Index, Tmp1, byte),
   hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(8))];
copy_int_big(Base, Offset, NewOffset, 2*?BYTE_SIZE, Tmp1) ->
  Index = hipe_rtl:mk_new_reg(),
  %% One step = move one byte towards lower addresses and store the next
  %% more significant byte there.
  StepDown = [hipe_rtl:mk_alu(Index, Index, sub, hipe_rtl:mk_imm(1)),
              hipe_rtl:mk_alu(Tmp1, Tmp1, 'sra', hipe_rtl:mk_imm(8)),
              hipe_rtl:mk_store(Base, Index, Tmp1, byte)],
  [hipe_rtl:mk_alu(Index, Offset, 'srl', hipe_rtl:mk_imm(3)),
   hipe_rtl:mk_alu(Index, Index, 'add', hipe_rtl:mk_imm(1)),
   hipe_rtl:mk_store(Base, Index, Tmp1, byte)] ++
    StepDown ++
    [hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(16))];
copy_int_big(Base, Offset, NewOffset, 3*?BYTE_SIZE, Tmp1) ->
  Index = hipe_rtl:mk_new_reg(),
  StepDown = [hipe_rtl:mk_alu(Index, Index, sub, hipe_rtl:mk_imm(1)),
              hipe_rtl:mk_alu(Tmp1, Tmp1, sra, hipe_rtl:mk_imm(8)),
              hipe_rtl:mk_store(Base, Index, Tmp1, byte)],
  [hipe_rtl:mk_alu(Index, Offset, srl, hipe_rtl:mk_imm(3)),
   hipe_rtl:mk_alu(Index, Index, add, hipe_rtl:mk_imm(2)),
   hipe_rtl:mk_store(Base, Index, Tmp1, byte)] ++
    StepDown ++ StepDown ++
    [hipe_rtl:mk_alu(NewOffset, Offset, add, hipe_rtl:mk_imm(24))];
copy_int_big(Base, Offset,NewOffset, 4*?BYTE_SIZE, Tmp1) ->
  copy_big_word(Base, Offset, NewOffset, Tmp1);
copy_int_big(Base, Offset, NewOffset, Size, Tmp1) when is_integer(Size) ->
  [FirstByte, Index, TopBits] = create_regs(3),
  NBytes = (Size + 7) div ?BYTE_SIZE,
  StartIndex = hipe_rtl:mk_alu(FirstByte, Offset, sra, hipe_rtl:mk_imm(3)),
  Setup =
    case Size band 7 of
      0 ->
        [StartIndex,
         hipe_rtl:mk_alu(Index, FirstByte, add, hipe_rtl:mk_imm(NBytes))];
      Rest ->
        %% Partial last byte: its Rest bits go to the top of the byte.
        [StartIndex,
         hipe_rtl:mk_alu(Index, FirstByte, add, hipe_rtl:mk_imm(NBytes-1)),
         hipe_rtl:mk_alu(TopBits, Tmp1, sll, hipe_rtl:mk_imm(?BYTE_SIZE-Rest)),
         hipe_rtl:mk_store(Base, Index, TopBits, byte),
         hipe_rtl:mk_alu(Tmp1, Tmp1, sra, hipe_rtl:mk_imm(Rest))]
    end,
  Loop = big_loop(Tmp1, FirstByte, Index, Base),
  Setup ++ Loop ++
    [hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(Size))];
copy_int_big(Base, Offset, NewOffset, Size, Tmp1) ->
  [SizeBytes, FirstByte, RestBits, TopBits, PadBits, Index] =
    [hipe_rtl:mk_new_reg() || _ <- lists:seq(1, 6)],
  WholeLbl = hipe_rtl:mk_new_label(),
  PartialLbl = hipe_rtl:mk_new_label(),
  Setup =
    [hipe_rtl:mk_alu(SizeBytes, Size, 'srl', hipe_rtl:mk_imm(3)),
     hipe_rtl:mk_alu(FirstByte, Offset, 'srl', hipe_rtl:mk_imm(3)),
     hipe_rtl:mk_alu(Index, SizeBytes, 'add', FirstByte),
     hipe_rtl:mk_alub(RestBits, Size, 'and', hipe_rtl:mk_imm(7), 'eq',
                      hipe_rtl:label_name(WholeLbl),
                      hipe_rtl:label_name(PartialLbl)),
     PartialLbl,
     hipe_rtl:mk_alu(PadBits, hipe_rtl:mk_imm(8), 'sub', RestBits),
     hipe_rtl:mk_alu(TopBits, Tmp1, 'sll', PadBits),
     hipe_rtl:mk_store(Base, Index, TopBits, byte),
     WholeLbl,
     hipe_rtl:mk_alu(Tmp1, Tmp1, srl, RestBits)],
  Loop = big_loop(Tmp1, FirstByte, Index, Base),
  Setup ++ Loop ++ [hipe_rtl:mk_alu(NewOffset, Offset, 'add', Size)].

%% copy_big_word(Base, Offset, NewOffset, Word) -> RTL code
%%
%% Stores the four low bytes of Word (clobbered) in big-endian order
%% starting at byte Offset/8, and sets NewOffset := Offset + 32.
copy_big_word(Base, Offset, NewOffset, Word) ->
  Index = hipe_rtl:mk_new_reg(),
  StepDown = [hipe_rtl:mk_alu(Index, Index, 'sub', hipe_rtl:mk_imm(1)),
              hipe_rtl:mk_alu(Word, Word, 'sra', hipe_rtl:mk_imm(8)),
              hipe_rtl:mk_store(Base, Index, Word, byte)],
  [hipe_rtl:mk_alu(Index, Offset, 'srl', hipe_rtl:mk_imm(3)),
   hipe_rtl:mk_alu(Index, Index, 'add', hipe_rtl:mk_imm(3)),
   hipe_rtl:mk_store(Base, Index, Word, byte)] ++
    StepDown ++ StepDown ++ StepDown ++
    [hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(32))].

%% copy_little_word(Base, Offset, NewOffset, Word) -> RTL code
%%
%% Stores the four low bytes of Word (clobbered) in little-endian order
%% starting at byte (Offset srl ?BYTE_SHIFT); NewOffset := Offset + 32.
copy_little_word(Base, Offset, NewOffset, Word) ->
  Index = hipe_rtl:mk_new_reg(),
  StepUp = [hipe_rtl:mk_alu(Index, Index, 'add', hipe_rtl:mk_imm(1)),
            hipe_rtl:mk_alu(Word, Word, 'sra', hipe_rtl:mk_imm(?BYTE_SIZE)),
            hipe_rtl:mk_store(Base, Index, Word, byte)],
  [hipe_rtl:mk_alu(Index, Offset, 'srl', ?BYTE_SHIFT),
   hipe_rtl:mk_store(Base, Index, Word, byte)] ++
    StepUp ++ StepUp ++ StepUp ++
    [hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(32))].

%% copy_offset_int_big(Base, Offset, NewOffset, Size, Tmp1) -> RTL code
%%
%% Big-endian store of a compile-time sized integer at a bit offset that
%% need not be byte aligned.  The value in Tmp1 (clobbered) is written from
%% the last byte towards the first: the last byte gets the value shifted
%% left by the padding needed to end on NewOffset, the middle bytes are
%% produced by repeated arithmetic right shifts, and the first byte is
%% merged with the bits already present in the destination.
copy_offset_int_big(Base, Offset, NewOffset, Size, Tmp1) when is_integer(Size) ->
  [StartBit, FirstByte, EndBits, SavedVal, PadBits,
   ClearShift, KeepShift, LastBit, Existing, Cursor] =
    [hipe_rtl:mk_new_reg() || _ <- lists:seq(1, 10)],
  [LoopHeadLbl, LoopBodyLbl, FirstByteLbl, MultiByteLbl] =
    [hipe_rtl:mk_new_label() || _ <- lists:seq(1, 4)],
  WordBytes = hipe_rtl_arch:word_size(),
  ByteImm = hipe_rtl:mk_imm(?BYTE_SIZE),
  One = hipe_rtl:mk_imm(1),
  FirstByteName = hipe_rtl:label_name(FirstByteLbl),
  %% Positions: first/last byte index and the bit counts at both ends.
  Setup =
    [hipe_rtl:mk_alu(StartBit, Offset, 'and', ?LOW_BITS),
     hipe_rtl:mk_alu(FirstByte, Offset, srl, ?BYTE_SHIFT),
     hipe_rtl:mk_alu(NewOffset, Offset, 'add', hipe_rtl:mk_imm(Size)),
     hipe_rtl:mk_alu(LastBit, NewOffset, 'sub', One),
     hipe_rtl:mk_alu(Cursor, LastBit, srl, ?BYTE_SHIFT),
     hipe_rtl:mk_alu(EndBits, NewOffset, 'and', ?LOW_BITS),
     hipe_rtl:mk_alu(PadBits, ByteImm, 'sub', EndBits),
     hipe_rtl:mk_alu(PadBits, PadBits, 'and', ?LOW_BITS),
     hipe_rtl:mk_alu(EndBits, ByteImm, 'sub', PadBits),
     hipe_rtl:mk_move(SavedVal, Tmp1),
     hipe_rtl:mk_alu(Tmp1, Tmp1, 'sll', PadBits),
     hipe_rtl:mk_branch(Cursor, 'ne', FirstByte,
                        hipe_rtl:label_name(MultiByteLbl), FirstByteName)],
  %% Last byte, then the whole bytes in between.
  Tail =
    [MultiByteLbl,
     hipe_rtl:mk_store(Base, Cursor, Tmp1, byte),
     hipe_rtl:mk_move(Tmp1, SavedVal),
     hipe_rtl:mk_alu(Tmp1, Tmp1, 'sra', EndBits),
     hipe_rtl:mk_alu(Cursor, Cursor, 'sub', One),
     LoopHeadLbl,
     hipe_rtl:mk_branch(Cursor, 'ne', FirstByte,
                        hipe_rtl:label_name(LoopBodyLbl), FirstByteName),
     LoopBodyLbl,
     hipe_rtl:mk_store(Base, Cursor, Tmp1, byte),
     hipe_rtl:mk_alu(Tmp1, Tmp1, 'sra', ByteImm),
     hipe_rtl:mk_alu(Cursor, Cursor, 'sub', One),
     hipe_rtl:mk_goto(hipe_rtl:label_name(LoopHeadLbl))],
  %% First byte: keep the bits already in the destination, clear the
  %% corresponding bits of the value, and merge.
  Head =
    [FirstByteLbl,
     hipe_rtl:mk_load(Existing, Base, Cursor, byte, unsigned),
     hipe_rtl:mk_alu(KeepShift, ByteImm, 'sub', StartBit),
     hipe_rtl:mk_alu(Existing, Existing, 'srl', KeepShift),
     hipe_rtl:mk_alu(Existing, Existing, 'sll', KeepShift),
     hipe_rtl:mk_alu(ClearShift, StartBit, 'add',
                     hipe_rtl:mk_imm(?bytes_to_bits(WordBytes-1))),
     hipe_rtl:mk_alu(Tmp1, Tmp1, 'sll', ClearShift),
     hipe_rtl:mk_alu(Tmp1, Tmp1, 'srl', ClearShift),
     hipe_rtl:mk_alu(Tmp1, Tmp1, 'or', Existing),
     hipe_rtl:mk_store(Base, Cursor, Tmp1, byte)],
  Setup ++ Tail ++ Head.

%% copy_float_little(Base, Offset, NewOffset, Src, FalseLblName,
%%                   TrueLblName, ConstInfo) -> RTL code
%%
%% ConstInfo selects the code shape:
%%   fail - just jump to FalseLblName;
%%   pass - load the two words of the float without a type test and store
%%          them with copy_little_word/4 (low word first);
%%   var  - emit a flonum test (else -> FalseLblName) before the pass code.
copy_float_little(_Base, _Offset, _NewOffset, _Src, FalseLblName, _TrueLblName, fail) ->
  [hipe_rtl:mk_goto(FalseLblName)];
copy_float_little(Base, Offset, NewOffset, Src, _FalseLblName, TrueLblName, pass) ->
  LoWord = hipe_rtl:mk_new_reg(),
  HiWord = hipe_rtl:mk_new_reg(),
  MidOffset = hipe_rtl:mk_new_reg(),
  Load = hipe_tagscheme:unsafe_load_float(LoWord, HiWord, Src),
  First = copy_little_word(Base, Offset, MidOffset, LoWord),
  Second = copy_little_word(Base, MidOffset, NewOffset, HiWord),
  Load ++ First ++ Second ++ [hipe_rtl:mk_goto(TrueLblName)];
copy_float_little(Base, Offset, NewOffset, Src, FalseLblName, TrueLblName, var) ->
  IsFloatLbl = hipe_rtl:mk_new_label(),
  TypeTest = hipe_tagscheme:test_flonum(Src, hipe_rtl:label_name(IsFloatLbl),
                                        FalseLblName, 0.99),
  Store = copy_float_little(Base, Offset, NewOffset, Src,
                            FalseLblName, TrueLblName, pass),
  TypeTest ++ [IsFloatLbl|Store].

%% copy_float_big(Base, Offset, NewOffset, Src, FalseLblName, TrueLblName,
%%                ConstInfo) -> RTL code
%%
%% Big-endian counterpart of copy_float_little/7: the high word is stored
%% first, each word via copy_big_word/4.
copy_float_big(_Base, _Offset, _NewOffset, _Src, FalseLblName, _TrueLblName, fail) ->
  [hipe_rtl:mk_goto(FalseLblName)];
copy_float_big(Base, Offset, NewOffset, Src, _FalseLblName, TrueLblName,pass) ->
  LoWord = hipe_rtl:mk_new_reg(),
  HiWord = hipe_rtl:mk_new_reg(),
  MidOffset = hipe_rtl:mk_new_reg(),
  Load = hipe_tagscheme:unsafe_load_float(LoWord, HiWord, Src),
  First = copy_big_word(Base, Offset, MidOffset, HiWord),
  Second = copy_big_word(Base, MidOffset, NewOffset, LoWord),
  Load ++ First ++ Second ++ [hipe_rtl:mk_goto(TrueLblName)];
copy_float_big(Base, Offset, NewOffset, Src, FalseLblName, TrueLblName, var) ->
  IsFloatLbl = hipe_rtl:mk_new_label(),
  TypeTest = hipe_tagscheme:test_flonum(Src, hipe_rtl:label_name(IsFloatLbl),
                                        FalseLblName, 0.99),
  Store = copy_float_big(Base, Offset, NewOffset, Src,
                         FalseLblName, TrueLblName, pass),
  TypeTest ++ [IsFloatLbl|Store].

%% make_size(Unit, BitsVar, FalseLblName) -> {Code, DstReg}
%%
%% Emits code that computes Unit * BitsVar into the fresh register DstReg.
%% BitsVar is checked by first_part/3 to be a non-negative fixnum
%% (else -> FalseLblName).  Unit 1 needs no arithmetic, unit ?BYTE_SIZE is
%% a single shift by ?BYTE_SHIFT, and any other unit is expanded into
%% shift-and-add steps by multiply_code/4.
make_size(1, BitsVar, FalseLblName) ->
  [DstReg] = create_regs(1),
  {first_part(BitsVar, DstReg, FalseLblName), DstReg};
make_size(?BYTE_SIZE, BitsVar, FalseLblName) ->
  [DstReg] = create_regs(1),
  Code =
    first_part(BitsVar, DstReg, FalseLblName) ++
    [hipe_rtl:mk_alu(DstReg, DstReg, 'sll', ?BYTE_SHIFT)],
  {Code, DstReg};
make_size(UnitImm, BitsVar, FalseLblName) ->
  [DstReg] = create_regs(1),
  UnitList = number2list(UnitImm),
  Code = multiply_code(UnitList, BitsVar, DstReg, FalseLblName),
  {Code, DstReg}.

%% multiply_code(Shifts, Variable, Result, FalseLblName) -> RTL code
%%
%% Shifts is the list produced by number2list/1 (largest exponent first).
%% The emitted code zeroes Result, untags Variable into a register, and
%% branches to FalseLblName if any bit selected by set_high(Head) is set in
%% that register.  One shift/add pair per list element follows.
multiply_code(List=[Head|_Tail], Variable, Result, FalseLblName) ->
  Test = set_high(Head),
  Tmp1 = hipe_rtl:mk_new_reg(),
  SuccessLbl = hipe_rtl:mk_new_label(),
  Register = hipe_rtl:mk_new_reg(),
  Code = [hipe_rtl:mk_move(Result, hipe_rtl:mk_imm(0))|
          first_part(Variable, Register, FalseLblName)]
    ++
    [hipe_rtl:mk_alub(Tmp1, Register, 'and', hipe_rtl:mk_imm(Test),
                      'eq', hipe_rtl:label_name(SuccessLbl),
                      FalseLblName, 0.99),
     SuccessLbl],
  multiply_code(List, Register, Result, FalseLblName, Tmp1, Code).

%% multiply_code/6: appends, for every shift amount, code that adds
%% (Register sll ShiftSize) to Result and branches to FalseLblName when the
%% addition overflows.
multiply_code([ShiftSize|Rest], Register, Result, FalseLblName, Tmp1, OldCode) ->
  SuccessLbl = hipe_rtl:mk_new_label(),
  Code = OldCode ++ [hipe_rtl:mk_alu(Tmp1, Register, 'sll',
                                     hipe_rtl:mk_imm(ShiftSize)),
                     hipe_rtl:mk_alub(Result, Tmp1, 'add', Result,
                                      not_overflow,
                                      hipe_rtl:label_name(SuccessLbl),
                                      FalseLblName, 0.99),
                     SuccessLbl],
  multiply_code(Rest, Register, Result, FalseLblName, Tmp1, Code);
multiply_code([], _Register, _Result, _FalseLblName, _Tmp1, Code) ->
  Code.

%% number2list(X) -> [Exponent]
%%
%% Decomposes the non-negative integer X into the exponents of its set
%% bits, largest first, e.g. 12 -> [3,2], 5 -> [2,0], 0 -> [].
number2list(X) when is_integer(X), X >= 0 ->
  number2list(X, []).

number2list(1, Acc) ->
  lists:reverse([0|Acc]);
number2list(0, Acc) ->
  lists:reverse(Acc);
number2list(X, Acc) ->
  F = floorlog2(X),
  number2list(X-(1 bsl F), [F|Acc]).

%% floorlog2(X) -> integer()
%%
%% Position of the highest set bit of the positive integer X, i.e.
%% floor(log2(X)).  Computed with integer shifts only: the previous
%% floating-point formula (round(log(X)/log(2) - 0.5)) returned -1 for
%% X = 1, relied on float rounding, and failed for integers too large to
%% convert to a float.
floorlog2(X) when is_integer(X), X >= 1 ->
  floorlog2_shift(X, 0).

%% Counts how many times X can be halved before reaching 1.
floorlog2_shift(1, Acc) ->
  Acc;
floorlog2_shift(X, Acc) ->
  floorlog2_shift(X bsr 1, Acc + 1).

%% set_high(X) -> integer()
%%
%% Mask with X consecutive bits set, the highest of them being bit 26
%% (set_high(0) is 0).
%% NOTE(review): the constant 27 looks tied to a 32-bit word/fixnum layout
%% -- confirm for 64-bit targets.
set_high(X) ->
  set_high(X, 0).

set_high(0, Y) ->
  Y;
set_high(X, Y) ->
  set_high(X-1, Y+(1 bsl (27-X))).

%% get_32_bit_value(Size, USize, SystemLimitLblName, NegLblName) -> RTL code
%%
%% Emits code that extracts the integer Size into the untagged register
%% USize.  A fixnum is untagged and must be >= 0 (else -> NegLblName).  A
%% non-fixnum must pass hipe_tagscheme:test_pos_bignum/4 (else ->
%% NegLblName) and is then read with
%% hipe_tagscheme:get_one_word_pos_bignum/3, which is handed
%% SystemLimitLblName.  Both paths meet at the final label.
get_32_bit_value(Size, USize, SystemLimitLblName, NegLblName) ->
  [FixnumLbl, NotFixnumLbl, DoneLbl, BignumLbl] = create_lbls(4),
  FixnumName = hipe_rtl:label_name(FixnumLbl),
  NotFixnumName = hipe_rtl:label_name(NotFixnumLbl),
  DoneName = hipe_rtl:label_name(DoneLbl),
  BignumName = hipe_rtl:label_name(BignumLbl),
  [hipe_tagscheme:test_fixnum(Size, FixnumName, NotFixnumName, 0.99),
   FixnumLbl,
   hipe_tagscheme:untag_fixnum(USize, Size),
   hipe_rtl:mk_branch(USize, ge, hipe_rtl:mk_imm(0), DoneName, NegLblName),
   NotFixnumLbl,
   hipe_tagscheme:test_pos_bignum(Size, BignumName, NegLblName, 0.99),
   BignumLbl,
   hipe_tagscheme:get_one_word_pos_bignum(USize, Size, SystemLimitLblName),
   DoneLbl].


%% first_part(Var, Register, FalseLblName) -> RTL code
%%
%% Emits code that checks that Var is a fixnum and that it is >= 0 (either
%% failure -> FalseLblName) and then untags it into Register.
first_part(Var, Register, FalseLblName) ->
  [IsFixnumLbl, NonNegLbl] = create_lbls(2),
  TaggedZero = hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(0)),
  [hipe_tagscheme:test_fixnum(Var, hipe_rtl:label_name(IsFixnumLbl),
                              FalseLblName, 0.99),
   IsFixnumLbl,
   hipe_tagscheme:fixnum_ge(Var, TaggedZero,
                            hipe_rtl:label_name(NonNegLbl), FalseLblName, 0.99),
   NonNegLbl,
   hipe_tagscheme:untag_fixnum(Register, Var)].


diff --git a/lib/hipe/rtl/hipe_rtl_binary_match.erl b/lib/hipe/rtl/hipe_rtl_binary_match.erl new file mode 100644 index 0000000000..d147bed6d8 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_binary_match.erl @@ -0,0 +1,1134 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2007-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. 
+%%% +%%% %CopyrightEnd% +%%% +%%%------------------------------------------------------------------- +%%% File : hipe_rtl_binary_match.erl +%%% Author : Per Gustafsson <pergu@it.uu.se> +%%% Description : +%%% +%%% Created : 5 Mar 2007 by Per Gustafsson <pergu@it.uu.se> +%%%------------------------------------------------------------------- +-module(hipe_rtl_binary_match). + +-export([gen_rtl/5]). + +-import(hipe_tagscheme, [set_field_from_term/3, get_field_from_term/3]). + +-include("hipe_literals.hrl"). + +%%-------------------------------------------------------------------- + +-define(MAX_BINSIZE, trunc(?MAX_HEAP_BIN_SIZE / hipe_rtl_arch:word_size()) + 2). +-define(BYTE_SHIFT, 3). %% Turn bits into bytes or vice versa +-define(LOW_BITS, 7). %% Three lowest bits set +-define(BYTE_SIZE, 8). +-define(MAX_SMALL_BITS, (hipe_rtl_arch:word_size() * ?BYTE_SIZE - 5)). + +%%-------------------------------------------------------------------- + +gen_rtl({bs_start_match, 0}, [Ms], [Binary], TrueLblName, FalseLblName) -> + ReInitLbl = hipe_rtl:mk_new_label(), + BinaryLbl = hipe_rtl:mk_new_label(), + TestCode = + [hipe_rtl:mk_move(Ms,Binary), + hipe_tagscheme:test_matchstate(Binary, + hipe_rtl:label_name(ReInitLbl), + hipe_rtl:label_name(BinaryLbl), + 0.99)], + ReInitCode = reinit_matchstate(Ms, TrueLblName), + OrdinaryCode = make_matchstate(Binary, 0, Ms, TrueLblName, FalseLblName), + [TestCode,[ReInitLbl|ReInitCode],[BinaryLbl|OrdinaryCode]]; +gen_rtl({bs_start_match, Max}, [Ms], [Binary], TrueLblName, FalseLblName) -> + MatchStateLbl = hipe_rtl:mk_new_label(), + BinaryLbl = hipe_rtl:mk_new_label(), + ReSizeLbl = hipe_rtl:mk_new_label(), + ReInitLbl = hipe_rtl:mk_new_label(), + TestCode = + [hipe_rtl:mk_move(Ms,Binary), + hipe_tagscheme:test_matchstate(Binary, + hipe_rtl:label_name(MatchStateLbl), + hipe_rtl:label_name(BinaryLbl), + 0.99)], + MatchStateTestCode = + [hipe_tagscheme:compare_matchstate(Max, Ms, + hipe_rtl:label_name(ReInitLbl), + 
hipe_rtl:label_name(ReSizeLbl))], + ReSizeCode = resize_matchstate(Ms, Max, TrueLblName), + ReInitCode = reinit_matchstate(Ms, TrueLblName), + OrdinaryCode = make_matchstate(Binary, Max, Ms, TrueLblName, FalseLblName), + [TestCode, [MatchStateLbl|MatchStateTestCode], [ReSizeLbl|ReSizeCode], + [ReInitLbl|ReInitCode], [BinaryLbl|OrdinaryCode]]; +gen_rtl({bs_start_match, _Max}, [], [Binary], TrueLblName, FalseLblName) -> + MatchStateLbl = hipe_rtl:mk_new_label(), + [hipe_tagscheme:test_bitstr(Binary, TrueLblName, + hipe_rtl:label_name(MatchStateLbl), 0.99), + MatchStateLbl, + hipe_tagscheme:test_matchstate(Binary, TrueLblName, FalseLblName, 0.99)]; +gen_rtl({{bs_start_match, bitstr}, Max}, [Ms], [Binary], + TrueLblName, FalseLblName) -> + make_matchstate(Binary, Max, Ms, TrueLblName, FalseLblName); +gen_rtl({{bs_start_match, bitstr}, _Max}, [], [_Binary], + TrueLblName, _FalseLblName) -> + [hipe_rtl:mk_goto(TrueLblName)]; +gen_rtl({{bs_start_match,ok_matchstate}, Max}, [Ms], [Binary], + TrueLblName, FalseLblName) -> + MatchStateLbl = hipe_rtl:mk_new_label(), + BinaryLbl = hipe_rtl:mk_new_label(), + TestCode = + [hipe_rtl:mk_move(Ms,Binary), + hipe_tagscheme:test_matchstate(Binary, + hipe_rtl:label_name(MatchStateLbl), + hipe_rtl:label_name(BinaryLbl), + 0.99)], + MatchStateCode = reinit_matchstate(Ms, TrueLblName), + OrdinaryCode = make_matchstate(Binary, Max, Ms, TrueLblName, FalseLblName), + TestCode ++ [MatchStateLbl|MatchStateCode] ++ [BinaryLbl|OrdinaryCode]; +gen_rtl({{bs_start_match, ok_matchstate}, _Max}, [], [Binary], + TrueLblName, FalseLblName) -> + MatchStateLbl = hipe_rtl:mk_new_label(), + [hipe_tagscheme:test_bitstr(Binary, TrueLblName, + hipe_rtl:label_name(MatchStateLbl), 0.99), + MatchStateLbl, + hipe_tagscheme:test_matchstate(Binary, TrueLblName, FalseLblName, 0.99)]; +gen_rtl({bs_get_integer, 0, _Flags}, [Dst, NewMs], [Ms], + TrueLblName, _FalseLblName) -> + update_ms(NewMs, Ms) ++ + [hipe_rtl:mk_move(Dst, hipe_rtl:mk_imm(15)), + 
hipe_rtl:mk_goto(TrueLblName)]; +gen_rtl({bs_get_integer,Size,Flags}, [Dst,NewMs], Args, + TrueLblName, FalseLblName) -> + case is_illegal_const(Size) of + true -> + [hipe_rtl:mk_goto(FalseLblName)]; + false -> + Signed = signed(Flags), + LittleEndian = littleendian(Flags), + Aligned = aligned(Flags), + UnSafe = unsafe(Flags), + case Args of + [Ms] -> + CCode= int_get_c_code(Dst, Ms, hipe_rtl:mk_imm(Size), + Flags, TrueLblName, FalseLblName), + update_ms(NewMs, Ms) ++ + get_static_int(Dst, Ms, Size, CCode, + Signed, LittleEndian, Aligned, UnSafe, + TrueLblName, FalseLblName); + [Ms, Arg] -> + {SizeCode1, SizeReg1} = + make_size(Size, Arg, FalseLblName), + CCode = int_get_c_code(Dst, Ms, SizeReg1, Flags, + TrueLblName, FalseLblName), + InCode = get_dynamic_int(Dst, Ms, SizeReg1, CCode, + Signed, LittleEndian, Aligned, + TrueLblName, FalseLblName), + update_ms(NewMs, Ms) ++ SizeCode1 ++ InCode + end + end; +gen_rtl({bs_get_float,Size,Flags}, [Dst1,NewMs], Args, + TrueLblName, FalseLblName) -> + case is_illegal_const(Size) of + true -> + [hipe_rtl:mk_goto(FalseLblName)]; + false -> + [hipe_rtl:mk_gctest(3)] ++ + case Args of + [Ms] -> + CCode = float_get_c_code(Dst1, Ms, hipe_rtl:mk_imm(Size), Flags, + TrueLblName, FalseLblName), + update_ms(NewMs, Ms) ++ CCode; + [Ms,Arg] -> + {SizeCode, SizeReg} = make_size(Size, Arg, + FalseLblName), + CCode = float_get_c_code(Dst1, Ms, SizeReg, Flags, + TrueLblName, FalseLblName), + update_ms(NewMs, Ms) ++ SizeCode ++ CCode + end + end; +gen_rtl({bs_get_binary_all, Unit, _Flags}, [Dst], [Ms], + TrueLblName, FalseLblName) -> + [hipe_rtl:mk_gctest(?SUB_BIN_WORDSIZE)] ++ + get_binary_all(Dst, Unit, Ms, TrueLblName,FalseLblName); +gen_rtl({bs_get_binary_all_2, Unit, _Flags}, [Dst,NewMs], [Ms], + TrueLblName, FalseLblName) -> + [hipe_rtl:mk_gctest(?SUB_BIN_WORDSIZE)] ++ + update_ms(NewMs, Ms) ++ + get_binary_all(Dst, Unit, Ms, TrueLblName, FalseLblName); +gen_rtl({bs_get_binary,Size,Flags}, [Dst,NewMs], Args, + TrueLblName, 
FalseLblName) -> + case is_illegal_const(Size) of + true -> + [hipe_rtl:mk_goto(FalseLblName)]; + false -> + Unsafe = unsafe(Flags), + case Args of + [Ms] -> + SizeReg = hipe_rtl:mk_new_reg(), + SizeCode = [hipe_rtl:mk_move(SizeReg, hipe_rtl:mk_imm(Size))]; + [Ms, BitsVar] -> + {SizeCode, SizeReg} = make_size(Size, BitsVar, FalseLblName) + end, + InCode = get_binary(Dst, Ms, SizeReg, Unsafe, + TrueLblName, FalseLblName), + [hipe_rtl:mk_gctest(?SUB_BIN_WORDSIZE)] ++ + update_ms(NewMs, Ms) ++ SizeCode ++ InCode + end; +gen_rtl(bs_get_utf8, [Dst,NewMs], [Ms], TrueLblName, FalseLblName) -> + update_ms(NewMs, Ms) ++ utf8_get_c_code(Dst, Ms, TrueLblName, FalseLblName); +gen_rtl({bs_get_utf16,Flags}, [Dst,NewMs], [Ms], TrueLblName, FalseLblName) -> + update_ms(NewMs, Ms) ++ utf16_get_c_code(Flags, Dst, Ms, TrueLblName, FalseLblName); +gen_rtl(bs_validate_unicode_retract, [NewMs], [Src,Ms], TrueLblName, FalseLblName) -> + update_ms(NewMs, Ms) ++ validate_unicode_retract_c_code(Src, Ms, TrueLblName, FalseLblName); +gen_rtl({bs_test_tail, NumBits}, [NewMs], [Ms], TrueLblName, FalseLblName) -> + {[Offset,BinSize], ExCode} = extract_matchstate_vars([offset,binsize], Ms), + update_ms(NewMs, Ms) ++ ExCode ++ + [add_to_offset(Offset, Offset, hipe_rtl:mk_imm(NumBits), FalseLblName), + hipe_rtl:mk_branch(Offset, eq, BinSize, TrueLblName, FalseLblName)]; +gen_rtl({bs_test_unit, Unit}, [], [Ms], TrueLblName, FalseLblName) -> + {[Offset,BinSize], ExCode} = extract_matchstate_vars([offset,binsize], Ms), + SizeReg = hipe_rtl:mk_new_reg(), + ExCode ++ + [hipe_rtl:mk_alu(SizeReg, BinSize, sub, Offset)| + test_alignment_code(SizeReg, Unit, TrueLblName, FalseLblName)]; +gen_rtl({bs_test_tail, NumBits}, [], [Ms], TrueLblName, FalseLblName) -> + {[Offset,BinSize], ExCode} = extract_matchstate_vars([offset,binsize], Ms), + ExCode ++ + [add_to_offset(Offset, Offset, hipe_rtl:mk_imm(NumBits), FalseLblName), + hipe_rtl:mk_branch(Offset, eq, BinSize, TrueLblName, FalseLblName)]; 
+gen_rtl({bs_skip_bits_all, Unit, _Flags}, Dst, [Ms], + TrueLblName, FalseLblName) -> + opt_update_ms(Dst, Ms) ++ + skip_bits_all(Unit, Ms, TrueLblName, FalseLblName); +gen_rtl({bs_skip_bits, Bits}, Dst, [Ms|Args], TrueLblName, FalseLblName) -> + opt_update_ms(Dst,Ms) ++ + case Args of + [] -> + skip_bits2(Ms, hipe_rtl:mk_imm(Bits), TrueLblName, FalseLblName); + [Arg] -> + {SizeCode, SizeReg} = make_size(Bits, Arg, FalseLblName), + InCode = skip_bits2(Ms, SizeReg, TrueLblName, FalseLblName), + SizeCode ++ InCode + end; +gen_rtl({bs_restore, Slot}, [NewMs], [Ms], TrueLblName, _FalseLblName) -> + Tmp1 = hipe_rtl:mk_new_reg_gcsafe(), + update_ms(NewMs, Ms) ++ + [get_field_from_term({matchstate, {saveoffset, Slot}}, Ms, Tmp1), + set_field_from_term({matchstate, {matchbuffer, offset}}, Ms, Tmp1), + hipe_rtl:mk_goto(TrueLblName)]; +gen_rtl({bs_save, Slot}, [NewMs], [Ms], TrueLblName, _FalseLblName) -> + {Offset, Instr} = extract_matchstate_var(offset, Ms), + update_ms(NewMs, Ms) ++ + [Instr, + set_field_from_term({matchstate, {saveoffset, Slot}}, Ms, Offset), + hipe_rtl:mk_goto(TrueLblName)]; +gen_rtl({bs_match_string, String, ByteSize}, [NewMs], + [Ms], TrueLblName, FalseLblName) -> + {[Offset, BinSize, Base], Instrs} = + extract_matchstate_vars([offset, binsize, base], Ms), + [SuccessLbl, ALbl, ULbl] = create_lbls(3), + [NewOffset,BitOffset] = create_gcsafe_regs(2), + Unit = hipe_rtl_arch:word_size() - 1, + Loops = ByteSize div Unit, + Init = + [Instrs, + update_ms(NewMs,Ms), + check_size(Offset, hipe_rtl:mk_imm(ByteSize*?BYTE_SIZE), BinSize, + NewOffset, hipe_rtl:label_name(SuccessLbl), FalseLblName), + SuccessLbl], + SplitCode = + [hipe_rtl:mk_alub(BitOffset, Offset, 'and', hipe_rtl:mk_imm(?LOW_BITS), eq, + hipe_rtl:label_name(ALbl), hipe_rtl:label_name(ULbl))], + Loops = ByteSize div Unit, + SkipSize = Loops * Unit, + {ACode1,UCode1} = + case Loops of + 0 -> + {[],[]}; + _ -> + create_loops(Loops, Unit, String, Base, + Offset, BitOffset, FalseLblName) + end, + 
<<_:SkipSize/binary, RestString/binary>> = String, + {ACode2, UCode2} = + case ByteSize rem Unit of + 0 -> + {[],[]}; + Rem -> + create_rests(Rem, RestString, Base, Offset, BitOffset, FalseLblName) + end, + End = [update_offset(NewOffset, NewMs), hipe_rtl:mk_goto(TrueLblName)], + [Init, SplitCode, ALbl, ACode1, ACode2, End, ULbl, UCode1, UCode2,End]; +gen_rtl(bs_context_to_binary, [Bin], [Var], TrueLblName, _FalseLblName) -> + MSLabel = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_move(Bin, Var), + hipe_tagscheme:test_matchstate(Var, hipe_rtl:label_name(MSLabel), + TrueLblName, 0.5), + MSLabel, + hipe_tagscheme:convert_matchstate(Bin), + hipe_rtl:mk_goto(TrueLblName)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Calls to C %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +int_get_c_code(Dst1, Ms, Size, Flags, TrueLblName, FalseLblName) -> + make_int_gc_code(Size) ++ + get_c_code(bs_get_integer_2, Dst1, Ms, Size, Flags, + TrueLblName, FalseLblName). + +float_get_c_code(Dst1, Ms, Size, Flags, TrueLblName, FalseLblName) -> + get_c_code(bs_get_float_2, Dst1, Ms, Size, Flags, TrueLblName, FalseLblName). + +get_c_code(Func, Dst1, Ms, Size, Flags, TrueLblName, FalseLblName) -> + SizeReg = hipe_rtl:mk_new_reg_gcsafe(), + FlagsReg = hipe_rtl:mk_new_reg_gcsafe(), + MatchBuf = hipe_rtl:mk_new_reg(), + RetLabel = hipe_rtl:mk_new_label(), + NonVal = hipe_rtl:mk_imm(hipe_tagscheme:mk_non_value()), + [hipe_rtl:mk_move(SizeReg, Size), + hipe_rtl:mk_move(FlagsReg, hipe_rtl:mk_imm(Flags)), + hipe_tagscheme:extract_matchbuffer(MatchBuf, Ms), + hipe_rtl_arch:call_bif([Dst1], Func, [SizeReg, FlagsReg, MatchBuf], + hipe_rtl:label_name(RetLabel), FalseLblName), + RetLabel, + hipe_rtl:mk_branch(Dst1, eq, NonVal, + FalseLblName, + TrueLblName, 0.01)]. 
%% Emits RTL that reads one character through the bs_get_utf8 primop.
%% The match buffer of Ms is extracted into a fresh register and passed
%% to the BIF, whose result lands in Dst.  Control continues at
%% FalseLblName when Dst equals the non-value marker, otherwise at
%% TrueLblName.
utf8_get_c_code(Dst, Ms, TrueLblName, FalseLblName) ->
  BufReg = hipe_rtl:mk_new_reg(),
  Failure = hipe_rtl:mk_imm(hipe_tagscheme:mk_non_value()),
  Extract = hipe_tagscheme:extract_matchbuffer(BufReg, Ms),
  Call = hipe_rtl_arch:call_bif([Dst], bs_get_utf8, [BufReg], [], []),
  Branch = hipe_rtl:mk_branch(Dst, eq, Failure,
                              FalseLblName, TrueLblName, 0.01),
  [Extract, Call, Branch].

%% Same as utf8_get_c_code/4 but for the bs_get_utf16 primop, which
%% additionally receives Flags (moved into a fresh register) as its
%% second argument.
utf16_get_c_code(Flags, Dst, Ms, TrueLblName, FalseLblName) ->
  BufReg = hipe_rtl:mk_new_reg(),
  Failure = hipe_rtl:mk_imm(hipe_tagscheme:mk_non_value()),
  FlagsReg = hipe_rtl:mk_new_reg_gcsafe(),
  Extract = hipe_tagscheme:extract_matchbuffer(BufReg, Ms),
  SetFlags = hipe_rtl:mk_move(FlagsReg, hipe_rtl:mk_imm(Flags)),
  Call = hipe_rtl_arch:call_bif([Dst], bs_get_utf16,
                                [BufReg, FlagsReg], [], []),
  Branch = hipe_rtl:mk_branch(Dst, eq, Failure,
                              FalseLblName, TrueLblName, 0.01),
  [Extract, SetFlags, Call, Branch].

%% Emits a call to the bs_validate_unicode_retract primop with the match
%% buffer of Ms and Src.  A zero result branches to FalseLblName, any
%% other result to TrueLblName.
validate_unicode_retract_c_code(Src, Ms, TrueLblName, FalseLblName) ->
  BufReg = hipe_rtl:mk_new_reg(),
  ZeroImm = hipe_rtl:mk_imm(0),
  ResultReg = hipe_rtl:mk_new_reg(),
  Extract = hipe_tagscheme:extract_matchbuffer(BufReg, Ms),
  Call = hipe_rtl_arch:call_bif([ResultReg], bs_validate_unicode_retract,
                                [BufReg, Src], [], []),
  Branch = hipe_rtl:mk_branch(ResultReg, eq, ZeroImm,
                              FalseLblName, TrueLblName, 0.01),
  [Extract, Call, Branch].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Int Code %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Generates the code that compares Loops consecutive chunks of Unit
%% bytes of String against the binary being matched.  Returns
%% {AlignedCode, UnalignedCode}: the first variant reads each chunk with
%% get_int_to_reg/6, the second with get_unaligned_int_to_reg/7 using
%% BitOffset.  Both variants share one scratch register and, per chunk,
%% advance Offset and test the loaded value via update_and_test/5.
create_loops(Loops, Unit, String, Base, Offset, BitOffset, FalseLblName) ->
  Scratch = hipe_rtl:mk_new_reg_gcsafe(),
  Kind = {unsigned, big},
  Aligned =
    fun(Expected) ->
        Load = get_int_to_reg(Scratch, Unit*?BYTE_SIZE, Base, Offset,
                              'srl', Kind),
        Check = update_and_test(Scratch, Unit, Offset, Expected,
                                FalseLblName),
        [Load, Check]
    end,
  Unaligned =
    fun(Expected) ->
        Load = get_unaligned_int_to_reg(Scratch, Unit*?BYTE_SIZE,
                                        Base, Offset, BitOffset,
                                        'srl', Kind),
        Check = update_and_test(Scratch, Unit, Offset, Expected,
                                FalseLblName),
        [Load | Check]
    end,
  AlignedCode = create_loops(Loops, Unit, String, Aligned),
  UnalignedCode = create_loops(Loops, Unit, String, Unaligned),
  {AlignedCode, UnalignedCode}.
%% Generates the comparison code for the final Rem bytes of a matched
%% string (the part that does not fill a whole chunk).  Returns
%% {AlignedCode, UnalignedCode}, each produced by a single round of
%% create_loops/4.  Unlike create_loops/7 the loaded value is only
%% tested; Offset is not advanced here.
create_rests(Rem, String, Base, Offset, BitOffset, FalseLblName) ->
  Scratch = hipe_rtl:mk_new_reg_gcsafe(),
  Kind = {unsigned, big},
  Aligned =
    fun(Expected) ->
        Load = get_int_to_reg(Scratch, Rem*?BYTE_SIZE, Base, Offset,
                              'srl', Kind),
        [Load | just_test(Scratch, Expected, FalseLblName)]
    end,
  Unaligned =
    fun(Expected) ->
        Load = get_unaligned_int_to_reg(Scratch, Rem*?BYTE_SIZE,
                                        Base, Offset, BitOffset,
                                        'srl', Kind),
        [Load | just_test(Scratch, Expected, FalseLblName)]
    end,
  AlignedCode = create_loops(1, Rem, String, Aligned),
  UnalignedCode = create_loops(1, Rem, String, Unaligned),
  {AlignedCode, UnalignedCode}.

%% Applies IntFun to each of the first N chunks of Unit bytes of String
%% (decoded by get_value/2) and returns the results as a nested list
%% [Code1, [Code2, [... []]]].
create_loops(0, _Unit, _String, _IntFun) ->
  [];
create_loops(N, Unit, String, IntFun) ->
  {Chunk, Remaining} = get_value(Unit, String),
  ChunkCode = IntFun(Chunk),
  [ChunkCode, create_loops(N-1, Unit, Remaining, IntFun)].

%% Advances Offset by Unit bytes (jumping to FalseLblName if the addition
%% overflows, see add_to_offset/4) and then compares Reg with Value.
update_and_test(Reg, Unit, Offset, Value, FalseLblName) ->
  Step = hipe_rtl:mk_imm(Unit*?BYTE_SIZE),
  Advance = add_to_offset(Offset, Offset, Step, FalseLblName),
  [Advance, just_test(Reg, Value, FalseLblName)].

%% Branches to FalseLblName unless Reg equals the immediate Value;
%% on equality execution falls through to a fresh continuation label.
just_test(Reg, Value, FalseLblName) ->
  Cont = hipe_rtl:mk_new_label(),
  [hipe_rtl:mk_branch(Reg, eq, hipe_rtl:mk_imm(Value),
                      hipe_rtl:label_name(Cont), FalseLblName),
   Cont].

%% Splits the first N bytes off String and returns them decoded as an
%% (unsigned, big-endian) integer together with the rest of the binary.
get_value(N, String) ->
  <<Int:N/integer-unit:8, Tail/binary>> = String,
  {Int, Tail}.

%% Emits the heap test needed before building an integer of the given
%% size.  For a compile-time size the need is computed with
%% hipe_tagscheme:bignum_sizeneed/1 (no code when it is 0).  For a size
%% held in a register the need is computed at run time by
%% hipe_tagscheme:bignum_sizeneed_code/2, which is given the name of the
%% label placed at the end of the emitted code.
make_int_gc_code(I) when is_integer(I) ->
  case hipe_tagscheme:bignum_sizeneed(I) of
    0 ->
      [];
    Words when is_integer(Words) ->
      [hipe_rtl:mk_gctest(Words)]
  end;
make_int_gc_code(SReg) ->
  DoneLbl = hipe_rtl:mk_new_label(),
  DoneName = hipe_rtl:label_name(DoneLbl),
  {NeedReg, NeedCode} = hipe_tagscheme:bignum_sizeneed_code(SReg, DoneName),
  Tail = [hipe_rtl:mk_gctest(NeedReg),
          hipe_rtl:mk_goto(DoneName),
          DoneLbl],
  NeedCode ++ Tail.
%% Chooses between inline RTL and the C fallback (CCode) for fetching an
%% integer whose size is known at compile time.  Inline code is used
%% only when Size fits in a machine word and the segment is
%%   - aligned and big-endian,
%%   - aligned, little-endian and a whole number of bytes, or
%%   - unaligned and big-endian.
%% Every other combination returns CCode unchanged.
get_static_int(Dst1, Ms, Size, CCode, Signed, LittleEndian, Aligned,
               Unsafe, TrueLblName, FalseLblName) ->
  WordBits = hipe_rtl_arch:word_size() * ?BYTE_SIZE,
  FitsInWord = Size =< WordBits,
  case {FitsInWord, Aligned, LittleEndian} of
    {false, _, _} ->
      CCode;
    {true, true, false} ->
      get_int_from_bin(Ms, Size, Dst1, Signed, LittleEndian,
                       Unsafe, FalseLblName, TrueLblName);
    {true, true, true} ->
      %% Little-endian inline loads only handle whole bytes.
      case Size rem ?BYTE_SIZE of
        0 ->
          get_int_from_bin(Ms, Size, Dst1, Signed, LittleEndian,
                           Unsafe, FalseLblName, TrueLblName);
        _ ->
          CCode
      end;
    {true, false, false} ->
      get_int_from_unaligned_bin(Ms, Size, Dst1, Signed,
                                 Unsafe, FalseLblName, TrueLblName);
    {true, false, true} ->
      CCode
  end.

%% Code for fetching an integer whose size is only known at run time
%% (held in SizeReg).  For unaligned segments (7th argument false) only
%% the C fallback is used.  For aligned segments make_dyn_prep/2 wraps
%% the inline reader with a size test that diverts to CCode.
get_dynamic_int(_Dst1, _Ms, _SizeReg, CCode, _Signed, _LittleEndian, false,
                _TrueLblName, _FalseLblName) ->
  CCode;
get_dynamic_int(Dst1, Ms, SizeReg, CCode, Signed, LittleEndian, true,
                TrueLblName, FalseLblName) ->
  {Prologue, Fallback} = make_dyn_prep(SizeReg, CCode),
  Inline = get_unknown_size_int(SizeReg, Ms, Dst1, Signed, LittleEndian,
                                FalseLblName, TrueLblName),
  lists:append([Prologue, Inline, Fallback]).

%% Inline fetch of a Size-bit integer from the match state Ms:
%% extract base/offset/binsize, check that Size more bits are available
%% (skipped when Unsafe is true, see check_size/7), store the advanced
%% offset back into Ms and finally load the value into Dst1.
get_int_from_bin(Ms, Size, Dst1, Signed, LittleEndian,
                 Unsafe, FalseLblName, TrueLblName) ->
  ShiftOp = shift_type(Signed),
  IntType = get_type(Signed, LittleEndian),
  EndOffset = hipe_rtl:mk_new_reg_gcsafe(),
  OkLbl = hipe_rtl:mk_new_label(),
  {[Base, Offset, BinSize], Extract} =
    extract_matchstate_vars([base, offset, binsize], Ms),
  Check = [check_size(Offset, hipe_rtl:mk_imm(Size), BinSize, EndOffset,
                      Unsafe, hipe_rtl:label_name(OkLbl), FalseLblName),
           OkLbl],
  Store = [update_offset(EndOffset, Ms)],
  Load = get_int(Dst1, Size, Base, Offset, ShiftOp, IntType, TrueLblName),
  lists:append([Extract, Check, Store, Load]).
%% Like get_int_from_bin/8 but for a segment that may start at any bit
%% position; always big-endian (get_type/2 is called with false) and
%% loaded through get_unaligned_int/7.
get_int_from_unaligned_bin(Ms, Size, Dst1, Signed,
                           UnSafe, FalseLblName, TrueLblName) ->
  ShiftOp = shift_type(Signed),
  IntType = get_type(Signed, false),
  EndOffset = hipe_rtl:mk_new_reg_gcsafe(),
  OkLbl = hipe_rtl:mk_new_label(),
  {[Base, Offset, BinSize], Extract} =
    extract_matchstate_vars([base, offset, binsize], Ms),
  Check = [check_size(Offset, hipe_rtl:mk_imm(Size), BinSize, EndOffset,
                      UnSafe, hipe_rtl:label_name(OkLbl), FalseLblName),
           OkLbl],
  Store = [update_offset(EndOffset, Ms)],
  Load = get_unaligned_int(Dst1, Size, Base, Offset, ShiftOp, IntType,
                           TrueLblName),
  lists:append([Extract, Check, Store, Load]).

%% Inline fetch of an integer whose bit size is in SizeReg.  After the
%% (always performed) size check the new offset is stored in Ms and the
%% bytes between Offset and NewOffset are assembled by the little- or
%% big-endian byte loop, selected by Little.
get_unknown_size_int(SizeReg, Ms, Dst1, Signed, Little,
                     FalseLblName, TrueLblName) ->
  ShiftOp = shift_type(Signed),
  IntType = get_type(Signed, false),
  EndOffset = hipe_rtl:mk_new_reg_gcsafe(),
  OkLbl = hipe_rtl:mk_new_label(),
  {[Base, Offset, BinSize], Extract} =
    extract_matchstate_vars([base, offset, binsize], Ms),
  Check = [check_size(Offset, SizeReg, BinSize, EndOffset,
                      hipe_rtl:label_name(OkLbl), FalseLblName),
           OkLbl,
           update_offset(EndOffset, Ms)],
  Load =
    case Little of
      true ->
        get_little_unknown_int(Dst1, Base, Offset, EndOffset,
                               ShiftOp, IntType, TrueLblName);
      false ->
        get_big_unknown_int(Dst1, Base, Offset, EndOffset,
                            ShiftOp, IntType, TrueLblName)
    end,
  Extract ++ Check ++ Load.

%% Emits code that creates a new match state in Ms for Binary:
%% a heap test for ?MS_MIN_SIZE+Max, the decomposition of Binary done by
%% get_binary_bytes/7 (which jumps to FalseLblName if Binary is not a
%% bitstring), the creation of the match state, and a jump to
%% TrueLblName.
make_matchstate(Binary, Max, Ms, TrueLblName, FalseLblName) ->
  BaseReg = hipe_rtl:mk_new_reg(),
  OrigVar = hipe_rtl:mk_new_var(),
  SizeReg = hipe_rtl:mk_new_reg_gcsafe(),
  OffsetReg = hipe_rtl:mk_new_reg_gcsafe(),
  ContLbl = hipe_rtl:mk_new_label(),
  HeapTest = hipe_rtl:mk_gctest(?MS_MIN_SIZE+Max),
  Decompose = get_binary_bytes(Binary, SizeReg, BaseReg, OffsetReg,
                               OrigVar, hipe_rtl:label_name(ContLbl),
                               FalseLblName),
  Create = hipe_tagscheme:create_matchstate(Max, SizeReg, BaseReg,
                                            OffsetReg, OrigVar, Ms),
  [HeapTest, Decompose, ContLbl, Create, hipe_rtl:mk_goto(TrueLblName)].
%% Emits code that re-creates the match state in Ms with parameter Max:
%% after a heap test for ?MS_MIN_SIZE+Max the binsize, base, orig and
%% offset fields of the existing match buffer are read and passed to
%% hipe_tagscheme:create_matchstate/6, then control goes to TrueLblName.
resize_matchstate(Ms, Max, TrueLblName) ->
  BaseReg = hipe_rtl:mk_new_reg(),
  OrigVar = hipe_rtl:mk_new_var(),
  SizeReg = hipe_rtl:mk_new_reg_gcsafe(),
  OffsetReg = hipe_rtl:mk_new_reg_gcsafe(),
  HeapTest = hipe_rtl:mk_gctest(?MS_MIN_SIZE+Max),
  ReadSize =
    get_field_from_term({matchstate, {matchbuffer, binsize}}, Ms, SizeReg),
  ReadBase =
    get_field_from_term({matchstate, {matchbuffer, base}}, Ms, BaseReg),
  ReadOrig =
    get_field_from_term({matchstate, {matchbuffer, orig}}, Ms, OrigVar),
  ReadOffset =
    get_field_from_term({matchstate, {matchbuffer, offset}}, Ms, OffsetReg),
  Create = hipe_tagscheme:create_matchstate(Max, SizeReg, BaseReg,
                                            OffsetReg, OrigVar, Ms),
  [HeapTest, ReadSize, ReadBase, ReadOrig, ReadOffset, Create,
   hipe_rtl:mk_goto(TrueLblName)].

%% Emits code that copies the current match buffer offset of Ms into
%% save slot 0 and then jumps to TrueLblName.
reinit_matchstate(Ms, TrueLblName) ->
  OffsetReg = hipe_rtl:mk_new_reg_gcsafe(),
  Read =
    get_field_from_term({matchstate, {matchbuffer, offset}}, Ms, OffsetReg),
  Save = set_field_from_term({matchstate, {saveoffset, 0}}, Ms, OffsetReg),
  [Read, Save, hipe_rtl:mk_goto(TrueLblName)].

%%%%%%%%%%%%%%%%%%%%%%%%%%%% Binary Code %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Emits code that binds Dst1 to a sub binary covering everything from
%% the current offset to the end of the matched binary, and moves the
%% offset in Ms to the end.  With Unit =:= 1 no size test is needed; for
%% any other Unit the remaining size must pass test_alignment_code/4,
%% otherwise control goes to FalseLblName.
get_binary_all(Dst1, 1, Ms, TrueLblName, _FalseLblName) ->
  RestSize = hipe_rtl:mk_new_reg_gcsafe(),
  {[Offset, BinSize, Orig], Extract} =
    extract_matchstate_vars([offset, binsize, orig], Ms),
  Build = [hipe_rtl:mk_alu(RestSize, BinSize, sub, Offset) |
           construct_subbin(Dst1, RestSize, Offset, Orig)],
  Finish = [update_offset(BinSize, Ms),
            hipe_rtl:mk_goto(TrueLblName)],
  Extract ++ (Build ++ Finish);
get_binary_all(Dst1, Unit, Ms, TrueLblName, FalseLblName) ->
  RestSize = hipe_rtl:mk_new_reg_gcsafe(),
  OkLbl = hipe_rtl:mk_new_label(),
  OkName = hipe_rtl:label_name(OkLbl),
  {[Offset, BinSize, Orig], Extract} =
    extract_matchstate_vars([offset, binsize, orig], Ms),
  Test = [hipe_rtl:mk_alu(RestSize, BinSize, sub, Offset) |
          test_alignment_code(RestSize, Unit, OkName, FalseLblName)],
  Build = [OkLbl | construct_subbin(Dst1, RestSize, Offset, Orig)],
  Finish = [update_offset(BinSize, Ms),
            hipe_rtl:mk_goto(TrueLblName)],
  Extract ++ (Test ++ Build ++ Finish).
%% Emits code that binds Dst1 to a sub binary of SizeReg bits starting
%% at the current offset of Ms.  The size check is skipped when UnSafe
%% is true (see check_size/7); on success the offset in Ms is advanced
%% past the extracted bits and control goes to TrueLblName.
get_binary(Dst1, Ms, SizeReg,
           UnSafe, TrueLblName, FalseLblName) ->
  OkLbl = hipe_rtl:mk_new_label(),
  EndOffset = hipe_rtl:mk_new_reg_gcsafe(),
  {[Offset, BinSize, Orig], Extract} =
    extract_matchstate_vars([offset, binsize, orig], Ms),
  Check = [check_size(Offset, SizeReg, BinSize, EndOffset,
                      UnSafe, hipe_rtl:label_name(OkLbl),
                      FalseLblName),
           OkLbl],
  Build = construct_subbin(Dst1, SizeReg, Offset, Orig),
  Finish = [update_offset(EndOffset, Ms),
            hipe_rtl:mk_goto(TrueLblName)],
  Extract ++ Check ++ (Build ++ Finish).

%% Splits the bit quantities Size and Offset into a byte part (shift
%% right by ?BYTE_SHIFT) and a bit part (mask with ?LOW_BITS) and hands
%% the four parts plus Orig to hipe_tagscheme:mk_sub_binary/6.
construct_subbin(Dst, Size, Offset, Orig) ->
  [BitOffset, ByteOffset, BitSize, ByteSize] = create_gcsafe_regs(4),
  ByteShift = hipe_rtl:mk_imm(?BYTE_SHIFT),
  BitMask = hipe_rtl:mk_imm(?LOW_BITS),
  [hipe_rtl:mk_alu(ByteSize, Size, srl, ByteShift),
   hipe_rtl:mk_alu(BitSize, Size, 'and', BitMask),
   hipe_rtl:mk_alu(ByteOffset, Offset, srl, ByteShift),
   hipe_rtl:mk_alu(BitOffset, Offset, 'and', BitMask),
   hipe_tagscheme:mk_sub_binary(Dst, ByteSize, ByteOffset,
                                BitSize, BitOffset, Orig)].

%%%%%%%%%%%%%%%%%%%%%%%%% Skip Bits %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Emits code that skips the rest of the binary by setting the offset in
%% Ms to the binary size.  For a Unit other than 1 the remaining size
%% must first pass test_alignment_code/4, else control goes to
%% FalseLblName.
skip_bits_all(1, Ms, TrueLblName, _FalseLblName) ->
  {BinSize, ReadSize} = extract_matchstate_var(binsize, Ms),
  [ReadSize,
   update_offset(BinSize, Ms),
   hipe_rtl:mk_goto(TrueLblName)];
skip_bits_all(Unit, Ms, TrueLblName, FalseLblName) ->
  RestSize = hipe_rtl:mk_new_reg_gcsafe(),
  OkLbl = hipe_rtl:mk_new_label(),
  OkName = hipe_rtl:label_name(OkLbl),
  {[Offset, BinSize], Extract} =
    extract_matchstate_vars([offset, binsize], Ms),
  Remaining = [hipe_rtl:mk_alu(RestSize, BinSize, sub, Offset)],
  Test = test_alignment_code(RestSize, Unit, OkName, FalseLblName),
  Finish = [OkLbl,
            update_offset(BinSize, Ms),
            hipe_rtl:mk_goto(TrueLblName)],
  lists:append([Extract, Remaining, Test, Finish]).
%% Emits code that continues at SLblName when the bit count held in
%% register Size is a multiple of Unit, and at FalseLblName otherwise.
%%   - Unit =:= 1 always succeeds, so only a jump is emitted.
%%   - For every power of two from 2 up to 256 (the largest unit the
%%     bit syntax allows) divisibility is tested with a single mask
%%     operation, Size band (Unit-1) =:= 0.  Previously only 2..32 took
%%     this path and 64, 128 and 256 needlessly used the slow loop.
%%   - Any other unit falls back to the repeated-subtraction loop.
test_alignment_code(_Size, 1, SLblName, _FalseLblName) ->
  [hipe_rtl:mk_goto(SLblName)];
test_alignment_code(Size, Unit, SLblName, FalseLblName)
  when is_integer(Unit), Unit > 1, Unit =< 256,
       Unit band (Unit - 1) =:= 0 ->
  get_fast_test_code(Size, Unit - 1, SLblName, FalseLblName);
test_alignment_code(Size, Unit, SLblName, FalseLblName) ->
  get_slow_test_code(Size, Unit, SLblName, FalseLblName).

%% Mask test: computes Size 'and' AndTest into a scratch register and
%% branches to SLblName when the result is zero, else to FalseLblName.
get_fast_test_code(Size, AndTest, SLblName, FalseLblName) ->
  [Tmp] = create_gcsafe_regs(1),
  [hipe_rtl:mk_alub(Tmp, Size, 'and', hipe_rtl:mk_imm(AndTest),
                    eq, SLblName, FalseLblName)].

%% This is really slow
%% Divisibility test by repeated subtraction: a copy of Size is
%% decremented by Unit until it is exactly 0 (success, SLblName) or
%% becomes negative (failure, FalseLblName).
get_slow_test_code(Size, Unit, SLblName, FalseLblName) ->
  [Tmp] = create_gcsafe_regs(1),
  [LoopLbl, Lbl1, Lbl2] = create_lbls(3),
  LoopLblName = hipe_rtl:label_name(LoopLbl),
  Lbl1Name = hipe_rtl:label_name(Lbl1),
  Lbl2Name = hipe_rtl:label_name(Lbl2),
  [hipe_rtl:mk_move(Tmp, Size),
   LoopLbl,
   hipe_rtl:mk_branch(Tmp, eq, hipe_rtl:mk_imm(0), SLblName, Lbl1Name),
   Lbl1,
   hipe_rtl:mk_branch(Tmp, lt, hipe_rtl:mk_imm(0), FalseLblName, Lbl2Name),
   Lbl2,
   hipe_rtl:mk_alu(Tmp, Tmp, sub, hipe_rtl:mk_imm(Unit)),
   hipe_rtl:mk_goto(LoopLblName)].

%% Emits code that advances the offset of Ms by NoOfBits.  Control goes
%% to FalseLblName if the addition overflows or if the new offset lies
%% beyond the binary size; otherwise the new offset is stored in Ms and
%% control goes to TrueLblName.
skip_bits2(Ms, NoOfBits, TrueLblName, FalseLblName) ->
  [NewOffset] = create_gcsafe_regs(1),
  [TempLbl] = create_lbls(1),
  {[Offset, BinSize], ExCode} = extract_matchstate_vars([offset, binsize], Ms),
  ExCode ++
    add_to_offset(NewOffset, NoOfBits, Offset, FalseLblName) ++
    [hipe_rtl:mk_branch(BinSize, 'ltu', NewOffset, FalseLblName,
                        hipe_rtl:label_name(TempLbl), 0.01),
     TempLbl,
     update_offset(NewOffset, Ms),
     hipe_rtl:mk_goto(TrueLblName)].

%% Emits Result := Extra + Original, jumping to FalseLblName when the
%% 'ltu' condition holds after the addition and otherwise falling
%% through to a fresh label that ends the returned code.
add_to_offset(Result, Extra, Original, FalseLblName) ->
  TrueLbl = hipe_rtl:mk_new_label(),
  %% Note: 'ltu' means 'unsigned overflow'.
  [hipe_rtl:mk_alub(Result, Extra, 'add', Original, 'ltu',
                    FalseLblName, hipe_rtl:label_name(TrueLbl)),
   TrueLbl].
%%%%%%%%%%%%%%%%%%%%%%% Code for start match %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Emits code that decomposes the term Binary into the quantities needed
%% for a match state: BinSize and Offset (both in bits), Orig (the
%% underlying binary) and Base (set by get_base/2).  Jumps to
%% FalseLblName if Binary fails the bitstring test and to TrueLblName
%% when everything has been set up.
%% For a sub binary, Offset becomes its byte offset shifted by
%% ?BYTE_SHIFT plus its bit offset, and BinSize becomes the size in bits
%% plus Offset plus the trailing bit size.  For any other binary Offset
%% is 0 and Orig is Binary itself.
get_binary_bytes(Binary, BinSize, Base, Offset, Orig,
                 TrueLblName, FalseLblName) ->
  [ByteOffset, BitSize, BitOffset] = create_gcsafe_regs(3),
  [IsBitstrLbl, SubBinLbl, PlainLbl, MergeLbl] = create_lbls(4),
  IsBitstrName = hipe_rtl:label_name(IsBitstrLbl),
  SubBinName = hipe_rtl:label_name(SubBinLbl),
  PlainName = hipe_rtl:label_name(PlainLbl),
  MergeName = hipe_rtl:label_name(MergeLbl),
  ByteShift = hipe_rtl:mk_imm(?BYTE_SHIFT),
  Decompose =
    [hipe_tagscheme:test_bitstr(Binary, IsBitstrName, FalseLblName, 0.99),
     IsBitstrLbl,
     get_field_from_term({sub_binary, binsize}, Binary, BinSize),
     hipe_rtl:mk_alu(BinSize, BinSize, sll, ByteShift),
     hipe_tagscheme:test_subbinary(Binary, SubBinName, PlainName),
     SubBinLbl,
     get_field_from_term({sub_binary, offset}, Binary, ByteOffset),
     hipe_rtl:mk_alu(Offset, ByteOffset, sll, ByteShift),
     get_field_from_term({sub_binary, bitoffset}, Binary, BitOffset),
     hipe_rtl:mk_alu(Offset, Offset, add, BitOffset),
     get_field_from_term({sub_binary, bitsize}, Binary, BitSize),
     hipe_rtl:mk_alu(BinSize, BinSize, add, Offset),
     hipe_rtl:mk_alu(BinSize, BinSize, add, BitSize),
     get_field_from_term({sub_binary, orig}, Binary, Orig),
     hipe_rtl:mk_goto(MergeName),
     PlainLbl,
     hipe_rtl:mk_move(Offset, hipe_rtl:mk_imm(0)),
     hipe_rtl:mk_move(Orig, Binary),
     MergeLbl],
  FindBase = get_base(Orig, Base),
  lists:append([Decompose, FindBase, [hipe_rtl:mk_goto(TrueLblName)]]).

%%%%%%%%%%%%%%%%%%%%%%%%% UTILS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Emits code that sets Base for the binary Orig.  When the heap-binary
%% test succeeds, Base is computed as Orig + (?HEAP_BIN_DATA-2);
%% otherwise Base is loaded from Orig at displacement ?PROC_BIN_BYTES-2.
get_base(Orig, Base) ->
  [OnHeapLbl, RefcLbl, DoneLbl] = create_lbls(3),
  OnHeapName = hipe_rtl:label_name(OnHeapLbl),
  RefcName = hipe_rtl:label_name(RefcLbl),
  DoneName = hipe_rtl:label_name(DoneLbl),
  [hipe_tagscheme:test_heap_binary(Orig, OnHeapName, RefcName),
   OnHeapLbl,
   hipe_rtl:mk_alu(Base, Orig, add, hipe_rtl:mk_imm(?HEAP_BIN_DATA-2)),
   hipe_rtl:mk_goto(DoneName),
   RefcLbl,
   hipe_rtl:mk_load(Base, Orig, hipe_rtl:mk_imm(?PROC_BIN_BYTES-2)),
   DoneLbl].
%% Allocates a destination for one field of the match buffer of Ms and
%% returns {Destination, Instruction}, where Instruction reads the field
%% into the destination.  binsize and offset use a gc-safe register,
%% base an ordinary register, and orig a variable.
extract_matchstate_var(binsize, Ms) ->
  SizeReg = hipe_rtl:mk_new_reg_gcsafe(),
  Read =
    get_field_from_term({matchstate, {matchbuffer, binsize}}, Ms, SizeReg),
  {SizeReg, Read};
extract_matchstate_var(offset, Ms) ->
  OffsetReg = hipe_rtl:mk_new_reg_gcsafe(),
  Read =
    get_field_from_term({matchstate, {matchbuffer, offset}}, Ms, OffsetReg),
  {OffsetReg, Read};
extract_matchstate_var(base, Ms) ->
  BaseReg = hipe_rtl:mk_new_reg(),
  Read =
    get_field_from_term({matchstate, {matchbuffer, base}}, Ms, BaseReg),
  {BaseReg, Read};
extract_matchstate_var(orig, Ms) ->
  OrigVar = hipe_rtl:mk_new_var(),
  Read =
    get_field_from_term({matchstate, {matchbuffer, orig}}, Ms, OrigVar),
  {OrigVar, Read}.

%% Extracts several match buffer fields at once.  Returns
%% {Destinations, Instructions}, both in the order of the names in List.
extract_matchstate_vars(List, Ms) ->
  Pairs = lists:map(fun(Name) -> extract_matchstate_var(Name, Ms) end, List),
  lists:unzip(Pairs).

%% Checked size test: Tmp1 := Offset + Size (FalseLblName on overflow),
%% then continue at ContLblName if Tmp1 is at most BinSize (unsigned),
%% else at FalseLblName.
check_size(Offset, Size, BinSize, Tmp1, ContLblName, FalseLblName) ->
  Sum = add_to_offset(Tmp1, Offset, Size, FalseLblName),
  InRange = hipe_rtl:mk_branch(Tmp1, leu, BinSize,
                               ContLblName, FalseLblName, 0.99),
  [Sum, InRange].

%% Size test with an "unsafe" switch (5th argument).  false delegates to
%% check_size/6; true only computes Tmp1 := Offset + Size and jumps to
%% ContLblName without testing anything.
check_size(Offset, Size, BinSize, Tmp1, false, ContLblName, FalseLblName) ->
  check_size(Offset, Size, BinSize, Tmp1, ContLblName, FalseLblName);
check_size(Offset, Size, _BinSize, Tmp1, true, ContLblName, _FalseLblName) ->
  [hipe_rtl:mk_alu(Tmp1, Offset, add, Size),
   hipe_rtl:mk_goto(ContLblName)].

%% Right-shift operator to use for a value: sra when signed, srl when
%% unsigned.
shift_type(Signed) ->
  case Signed of
    true -> sra;
    false -> srl
  end.

%% Builds the {Signedness, Endianness} pair from two booleans.
get_type(Signed, LittleEndian) ->
  case Signed of
    true -> {signed, endianess(LittleEndian)};
    false -> {unsigned, endianess(LittleEndian)}
  end.

%% Maps the little-endian boolean to the atom little or big.
endianess(LittleEndian) ->
  case LittleEndian of
    true -> little;
    false -> big
  end.

%% Flag predicates.  aligned/1 masks with ?BSF_ALIGNED and expects the
%% result to be 0 or 1; the others test bit values 2, 4 and 16.
aligned(Flags) ->
  case Flags band ?BSF_ALIGNED of
    0 -> false;
    1 -> true
  end.

littleendian(Flags) ->
  (Flags band 2) =:= 2.

signed(Flags) ->
  (Flags band 4) =:= 4.

unsafe(Flags) ->
  (Flags band 16) =:= 16.

%% Instruction that stores NewOffset as the match buffer offset of Ms.
update_offset(NewOffset, Ms) ->
  set_field_from_term({matchstate, {matchbuffer, offset}}, Ms, NewOffset).
%% Copies the match state into the destination if the instruction has
%% one ([NewMs]); emits nothing when the destination list is empty.
opt_update_ms([], _OldMs) ->
  [];
opt_update_ms([NewMs], OldMs) ->
  update_ms(NewMs, OldMs).

%% Single move of OldMs into NewMs, wrapped in a list.
update_ms(NewMs, OldMs) ->
  Move = hipe_rtl:mk_move(NewMs, OldMs),
  [Move].

%% Returns a list of X fresh labels, created in list order.
create_lbls(0) ->
  [];
create_lbls(X) when X > 0 ->
  First = hipe_rtl:mk_new_label(),
  [First | create_lbls(X-1)].

%% Builds the wrapper around an inline reader for dynamically sized
%% integers.  Init branches to a fall-through label when SizeReg is at
%% most ?MAX_SMALL_BITS and to the C fallback otherwise; End is that
%% fallback label followed by CCode.  The caller places the inline code
%% between Init and End.
make_dyn_prep(SizeReg, CCode) ->
  [FallbackLbl, InlineLbl] = create_lbls(2),
  SizeTest = hipe_rtl:mk_branch(SizeReg, le, hipe_rtl:mk_imm(?MAX_SMALL_BITS),
                                hipe_rtl:label_name(InlineLbl),
                                hipe_rtl:label_name(FallbackLbl)),
  {[SizeTest, InlineLbl], [FallbackLbl | CCode]}.

%%------------------------------------------------------------------------
%% From hipe_rtl_binutil.erl
%%------------------------------------------------------------------------

%% Loads a Size-bit integer that may start at any bit position into a
%% scratch register and converts it to a term in Dst1 via
%% do_bignum_code/5, which ends by jumping to TrueLblName.
get_unaligned_int(Dst1, Size, Base, Offset, Shiftr, Type, TrueLblName) ->
  [Raw] = create_regs(1),
  Load = get_maybe_unaligned_int_to_reg(Raw, Size, Base, Offset, Shiftr, Type),
  Tag = do_bignum_code(Size, Type, Raw, Dst1, TrueLblName),
  [Load, Tag].

%% Run-time dispatch on the low bits of Offset: when they are zero the
%% byte-aligned loader get_int_to_reg/6 is used, otherwise
%% get_unaligned_int_to_reg/7 is used with the low bits as bit offset.
%% Both paths meet at a common end label.
get_maybe_unaligned_int_to_reg(Reg, Size, Base, Offset, Shiftr, Type) ->
  [LowBits] = create_regs(1),
  [AlignedLbl, UnalignedLbl, DoneLbl] = create_lbls(3),
  Dispatch = hipe_rtl:mk_alub(LowBits, Offset, 'and',
                              hipe_rtl:mk_imm(?LOW_BITS), eq,
                              hipe_rtl:label_name(AlignedLbl),
                              hipe_rtl:label_name(UnalignedLbl)),
  AlignedLoad = get_int_to_reg(Reg, Size, Base, Offset, Shiftr, Type),
  SkipUnaligned = hipe_rtl:mk_goto(hipe_rtl:label_name(DoneLbl)),
  UnalignedLoad = get_unaligned_int_to_reg(Reg, Size, Base, Offset,
                                           LowBits, Shiftr, Type),
  [Dispatch,
   AlignedLbl, AlignedLoad, SkipUnaligned,
   UnalignedLbl, UnalignedLoad,
   DoneLbl].
%% Emits code that loads a Size-bit integer starting LowBits bits into
%% the byte at bit offset Offset (relative to Base) and leaves the value
%% in Reg.  The bytes are first loaded into LoadDst, shifted left by
%% ShiftBits, and finally shifted right (with the Shiftr operator) by
%% the word width minus Size.
%% MinLoad is the number of bytes that Size bits occupy when byte
%% aligned; MaxLoad is one byte more.
%% NOTE: in the branch taken when MinLoad equals the word size, the
%% LowBits register is overwritten by the emitted code.
get_unaligned_int_to_reg(Reg, Size, Base, Offset, LowBits, Shiftr, Type) ->
  [ByteOffset, ShiftBits, LoadDst, Tmp, TotBits] = create_gcsafe_regs(5),
  [MoreLbl, LessLbl, JoinLbl] = create_lbls(3),
  WordSize = hipe_rtl_arch:word_size(),
  MinLoad = (Size-1) div ?BYTE_SIZE +1,
  MaxLoad = MinLoad + 1,
  Code1 =
    %% TotBits := LowBits + Size; ByteOffset := Offset in whole bytes.
    [hipe_rtl:mk_alu(TotBits, LowBits, 'add', hipe_rtl:mk_imm(Size)),
     hipe_rtl:mk_alu(ByteOffset, Offset, 'srl', hipe_rtl:mk_imm(?BYTE_SHIFT))],
  Code2 =
    case {Size rem ?BYTE_SIZE, MinLoad} of
      {1, _} ->
        %% Size rem ?BYTE_SIZE is 1: only MinLoad bytes are loaded.
        [load_bytes(LoadDst, Base, ByteOffset, Type, MinLoad),
         hipe_rtl:mk_alu(ShiftBits, LowBits, 'add',
                         hipe_rtl:mk_imm((WordSize-MinLoad)*?BYTE_SIZE))];
      {_, WordSize} ->
        %% WordSize is already bound, so this clause matches when
        %% MinLoad equals the word size.  The emitted code decides at
        %% run time whether TotBits fits in MinLoad bytes.
        UnsignedBig = {unsigned, big},
        [hipe_rtl:mk_branch(TotBits, le, hipe_rtl:mk_imm(MinLoad*?BYTE_SIZE),
                            hipe_rtl:label_name(LessLbl),
                            hipe_rtl:label_name(MoreLbl)),
         LessLbl,
         %% Fits: same code as the {1, _} case.
         load_bytes(LoadDst, Base, ByteOffset, Type, MinLoad),
         hipe_rtl:mk_alu(ShiftBits, LowBits, 'add',
                         hipe_rtl:mk_imm((WordSize-MinLoad)*?BYTE_SIZE)),
         hipe_rtl:mk_goto(hipe_rtl:label_name(JoinLbl)),
         MoreLbl,
         %% Does not fit: load MinLoad bytes, shift them left by
         %% LowBits, then load one more byte, shift it right by
         %% ?BYTE_SIZE - LowBits and or it in.  ShiftBits is 0 here.
         load_bytes(LoadDst, Base, ByteOffset, UnsignedBig, MinLoad),
         hipe_rtl:mk_alu(LoadDst, LoadDst, 'sll', LowBits),
         load_bytes(Tmp, Base, ByteOffset, UnsignedBig, 1),
         hipe_rtl:mk_alu(LowBits, hipe_rtl:mk_imm(?BYTE_SIZE), 'sub', LowBits),
         hipe_rtl:mk_alu(Tmp, Tmp, 'srl', LowBits),
         hipe_rtl:mk_alu(LoadDst, LoadDst, 'or', Tmp),
         hipe_rtl:mk_move(ShiftBits, hipe_rtl:mk_imm(0)),
         JoinLbl];
      {_, _} ->
        %% General case: load one byte more than the aligned minimum.
        [load_bytes(LoadDst, Base, ByteOffset, Type, MaxLoad),
         hipe_rtl:mk_alu(ShiftBits, LowBits, 'add',
                         hipe_rtl:mk_imm((WordSize-MaxLoad)*?BYTE_SIZE))]
    end,
  Code3 =
    %% Shift left by ShiftBits, then right by (word width - Size).
    [hipe_rtl:mk_alu(Tmp, LoadDst, sll, ShiftBits),
     hipe_rtl:mk_alu(Reg, Tmp, Shiftr,
                     hipe_rtl:mk_imm(WordSize*?BYTE_SIZE-Size))],
  Code1 ++ Code2 ++ Code3.
%% Loads a byte-aligned Size-bit integer into a scratch register and
%% converts it to a term in Dst1 via do_bignum_code/5, which ends by
%% jumping to TrueLblName.
get_int(Dst1, Size, Base, Offset, Shiftr, Type, TrueLblName) ->
  [Reg] = create_gcsafe_regs(1),
  [get_int_to_reg(Reg, Size, Base, Offset, Shiftr, Type),
   do_bignum_code(Size, Type, Reg, Dst1, TrueLblName)].

%% Emits code that loads the bytes covering a Size-bit integer at bit
%% offset Offset (converted to a byte offset) into Reg.  When Size is
%% not a whole number of bytes, Reg is then shifted right (with Shiftr)
%% by the number of unused bits in the last byte.
get_int_to_reg(Reg, Size, Base, Offset, Shiftr, Type) ->
  [ByteOffset] = create_gcsafe_regs(1),
  Code1 =
    [hipe_rtl:mk_alu(ByteOffset, Offset, srl, hipe_rtl:mk_imm(?BYTE_SHIFT)),
     load_bytes(Reg, Base, ByteOffset, Type, ((Size-1) div ?BYTE_SIZE +1))],
  Code2 =
    case Size rem ?BYTE_SIZE of
      0 ->
        [];
      _ ->
        [hipe_rtl:mk_alu(Reg, Reg, Shiftr,
                         hipe_rtl:mk_imm(?BYTE_SIZE -Size rem ?BYTE_SIZE))]
    end,
  Code1 ++ Code2.

%% Emits a run-time loop that assembles a big-endian integer from the
%% bytes between bit offsets Offset and NewOffset.  If the two offsets
%% are equal the result is 0.  The first byte is loaded with the
%% signedness of Type, the following ones unsigned, each shifted in from
%% the right.  If NewOffset is not byte aligned the accumulated value is
%% finally shifted right by ?BYTE_SIZE minus the low bits of NewOffset.
%% NOTE(review): do_bignum_code/5 is called with a literal size of 64,
%% presumably to force its bignum-capable path - confirm.
get_big_unknown_int(Dst1, Base, Offset, NewOffset,
                    Shiftr, Type, TrueLblName) ->
  [LoadDst, ByteOffset, Limit, Tmp, LowBits] = create_gcsafe_regs(5),
  [ContLbl, BackLbl, LoopLbl, TagLbl, LastLbl, EndLbl] = create_lbls(6),
  [hipe_rtl:mk_move(LoadDst, hipe_rtl:mk_imm(0)),
   %% Zero-size segment: skip straight to tagging.
   hipe_rtl:mk_branch(NewOffset, ne, Offset, hipe_rtl:label_name(ContLbl),
                      hipe_rtl:label_name(TagLbl), 0.99),
   ContLbl,
   %% Limit := byte index of the last bit of the segment.
   hipe_rtl:mk_alu(Limit, NewOffset, sub, hipe_rtl:mk_imm(1)),
   hipe_rtl:mk_alu(Limit, Limit, srl, hipe_rtl:mk_imm(?BYTE_SHIFT)),
   hipe_rtl:mk_alu(ByteOffset, Offset, srl, hipe_rtl:mk_imm(?BYTE_SHIFT)),
   %% load_bytes/5 with count 1 also increments ByteOffset.
   load_bytes(LoadDst, Base, ByteOffset, Type, 1),
   BackLbl,
   hipe_rtl:mk_branch(ByteOffset, le, Limit, hipe_rtl:label_name(LoopLbl),
                      hipe_rtl:label_name(EndLbl)),
   LoopLbl,
   load_bytes(Tmp, Base, ByteOffset, {unsigned, big}, 1),
   hipe_rtl:mk_alu(LoadDst, LoadDst, sll, hipe_rtl:mk_imm(?BYTE_SIZE)),
   hipe_rtl:mk_alu(LoadDst, LoadDst, 'or', Tmp),
   hipe_rtl:mk_goto(hipe_rtl:label_name(BackLbl)),
   EndLbl,
   hipe_rtl:mk_alub(LowBits, NewOffset, 'and', hipe_rtl:mk_imm(?LOW_BITS), eq,
                    hipe_rtl:label_name(TagLbl), hipe_rtl:label_name(LastLbl)),
   LastLbl,
   %% Drop the unused low bits of the last byte.
   hipe_rtl:mk_alu(LowBits, hipe_rtl:mk_imm(?BYTE_SIZE), 'sub', LowBits),
   hipe_rtl:mk_alu(LoadDst, LoadDst, Shiftr, LowBits),
   TagLbl] ++
    do_bignum_code(64, Type, LoadDst, Dst1, TrueLblName).

%% Little-endian counterpart of get_big_unknown_int/7.  All bytes before
%% the last one are loaded unsigned and or-ed in at an increasing shift
%% (ShiftReg grows by ?BYTE_SIZE per byte).  The last byte is loaded
%% with the signedness of Type, shifted right by the number of unused
%% bits and then placed at the top.
%% NOTE(review): do_bignum_code/5 is called with a literal size of 64,
%% presumably to force its bignum-capable path - confirm.
get_little_unknown_int(Dst1, Base, Offset, NewOffset,
                       Shiftr, Type, TrueLblName) ->
  [LoadDst, ByteOffset, Limit, ShiftReg, LowBits, Tmp] = create_gcsafe_regs(6),
  [ContLbl, BackLbl, LoopLbl, DoneLbl, TagLbl] = create_lbls(5),
  [hipe_rtl:mk_move(LoadDst, hipe_rtl:mk_imm(0)),
   %% Zero-size segment: skip straight to tagging.
   hipe_rtl:mk_branch(NewOffset, ne, Offset, hipe_rtl:label_name(ContLbl),
                      hipe_rtl:label_name(TagLbl), 0.99),
   ContLbl,
   hipe_rtl:mk_alu(Tmp, NewOffset, sub, hipe_rtl:mk_imm(1)),
   hipe_rtl:mk_alu(ByteOffset, Offset, srl, hipe_rtl:mk_imm(?BYTE_SHIFT)),
   hipe_rtl:mk_alu(Limit, Tmp, srl, hipe_rtl:mk_imm(?BYTE_SHIFT)),
   hipe_rtl:mk_move(ShiftReg, hipe_rtl:mk_imm(0)),
   BackLbl,
   hipe_rtl:mk_branch(ByteOffset, lt, Limit,
                      hipe_rtl:label_name(LoopLbl),
                      hipe_rtl:label_name(DoneLbl)),
   LoopLbl,
   load_bytes(Tmp, Base, ByteOffset, {unsigned, big}, 1),
   hipe_rtl:mk_alu(Tmp, Tmp, sll, ShiftReg),
   hipe_rtl:mk_alu(ShiftReg, ShiftReg, add, hipe_rtl:mk_imm(?BYTE_SIZE)),
   hipe_rtl:mk_alu(LoadDst, LoadDst, 'or', Tmp),
   hipe_rtl:mk_goto(hipe_rtl:label_name(BackLbl)),
   DoneLbl,
   %% LowBits := (?BYTE_SIZE - (NewOffset and ?LOW_BITS)) and ?LOW_BITS,
   %% i.e. the number of unused bits in the last byte.
   hipe_rtl:mk_alu(LowBits, NewOffset, 'and', hipe_rtl:mk_imm(?LOW_BITS)),
   hipe_rtl:mk_alu(LowBits, hipe_rtl:mk_imm(?BYTE_SIZE), sub, LowBits),
   hipe_rtl:mk_alu(LowBits, LowBits, 'and', hipe_rtl:mk_imm(?LOW_BITS)),
   load_bytes(Tmp, Base, ByteOffset, Type, 1),
   hipe_rtl:mk_alu(Tmp, Tmp, Shiftr, LowBits),
   hipe_rtl:mk_alu(Tmp, Tmp, sll, ShiftReg),
   hipe_rtl:mk_alu(LoadDst, LoadDst, 'or', Tmp),
   TagLbl] ++
    do_bignum_code(64, Type, LoadDst, Dst1, TrueLblName).

%% Converts the raw integer in Src to a term in Dst1 and jumps to
%% TrueLblName.  When Size is at most ?MAX_SMALL_BITS the value is
%% tagged directly as a fixnum.  Otherwise a heap test is emitted first
%% (make_int_gc_code/1) and the signed or unsigned bignum-capable
%% conversion is used, depending on the signedness in the type pair.
do_bignum_code(Size, {Signedness,_}, Src, Dst1, TrueLblName)
  when is_integer(Size) ->
  case {Size > ?MAX_SMALL_BITS, Signedness} of
    {false, _} ->
      [hipe_tagscheme:tag_fixnum(Dst1, Src),
       hipe_rtl:mk_goto(TrueLblName)];
    {true, signed} ->
      make_int_gc_code(Size) ++
        signed_bignum(Dst1, Src, TrueLblName);
    {true, unsigned} ->
      make_int_gc_code(Size) ++
        unsigned_bignum(Dst1, Src, TrueLblName)
  end.
%% Converts the signed raw integer Src to a term in Dst1.  The value is
%% tagged and untagged again; if the round trip reproduces Src it fits
%% in a fixnum and control goes to TrueLblName with the fixnum in Dst1.
%% Otherwise a bignum is built with hipe_tagscheme:unsafe_mk_big/3.
signed_bignum(Dst1, Src, TrueLblName) ->
  Tmp1 = hipe_rtl:mk_new_reg(),
  BignumLabel = hipe_rtl:mk_new_label(),
  [hipe_tagscheme:realtag_fixnum(Dst1, Src),
   hipe_tagscheme:realuntag_fixnum(Tmp1, Dst1),
   hipe_rtl:mk_branch(Tmp1, eq, Src, TrueLblName,
                      hipe_rtl:label_name(BignumLabel)),
   BignumLabel,
   hipe_tagscheme:unsafe_mk_big(Dst1, Src, signed),
   hipe_rtl:mk_goto(TrueLblName)].

%% Unsigned counterpart of signed_bignum/3.  A Src that compares less
%% than zero as a signed word goes straight to the bignum path;
%% otherwise the same tag/untag round trip decides between fixnum and
%% bignum.
unsigned_bignum(Dst1, Src, TrueLblName) ->
  Tmp1 = hipe_rtl:mk_new_reg_gcsafe(),
  BignumLbl = hipe_rtl:mk_new_label(),
  BignumLblName = hipe_rtl:label_name(BignumLbl),
  NxtLbl = hipe_rtl:mk_new_label(),
  NxtLblName = hipe_rtl:label_name(NxtLbl),
  [hipe_rtl:mk_branch(Src, lt, hipe_rtl:mk_imm(0), BignumLblName, NxtLblName),
   NxtLbl,
   hipe_tagscheme:realtag_fixnum(Dst1, Src),
   hipe_tagscheme:realuntag_fixnum(Tmp1, Dst1),
   hipe_rtl:mk_branch(Tmp1, eq, Src, TrueLblName, BignumLblName),
   BignumLbl,
   hipe_tagscheme:unsafe_mk_big(Dst1, Src, unsigned),
   hipe_rtl:mk_goto(TrueLblName)].

%% load_bytes(Dst, Base, Offset, {Signedness, Endianess}, N)
%% Emits code that loads N bytes starting at Base+Offset into Dst as one
%% integer and advances the Offset register by N.
%% In the clauses spelled out here (1 byte, 3 bytes and the generic
%% loop) only the most significant byte is loaded with Signedness; all
%% other bytes are loaded unsigned so that or-ing them in cannot disturb
%% the bits above them.  The 2- and 4-byte cases are delegated to
%% hipe_rtl_arch.
load_bytes(Dst, Base, Offset, {Signedness, _Endianess},1) ->
  [hipe_rtl:mk_load(Dst, Base, Offset, byte, Signedness),
   hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1))];
load_bytes(Dst, Base, Offset, {Signedness, Endianess},2) ->
  case Endianess of
    big ->
      hipe_rtl_arch:load_big_2(Dst, Base, Offset, Signedness);
    little ->
      hipe_rtl_arch:load_little_2(Dst, Base, Offset, Signedness)
  end;
load_bytes(Dst, Base, Offset, {Signedness, Endianess},3) ->
  Tmp1 = hipe_rtl:mk_new_reg(),
  case Endianess of
    big ->
      %% Most significant byte first (signed if requested), then two
      %% unsigned bytes shifted in from the right.
      [hipe_rtl:mk_load(Dst, Base, Offset, byte, Signedness),
       hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)),
       hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)),
       hipe_rtl:mk_load(Tmp1, Base, Offset, byte, unsigned),
       hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1),
       hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)),
       hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)),
       hipe_rtl:mk_load(Tmp1, Base, Offset, byte, unsigned),
       hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1),
       hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1))];
    little ->
      %% Two unsigned low bytes, then the most significant byte
      %% (signed if requested) shifted to bit 16.
      [hipe_rtl:mk_load(Dst, Base, Offset, byte, unsigned),
       hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)),
       hipe_rtl:mk_load(Tmp1, Base, Offset, byte,unsigned),
       hipe_rtl:mk_alu(Tmp1, Tmp1, sll, hipe_rtl:mk_imm(8)),
       hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1),
       hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)),
       hipe_rtl:mk_load(Tmp1, Base, Offset, byte,Signedness),
       hipe_rtl:mk_alu(Tmp1, Tmp1, sll, hipe_rtl:mk_imm(16)),
       hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1),
       hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1))]
  end;
load_bytes(Dst, Base, Offset, {Signedness, Endianess}, 4) ->
  case Endianess of
    big ->
      hipe_rtl_arch:load_big_4(Dst, Base, Offset, Signedness);
    little ->
      hipe_rtl_arch:load_little_4(Dst, Base, Offset, Signedness)
  end;

load_bytes(Dst, Base, Offset, {Signedness, Endianess}, X) when X > 1 ->
  [LoopLbl, EndLbl] = create_lbls(2),
  [Tmp1, Limit, TmpOffset] = create_regs(3),
  case Endianess of
    big ->
      %% Walk forward from the most significant byte; Offset itself is
      %% the loop counter and ends at Limit = Offset + X.
      [hipe_rtl:mk_alu(Limit, Offset, add, hipe_rtl:mk_imm(X)),
       hipe_rtl:mk_load(Dst, Base, Offset, byte, Signedness),
       hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)),
       LoopLbl,
       hipe_rtl:mk_load(Tmp1, Base, Offset, byte, unsigned),
       hipe_rtl:mk_alu(Offset, Offset, add, hipe_rtl:mk_imm(1)),
       hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)),
       hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1),
       hipe_rtl:mk_branch(Offset, lt, Limit, hipe_rtl:label_name(LoopLbl),
                          hipe_rtl:label_name(EndLbl)),
       EndLbl];
    little ->
      %% Walk backwards from the most significant (last) byte down to
      %% Offset, then set Offset to Limit.
      [hipe_rtl:mk_alu(Limit, Offset, add, hipe_rtl:mk_imm(X)),
       hipe_rtl:mk_alu(TmpOffset, Limit, sub, hipe_rtl:mk_imm(1)),
       hipe_rtl:mk_load(Dst, Base, TmpOffset, byte, Signedness),
       LoopLbl,
       hipe_rtl:mk_alu(TmpOffset, TmpOffset, sub, hipe_rtl:mk_imm(1)),
       %% Lower-order bytes must be loaded unsigned: a sign-extended
       %% byte >= 16#80 would set every higher bit of Dst in the 'or'
       %% below and corrupt the bytes accumulated so far.
       hipe_rtl:mk_load(Tmp1, Base, TmpOffset, byte, unsigned),
       hipe_rtl:mk_alu(Dst, Dst, sll, hipe_rtl:mk_imm(8)),
       hipe_rtl:mk_alu(Dst, Dst, 'or', Tmp1),
       hipe_rtl:mk_branch(Offset, lt, TmpOffset, hipe_rtl:label_name(LoopLbl),
                          hipe_rtl:label_name(EndLbl)),
       EndLbl,
       hipe_rtl:mk_move(Offset, Limit)]
  end.

%% Returns a list of X fresh registers made with hipe_rtl:mk_new_reg/0.
create_regs(X) when X > 0 ->
  [hipe_rtl:mk_new_reg()|create_regs(X-1)];
create_regs(0) ->
  [].

%% Returns a list of X fresh registers made with
%% hipe_rtl:mk_new_reg_gcsafe/0.
create_gcsafe_regs(X) when X > 0 ->
  [hipe_rtl:mk_new_reg_gcsafe()|create_gcsafe_regs(X-1)];
create_gcsafe_regs(0) ->
  [].

%% Emits code that validates a size argument: Var must be a fixnum and
%% must be >= 0, otherwise control goes to FalseLblName.  On success the
%% untagged value is left in Register.
first_part(Var, Register, FalseLblName) ->
  [SuccessLbl1, SuccessLbl2] = create_lbls(2),
  [hipe_tagscheme:test_fixnum(Var, hipe_rtl:label_name(SuccessLbl1),
                              FalseLblName, 0.99),
   SuccessLbl1,
   hipe_tagscheme:fixnum_ge(Var, hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(0)),
                            hipe_rtl:label_name(SuccessLbl2), FalseLblName, 0.99),
   SuccessLbl2,
   hipe_tagscheme:untag_fixnum(Register, Var)].

%% make_size(Unit, BitsVar, FalseLblName) -> {Code, SizeReg}
%% Emits code that computes Unit * BitsVar into a fresh register.
%% Unit 1 needs only the validation of first_part/3; unit ?BYTE_SIZE
%% adds a left shift by ?BYTE_SHIFT; any other unit is multiplied by
%% shift-and-add using the bit positions from number2list/1.
make_size(1, BitsVar, FalseLblName) ->
  [DstReg] = create_regs(1),
  {first_part(BitsVar, DstReg, FalseLblName), DstReg};
make_size(?BYTE_SIZE, BitsVar, FalseLblName) ->
  [DstReg] = create_regs(1),
  Code =
    first_part(BitsVar, DstReg, FalseLblName) ++
    [hipe_rtl:mk_alu(DstReg, DstReg, sll, hipe_rtl:mk_imm(?BYTE_SHIFT))],
  {Code, DstReg};
make_size(UnitImm, BitsVar, FalseLblName) ->
  [DstReg] = create_regs(1),
  UnitList = number2list(UnitImm),
  Code = multiply_code(UnitList, BitsVar, DstReg, FalseLblName),
  {Code, DstReg}.

%% Start of the shift-and-add multiplication.  Result is cleared, the
%% variable is validated and untagged into Register, and Register is
%% tested against the mask set_high(Head): control goes to FalseLblName
%% unless the masked value is zero.
%% NOTE(review): the mask test looks like a guard against bits being
%% shifted out by the largest shift - confirm.
multiply_code(List=[Head|_Tail], Variable, Result, FalseLblName) ->
  Test = set_high(Head),
  Tmp1 = hipe_rtl:mk_new_reg(),
  SuccessLbl = hipe_rtl:mk_new_label(),
  Register = hipe_rtl:mk_new_reg(),
  Code = [hipe_rtl:mk_move(Result, hipe_rtl:mk_imm(0))|
          first_part(Variable, Register, FalseLblName)]
    ++
    [hipe_rtl:mk_alub(Tmp1, Register, 'and', hipe_rtl:mk_imm(Test),
                      eq, hipe_rtl:label_name(SuccessLbl),
                      FalseLblName, 0.99),
     SuccessLbl],
  multiply_code(List, Register, Result, FalseLblName, Tmp1, Code).
+ +multiply_code([ShiftSize|Rest], Register, Result, FalseLblName, Tmp1, OldCode) -> + SuccessLbl = hipe_rtl:mk_new_label(), + Code = OldCode ++ [hipe_rtl:mk_alu(Tmp1, Register, sll, hipe_rtl:mk_imm(ShiftSize)), + hipe_rtl:mk_alub(Result, Tmp1, 'add', Result, not_overflow, hipe_rtl:label_name(SuccessLbl), FalseLblName, 0.99), + SuccessLbl], + multiply_code(Rest, Register, Result, FalseLblName, Tmp1, Code); +multiply_code([], _Register, _Result, _FalseLblName, _Tmp1, Code) -> + Code. + +number2list(X) when is_integer(X), X >= 0 -> + number2list(X, []). + +number2list(1, Acc) -> + lists:reverse([0|Acc]); +number2list(0, Acc) -> + lists:reverse(Acc); +number2list(X, Acc) -> + F = floorlog2(X), + number2list(X-(1 bsl F), [F|Acc]). + +floorlog2(X) -> + round(math:log(X)/math:log(2)-0.5). + +set_high(X) -> + set_high(X, 0). + +set_high(0, Y) -> + Y; +set_high(X, Y) -> + set_high(X-1, Y+(1 bsl (27-X))). + +is_illegal_const(Const) -> + Const >= 1 bsl (hipe_rtl_arch:word_size() * ?BYTE_SIZE) orelse Const < 0. diff --git a/lib/hipe/rtl/hipe_rtl_cfg.erl b/lib/hipe/rtl/hipe_rtl_cfg.erl new file mode 100644 index 0000000000..b6c1d63262 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_cfg.erl @@ -0,0 +1,201 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(hipe_rtl_cfg). 
-export([init/1,
         labels/1,
         params/1, params_update/2,
         start_label/1,
         succ/2,
         pred/2,
         bb/2, bb_add/3, bb_insert_between/5,
         redirect/4,
         remove_trivial_bbs/1, remove_unreachable_code/1,
         linearize/1,
         pp/1, pp/2]).
-export([preorder/1, postorder/1, reverse_postorder/1]).

-define(RTL_CFG, true). % needed for cfg.inc below

-include("../main/hipe.hrl").
-include("hipe_rtl.hrl").
-include("../flow/cfg.hrl").
-include("../flow/cfg.inc").

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% CFG interface to RTL.
%%
%% Most functions below are one-line adapters onto hipe_rtl.
%% NOTE(review): presumably they are the callbacks the generic CFG code in
%% cfg.inc expects from an instantiating module -- confirm against cfg.inc.

%% Builds a CFG from a linear RTL function. The start label is the name of
%% the first instruction of the code, so the code must begin with a label.
%% An empty CFG is created from the function's name, data, closure/leaf
%% flags and parameters, the RTL info field is copied over, and take_bbs/2
%% then consumes the code list.
init(Rtl) ->
  %% hipe_rtl:pp(Rtl),
  Code = hipe_rtl:rtl_code(Rtl),
  StartLabel = hipe_rtl:label_name(hd(Code)),
  CFG0 = mk_empty_cfg(hipe_rtl:rtl_fun(Rtl),
                      StartLabel,
                      hipe_rtl:rtl_data(Rtl),
                      hipe_rtl:rtl_is_closure(Rtl),
                      hipe_rtl:rtl_is_leaf(Rtl),
                      hipe_rtl:rtl_params(Rtl)),
  CFG = info_update(CFG0, hipe_rtl:rtl_info(Rtl)),
  take_bbs(Code, CFG).

%% @spec is_comment(hipe_rtl:rtl_instruction()) -> boolean()
%% @doc  Succeeds if Instr has no effect.
is_comment(Instr) ->
  hipe_rtl:is_comment(Instr).

%% @spec is_goto(hipe_rtl:rtl_instruction()) -> boolean()
%% @doc  Succeeds if Instr is just a jump (no side-effects).
is_goto(Instr) ->
  hipe_rtl:is_goto(Instr).

%% Delegates to hipe_rtl:is_label/1.
is_label(Instr) ->
  hipe_rtl:is_label(Instr).

%% Delegates to hipe_rtl:label_name/1.
label_name(Instr) ->
  hipe_rtl:label_name(Instr).

%% Delegates to hipe_rtl:mk_label/1.
mk_label(Name) ->
  hipe_rtl:mk_label(Name).

%% Delegates to hipe_rtl:mk_goto/1.
mk_goto(Name) ->
  hipe_rtl:mk_goto(Name).

%% Returns the list of labels control may continue at after Instr.
%% Two-way branches list the true label before the false label; a call
%% lists its continuation and, only when its fail label is not [], the fail
%% label after it. Instructions of any other kind yield [].
branch_successors(Instr) ->
  case Instr of
    #branch{} -> [hipe_rtl:branch_true_label(Instr),
                  hipe_rtl:branch_false_label(Instr)];
    #alub{} -> [hipe_rtl:alub_true_label(Instr),
                hipe_rtl:alub_false_label(Instr)];
    #switch{} -> hipe_rtl:switch_labels(Instr);
    #call{} ->
      case hipe_rtl:call_fail(Instr) of
        [] -> [hipe_rtl:call_continuation(Instr)];
        Fail -> [hipe_rtl:call_continuation(Instr),Fail]
      end;
    #goto{} -> [hipe_rtl:goto_label(Instr)];
    #goto_index{} -> hipe_rtl:goto_index_labels(Instr);
    _ -> []
  end.
%% The fail label of a call wrapped in a list (whatever hipe_rtl:call_fail/1
%% returns, including []); [] for every other instruction.
fails_to(#call{} = Call) -> [hipe_rtl:call_fail(Call)];
fails_to(_Other) -> [].

%% True for instructions that end a basic block. A call counts only when it
%% carries a fail label or a continuation label.
is_branch(#branch{}) -> true;
is_branch(#alub{}) -> true;
is_branch(#switch{}) -> true;
is_branch(#goto{}) -> true;
is_branch(#goto_index{}) -> true;
is_branch(#enter{}) -> true;
is_branch(#return{}) -> true;
is_branch(#call{} = Call) ->
  hipe_rtl:call_fail(Call) =/= [] orelse
    hipe_rtl:call_continuation(Call) =/= [];
is_branch(_Other) -> false.

%% True only for branch, switch and goto instructions.
is_pure_branch(#branch{}) -> true;
is_pure_branch(#switch{}) -> true;
is_pure_branch(#goto{}) -> true;
is_pure_branch(_Other) -> false.

%% Delegates to hipe_rtl:redirect_jmp/3.
redirect_jmp(Jump, OldTarget, NewTarget) ->
  hipe_rtl:redirect_jmp(Jump, OldTarget, NewTarget).

%% Rewrites every instruction of the listed basic blocks with rewrite/2 and
%% stores the updated blocks back into the CFG, one label at a time.
redirect_ops(Labels, CFG, Map) ->
  lists:foldl(fun(Label, AccCFG) ->
                  Block = bb(AccCFG, Label),
                  Rewritten = [rewrite(Instr, Map) || Instr <- hipe_bb:code(Block)],
                  bb_add(AccCFG, Label, hipe_bb:code_update(Block, Rewritten))
              end, CFG, Labels).

%% A load_address whose type is not 'constant' gets its address replaced by
%% find_new_label(Addr, Map); every other instruction is returned unchanged.
rewrite(#load_address{} = Load, Map) ->
  case hipe_rtl:load_address_type(Load) of
    constant ->
      Load;
    _ ->
      NewAddr = find_new_label(hipe_rtl:load_address_addr(Load), Map),
      hipe_rtl:load_address_addr_update(Load, NewAddr)
  end;
rewrite(Other, _Map) ->
  Other.


%% Pretty-prints the linearized CFG.
pp(CFG) ->
  hipe_rtl:pp(linearize(CFG)).

%% Pretty-prints the linearized CFG on the device Dev.
pp(Dev, CFG) ->
  hipe_rtl:pp(Dev, linearize(CFG)).

%% Turns the CFG back into a linear RTL function. The variable and label
%% ranges are taken from the current hipe_gensym state for 'rtl', and the
%% CFG's info field is copied onto the result.
linearize(CFG) ->
  Code = linearize_cfg(CFG),
  Fun = function(CFG),
  Params = params(CFG),
  IsClosure = is_closure(CFG),
  IsLeaf = is_leaf(CFG),
  Data = data(CFG),
  VarRange = hipe_gensym:var_range(rtl),
  LabelRange = hipe_gensym:label_range(rtl),
  Rtl = hipe_rtl:mk_rtl(Fun, Params, IsClosure, IsLeaf, Code, Data,
                        VarRange, LabelRange),
  hipe_rtl:rtl_info_update(Rtl, info(CFG)).

%% %% Warning: this arity might not be the true arity.
%% %% The true arity of a closure usually differs.
%% arity(CFG) ->
%% {_M,_F,A} = function(CFG),
%% A.

%% init_gensym(CFG)->
%% HighestVar = find_highest_var(CFG),
%% HighestLabel = find_highest_label(CFG),
%% hipe_gensym:init(),
%% hipe_gensym:set_var(rtl, HighestVar),
%% hipe_gensym:set_label(rtl, HighestLabel).
+%% +%% highest_var(Code)-> +%% hipe_rtl:highest_var(Code). + +is_phi(I) -> + hipe_rtl:is_phi(I). + +phi_remove_pred(I, Pred) -> + hipe_rtl:phi_remove_pred(I, Pred). + +phi_redirect_pred(I, OldPred, NewPred) -> + hipe_rtl:phi_redirect_pred(I, OldPred, NewPred). diff --git a/lib/hipe/rtl/hipe_rtl_cleanup_const.erl b/lib/hipe/rtl/hipe_rtl_cleanup_const.erl new file mode 100644 index 0000000000..d3e71a56c1 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_cleanup_const.erl @@ -0,0 +1,85 @@ +%%% -*- erlang-indent-level: 2 -*- +%%% +%%% %CopyrightBegin% +%%% +%%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%%% +%%% The contents of this file are subject to the Erlang Public License, +%%% Version 1.1, (the "License"); you may not use this file except in +%%% compliance with the License. You should have received a copy of the +%%% Erlang Public License along with this software. If not, it can be +%%% retrieved online at http://www.erlang.org/. +%%% +%%% Software distributed under the License is distributed on an "AS IS" +%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%%% the License for the specific language governing rights and limitations +%%% under the License. +%%% +%%% %CopyrightEnd% +%%% +%%%------------------------------------------------------------------- +%%% File : hipe_rtl_cleanup_const.erl +%%% Author : Tobias Lindahl <tobiasl@it.uu.se> +%%% Description : +%%% +%%% Created : 5 Mar 2004 by Tobias Lindahl <tobiasl@it.uu.se> +%%%------------------------------------------------------------------- + +%% Big constants (floats, bignums) can be used as arguments to +%% arbitrary instructions in RTL. Since these are located in the +%% constants area and the only instruction that currently can access +%% them is load_address, the constants have to be moved out of the +%% instruction and loaded into temporary variables before the +%% instruction. 
+%% +%% Some backends can make use of the information that the arguments +%% are really constants. Here is the place to add new backend-specific +%% behaviour depending on this. + +%%-------------------------------------------------------------------- + +-module(hipe_rtl_cleanup_const). + +-export([cleanup/1]). + +-include("hipe_rtl.hrl"). + +%%-------------------------------------------------------------------- + +%%-spec cleanup(#rtl{}) -> #rtl{}. + +cleanup(Rtl) -> + Code = cleanup(hipe_rtl:rtl_code(Rtl), []), + hipe_rtl:rtl_code_update(Rtl, Code). + +cleanup([I|Left], Acc) -> + Args = hipe_rtl:args(I), + case [X || X <- Args, hipe_rtl:is_const_label(X)] of + [] -> + cleanup(Left, [I|Acc]); + ConstArgs -> + NewIns = cleanup_instr(ConstArgs, I), + cleanup(Left, NewIns ++ Acc) + end; +cleanup([], Acc) -> + lists:reverse(Acc). + +cleanup_instr(Consts, I) -> + cleanup_instr(ordsets:from_list(Consts), I, []). + +cleanup_instr([Const|Left], I, Acc) -> + Dst = hipe_rtl:mk_new_var(), + ConstLabel = hipe_rtl:const_label_label(Const), + Load = hipe_rtl:mk_load_address(Dst, ConstLabel, constant), + case I of + X when is_record(X, fp_unop) orelse is_record(X, fp) -> + Fdst = hipe_rtl:mk_new_fpreg(), + Fconv = hipe_tagscheme:unsafe_untag_float(Fdst, Dst), + NewI = hipe_rtl:subst_uses([{Const, Fdst}], I), + cleanup_instr(Left, NewI, Fconv ++ [Load|Acc]); + _ -> + NewI = hipe_rtl:subst_uses([{Const, Dst}], I), + cleanup_instr(Left, NewI, [Load|Acc]) + end; +cleanup_instr([], I, Acc) -> + [I|Acc]. diff --git a/lib/hipe/rtl/hipe_rtl_exceptions.erl b/lib/hipe/rtl/hipe_rtl_exceptions.erl new file mode 100644 index 0000000000..879b84c0b0 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_exceptions.erl @@ -0,0 +1,120 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. 
+%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved +%% ==================================================================== +%% Filename : hipe_rtl_exceptions.erl +%% Module : hipe_rtl_exceptions +%% Purpose : +%% Notes : +%% History : * 2001-04-10 Erik Johansson (happi@it.uu.se): +%% Created. +%% CVS : +%% $Id$ +%% ==================================================================== +%% Exports : +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_exceptions). + +-export([gen_fail/3, gen_begin_handler/3]). + +-include("../main/hipe.hrl"). +-include("hipe_literals.hrl"). + +%% -------------------------------------------------------------------- +%% Handle the Icode instruction +%% FAIL +%% +gen_fail(Class, Args, L) -> + case Args of + [Reason] -> + case Class of + exit -> + gen_exit(Reason, L); + throw -> + gen_throw(Reason, L); + error -> + gen_error(Reason, L) + end; + [Arg1,Arg2] -> + case Class of + error -> + Reason = Arg1, ArgList = Arg2, + gen_error(Reason, ArgList, L); + rethrow -> + Exception = Arg1, Reason = Arg2, + gen_rethrow(Exception, Reason, L) + end + end. 
%% --------------------------------------------------------------------
%% Exception handler glue; interfaces between the runtime system's
%% exception state and the Icode view of exception handling.

%% Translates an Icode begin_handler instruction. Its three destination
%% variables are mapped to RTL variables (extending VarMap) and are loaded
%% from the process control block slots ?P_ARG0 (tag), ?P_FVALUE (value)
%% and ?P_FTRACE (trace). Returns {Code, NewVarMap, ConstTab}; ConstTab is
%% passed through untouched.
gen_begin_handler(I, VarMap, ConstTab) ->
  Ds = hipe_icode:begin_handler_dstlist(I),
  {Vars, VarMap1} = hipe_rtl_varmap:ivs2rvs(Ds, VarMap),
  %% The destination list must hold exactly these three variables.
  [FTagVar,FValueVar,FTraceVar] = Vars,
  {[hipe_rtl:mk_comment('begin_handler'),
    hipe_rtl_arch:pcb_load(FValueVar, ?P_FVALUE),
    hipe_rtl_arch:pcb_load(FTraceVar, ?P_FTRACE),
    %% synthesized from P->freason by hipe_handle_exception()
    hipe_rtl_arch:pcb_load(FTagVar, ?P_ARG0)
   ],
   VarMap1, ConstTab}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Exceptions
%%
%% Each generator below emits a call to the function that raises the
%% exception; L is the fail label handed on to gen_fail_call/3.

%% exit(Reason)
gen_exit(Reason, L) ->
  gen_fail_call({erlang,exit,1}, [Reason], L).

%% throw(Reason)
gen_throw(Reason, L) ->
  gen_fail_call({erlang,throw,1}, [Reason], L).

%% error(Reason)
gen_error(Reason, L) ->
  gen_fail_call({erlang,error,1}, [Reason], L).

%% error(Reason, ArgList)
gen_error(Reason, ArgList, L) ->
  gen_fail_call({erlang,error,2}, [Reason,ArgList], L).

%% Re-raises via the 'rethrow' primitive rather than an MFA.
gen_rethrow(Exception, Reason, L) ->
  gen_fail_call(rethrow, [Exception,Reason], L).

%% Generic fail. We can't use 'enter' with a fail label (there can be no
%% stack descriptor info for an enter), so for a non-nil fail label we
%% generate a call followed by a dummy return.
%%
%% Update: The runtime system now interprets the return address of
%% the BIF call in order to list the invoking MFA in the stack trace.
%% Generating tailcalls here defeats that purpose, so we no longer do that.

%%gen_fail_call(Fun, Args, []) ->
%%  [hipe_rtl:mk_enter(Fun, Args, remote)];
gen_fail_call(Fun, Args, L) ->
  ContLbl = hipe_rtl:mk_new_label(),
  Cont = hipe_rtl:label_name(ContLbl),
  %% Value of the dummy return placed at the continuation label.
  Zero = hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(0)),
  [hipe_rtl:mk_call([], Fun, Args, Cont, L, remote),
   ContLbl,
   hipe_rtl:mk_return([Zero])].
diff --git a/lib/hipe/rtl/hipe_rtl_lcm.erl b/lib/hipe/rtl/hipe_rtl_lcm.erl new file mode 100644 index 0000000000..5d65389d48 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_lcm.erl @@ -0,0 +1,1696 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% File : hipe_rtl_lcm.erl +%% Author : Henrik Nyman and Erik Cedheim +%% Description : Performs Lazy Code Motion on RTL +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% @doc +%% +%% This module implements Lazy Code Motion on RTL. +%% +%% @end +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_lcm). + +-export([rtl_lcm/2]). + +-define(SETS, ordsets). %% Which set implementation module to use + %% We have tried gb_sets, sets and ordsets and + %% ordsets seems to be a lot faster according to + %% our test runs. + +-include("../main/hipe.hrl"). +-include("hipe_rtl.hrl"). +-include("../flow/cfg.hrl"). + +%%-define(LCM_DEBUG, true). %% When defined and true, produces debug printouts + +%%============================================================================= + +%% +%% @doc Performs Lazy Code Motion on RTL. +%% + +-spec rtl_lcm(cfg(), comp_options()) -> cfg(). 
%% Driver for the pass: precalculates the data-flow sets, optionally
%% pretty-prints them (option pp_rtl_lcm), moves the expressions with
%% perform_lcm/9 and finally rewrites the original assignments of every
%% moved expression. Returns the transformed CFG.
rtl_lcm(CFG, Options) ->
  %% Perform pre-calculation of the data sets.
  ?opt_start_timer("RTL LCM precalc"),
  {NodeInfo, EdgeInfo, AllExpr, ExprMap, IdMap, Labels} = lcm_precalc(CFG, Options),
  ?opt_stop_timer("RTL LCM precalc"),
  %% {NodeInfo, EdgeInfo, AllExpr, ExprMap, Labels} = 
  %% ?option_time(lcm_precalc(CFG, Options), "RTL LCM precalc", Options),
  
  pp_debug("-------------------------------------------------~n",[]),
  %% pp_debug( "~w~n", [MFA]),

  %% A check if we should pretty print the result.
  case proplists:get_bool(pp_rtl_lcm, Options) of
    true->
      pp_debug("-------------------------------------------------~n",[]),
      %% pp_debug("AllExpr: ~w~n", [AllExpr]),
      pp_debug("AllExpr:~n", []),
      pp_exprs(ExprMap, IdMap, ?SETS:to_list(AllExpr)),
      %% pp_sets(ExprMap, NodeInfo, EdgeInfo, AllExpr, CFG2<-ERROR!, Labels);
      pp_sets(ExprMap, IdMap, NodeInfo, EdgeInfo, AllExpr, CFG, Labels);
    _ ->
      ok
  end,

  pp_debug("-------------------------------------------------~n",[]),
  %% CFG1 and MoveSet are bound inside the macro argument and used below;
  %% the pass starts with an empty edge->block map and an empty move set.
  ?option_time({CFG1, MoveSet} = perform_lcm(CFG, NodeInfo, EdgeInfo, ExprMap, 
                                             IdMap, AllExpr, mk_edge_bb_map(), 
                                             ?SETS:new(), Labels),
               "RTL LCM perform_lcm", Options),

  %% Scan through list of moved expressions and replace their 
  %% assignments with the new temporary created for that expression
  MoveList = ?SETS:to_list(MoveSet),
  %% CFG2 is likewise bound inside the macro argument.
  ?option_time(CFG2 = moved_expr_replace_assignments(CFG1, ExprMap, IdMap,
                                                     MoveList),
               "RTL LCM moved_expr_replace_assignments", Options),
  pp_debug("-------------------------------------------------~n~n",[]),

  CFG2.

%%=============================================================================
%% Performs lazy code motion given the pre-calculated data sets.
%% Visits the labels in the given order. For each block, the expressions in
%% its delete set are replaced inside the block, and for each outgoing edge
%% the expressions in the edge's insert set are inserted. Every expression
%% that was deleted or inserted somewhere is collected in the move set.
%% Returns {NewCFG, NewMoveSet}.
perform_lcm(CFG, NodeInfo, EdgeInfo, ExprMap, IdMap, _AllExp, BetweenMap,
            MoveSet, Labels) ->
  VisitLabel =
    fun(Label, {CFG0, BtwMap0, Moved0}) ->
        Code0 = hipe_bb:code(hipe_rtl_cfg:bb(CFG0, Label)),
        DeleteSet = delete(NodeInfo, Label),

        %% Rewrite the block only when there is something to delete from it.
        {CFG1, Moved1} =
          case ?SETS:size(DeleteSet) of
            0 ->
              {CFG0, Moved0};
            _ ->
              pp_debug("Label ~w: Expressions Deleted: ~n", [Label]),
              Code1 = delete_exprs(Code0, ExprMap, IdMap,
                                   ?SETS:to_list(DeleteSet)),
              NewBB = hipe_bb:mk_bb(Code1),
              {hipe_rtl_cfg:bb_add(CFG0, Label, NewBB),
               ?SETS:union(Moved0, DeleteSet)}
          end,

        %% Handle each outgoing edge, threading the CFG, the map of blocks
        %% created on edges, and the move set through the successors.
        VisitEdge =
          fun(Succ, {EdgeCFG, EdgeBtwMap, EdgeMoved}) ->
              InsertSet = calc_insert_edge(NodeInfo, EdgeInfo, Label, Succ),
              case ?SETS:size(InsertSet) of
                0 ->
                  {EdgeCFG, EdgeBtwMap, EdgeMoved};
                _ ->
                  pp_debug("Label ~w: Expressions Inserted for Successor: ~w~n", [Label, Succ]),
                  InsertList = ?SETS:to_list(InsertSet),
                  {InsCFG, InsBtwMap} =
                    insert_exprs(EdgeCFG, Label, Succ, ExprMap, IdMap,
                                 EdgeBtwMap, InsertList),
                  {InsCFG, InsBtwMap, ?SETS:union(EdgeMoved, InsertSet)}
              end
          end,
        lists:foldl(VisitEdge, {CFG1, BtwMap0, Moved1},
                    hipe_rtl_cfg:succ(CFG1, Label))
    end,
  {FinalCFG, _FinalBtwMap, FinalMoved} =
    lists:foldl(VisitLabel, {CFG, BetweenMap, MoveSet}, Labels),
  {FinalCFG, FinalMoved}.

%%=============================================================================
%% Scan through list of moved expressions and replace their 
%% assignments with the new temporary created for that expression.
%% For every moved expression id that has an entry in ExprMap, visits the
%% {Label, Reg} pairs recorded for it and rewrites the matching assignment
%% in that block (see moved_expr_do_replacement/4). Ids without an entry
%% leave the CFG untouched.
moved_expr_replace_assignments(CFG, ExprMap, IdMap, ExprIds) ->
  lists:foldl(
    fun(ExprId, AccCFG) ->
        Expr = expr_id_map_get_expr(IdMap, ExprId),
        case expr_map_lookup(ExprMap, Expr) of
          none ->
            AccCFG;
          {value, {_, ReplaceList, NewReg}} ->
            lists:foldl(
              fun({Label, Reg}, BlockCFG) ->
                  %% Find and replace expression in block
                  pp_debug("Label ~w: Expressions Replaced:~n", [Label]),
                  OldCode = hipe_bb:code(hipe_rtl_cfg:bb(BlockCFG, Label)),
                  Target = expr_set_dst(Expr, Reg),
                  NewCode = moved_expr_do_replacement(Target, Reg, NewReg,
                                                      OldCode),
                  hipe_rtl_cfg:bb_add(BlockCFG, Label, hipe_bb:mk_bb(NewCode))
              end, AccCFG, ReplaceList)
        end
    end, CFG, ExprIds).

%% Replaces each instruction that is exactly equal to Expr by two
%% instructions: the same expression with its destination set to NewReg,
%% followed by a move built from Reg and NewReg. Other instructions are
%% kept as they are, in their original order.
moved_expr_do_replacement(Expr, Reg, NewReg, Code) ->
  Replace =
    fun(Instr) when Instr =:= Expr ->
        NewExpr = expr_set_dst(Expr, NewReg),
        Move = mk_expr_move_instr(Reg, NewReg),
        pp_debug(" Replacing:~n", []),
        pp_debug_instr(Expr),
        pp_debug(" With:~n", []),
        pp_debug_instr(NewExpr),
        pp_debug_instr(Move),
        [NewExpr, Move];
       (Instr) ->
        [Instr]
    end,
  [Out || Instr <- Code, Out <- Replace(Instr)].

%%=============================================================================
%% Goes through the given list of expressions and deletes them from the code.
%% NOTE We do not actually delete an expression, but instead we replace it 
%% with an assignment from the new temporary containing the result of the 
%% expressions which is guaranteed to have been calculated earlier in 
%% the code.
%%
%% One pass over the code is made per expression id, in list order. An
%% instruction is replaced when it is an expression (is_expr/1) and equals
%% the looked-up expression once its destination has been cleared.
delete_exprs(Code, ExprMap, IdMap, ExprIds) ->
  lists:foldl(
    fun(ExprId, CurCode) ->
        Expr = expr_id_map_get_expr(IdMap, ExprId),
        Rewrite =
          fun(Instr) ->
              case is_expr(Instr) andalso expr_clear_dst(Instr) =:= Expr of
                false ->
                  Instr;
                true ->
                  pp_debug(" Deleting: ", []),
                  pp_debug_instr(Instr),
                  %% Lookup expression entry.
                  Defines =
                    case expr_map_lookup(ExprMap, Expr) of
                      none ->
                        exit({?MODULE, expr_map_lookup,
                              "expression missing"});
                      {value, {_, _, Defs}} ->
                        Defs
                    end,
                  MoveCode =
                    mk_expr_move_instr(hipe_rtl:defines(Instr), Defines),
                  pp_debug(" Replacing with: ", []),
                  pp_debug_instr(MoveCode),
                  MoveCode
              end
          end,
        [Rewrite(I) || I <- CurCode]
    end, Code, ExprIds).

%%=============================================================================
%% Goes through the given list of expressions and inserts them at 
%% appropriate places in the code.
%%
%% For the edge Pred -> Succ the instruction goes last in Pred when Pred has
%% a single successor, otherwise first in Succ when Succ has a single
%% predecessor, and otherwise into a block placed on the edge itself.
%% Returns {NewCFG, NewBetweenMap}.
insert_exprs(CFG, Pred, Succ, ExprMap, IdMap, BetweenMap, ExprIds) ->
  lists:foldl(
    fun(ExprId, {CurCFG, CurMap}) ->
        Expr = expr_id_map_get_expr(IdMap, ExprId),
        Instr = expr_map_get_instr(ExprMap, Expr),
        case hipe_rtl_cfg:succ(CurCFG, Pred) of
          [_] ->
            pp_debug(" Inserted last: ", []),
            pp_debug_instr(Instr),
            {insert_expr_last(CurCFG, Pred, Instr), CurMap};
          _ ->
            case hipe_rtl_cfg:pred(CurCFG, Succ) of
              [_] ->
                pp_debug(" Inserted first: ", []),
                pp_debug_instr(Instr),
                {insert_expr_first(CurCFG, Succ, Instr), CurMap};
              _ ->
                pp_debug(" Inserted between: ", []),
                pp_debug_instr(Instr),
                insert_expr_between(CurCFG, CurMap, Pred, Succ, Instr)
            end
        end
    end, {CFG, BetweenMap}, ExprIds).
%%=============================================================================
%% Stores Instr as the second-to-last instruction of the block named Label,
%% on the assumption that the last instruction of the block is its branch.
insert_expr_last(CFG, Label, Instr) ->
  OldCode = hipe_bb:code(hipe_rtl_cfg:bb(CFG, Label)),
  %% FIXME: Use hipe_bb:butlast() instead?
  NewCode = insert_expr_last_work(Label, Instr, OldCode),
  hipe_rtl_cfg:bb_add(CFG, Label, hipe_bb:mk_bb(NewCode)).

%%=============================================================================
%% List-level worker for insert_expr_last/3: returns Code with Instr placed
%% just before its final element.
insert_expr_last_work(_Label, Instr, []) ->
  %% Should not happen, since it would mean the block was completely empty
  %% when the function was called. For compatibility the instruction simply
  %% becomes the whole block.
  [Instr];
insert_expr_last_work(_Label, Instr, Code) ->
  %% Peel off the final instruction and put Instr right in front of it.
  [Last|RevFront] = lists:reverse(Code),
  lists:reverse(RevFront, [Instr, Last]).

%%=============================================================================
%% Stores Instr directly after the first instruction of the block named
%% Label; the original author's note is that the first instruction is
%% always a label.
insert_expr_first(CFG, Label, Instr) ->
  [First|Rest] = hipe_bb:code(hipe_rtl_cfg:bb(CFG, Label)),
  hipe_rtl_cfg:bb_add(CFG, Label, hipe_bb:mk_bb([First, Instr | Rest])).

%%=============================================================================
%% Inserts an expression on an edge between two existing blocks.
%% The first insertion on an edge creates a new basic block holding the
%% instruction followed by a goto to Succ, redirects Pred to it, and records
%% the block in BetweenMap under {Pred, Succ}. Later insertions on the same
%% edge reuse the recorded block through insert_expr_last/3.
%% NOTE Identical expressions with the same successor still end up in
%%      separate blocks when their predecessors differ. Since such blocks
%%      usually contain very few instructions this should not be a problem.
insert_expr_between(CFG, BetweenMap, Pred, Succ, Instr) ->
  Edge = {Pred, Succ},
  case edge_bb_map_lookup(BetweenMap, Edge) of
    {value, Label} ->
      pp_debug(" Using existing new bb for edge (~w,~w) with label ~w~n",
               [Pred, Succ, Label]),
      {insert_expr_last(CFG, Label, Instr), BetweenMap};
    none ->
      NewLabel = hipe_rtl:mk_new_label(),
      NewLabelName = hipe_rtl:label_name(NewLabel),
      pp_debug(" Creating new bb ~w~n", [NewLabel]),
      EdgeCode = [Instr, hipe_rtl:mk_goto(Succ)],
      WithBlock = hipe_rtl_cfg:bb_add(CFG, NewLabelName,
                                      hipe_bb:mk_bb(EdgeCode)),
      Redirected = hipe_rtl_cfg:redirect(WithBlock, Pred, Succ, NewLabelName),
      UpdatedMap = edge_bb_map_insert(BetweenMap, Edge, NewLabelName),
      pp_debug(" Mapping edge (~w,~w) to label ~w~n",
               [Pred, Succ, NewLabelName]),
      {Redirected, UpdatedMap}
  end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%% GENERAL UTILITY FUNCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%=============================================================================
%% True when no element of the list is a machine register, i.e. every
%% register or floating-point register in it has an index at or above
%% hipe_rtl_arch:first_virtual_reg(). Elements that are neither kind of
%% register are ignored.
no_machine_regs(Regs) ->
  IsNotMachineReg =
    fun(Reg) ->
        case hipe_rtl:is_reg(Reg) of
          true ->
            hipe_rtl:reg_index(Reg) >= hipe_rtl_arch:first_virtual_reg();
          _ ->
            case hipe_rtl:is_fpreg(Reg) of
              true ->
                hipe_rtl:fpreg_index(Reg) >= hipe_rtl_arch:first_virtual_reg();
              _ ->
                true
            end
        end
    end,
  lists:all(IsNotMachineReg, Regs).

%%=============================================================================
%% Returns true if an RTL instruction is an expression.
%%
%% An instruction qualifies only if it defines at least one value, touches
%% no machine registers in either its definitions or its uses, and is of a
%% kind listed as 'true' in the table below. An instruction kind missing
%% from the table raises a case_clause error rather than being silently
%% classified.
is_expr(I) ->
  Defines = hipe_rtl:defines(I),
  Uses = hipe_rtl:uses(I),

  %% We don't consider something that doesn't define anything as an expression.
  %% Also we don't consider machine registers to be expressions.
  %% (Emptiness is tested by comparing with [] instead of walking the whole
  %% list with length/1.)
  case Defines =/= [] andalso no_machine_regs(Defines)
    andalso no_machine_regs(Uses) of
    true ->
      case I of
        #alu{} -> true;
%%      #alu{} ->
%%        Dst = hipe_rtl:alu_dst(I),
%%        Src1 = hipe_rtl:alu_src1(I),
%%        Src2 = hipe_rtl:alu_src2(I),

          %% Check if dst updates src
%%        case Dst =:= Src1 orelse Dst =:= Src2 of
%%          true ->
%%            false;
%%          false ->
%%            true
%%        end;

          %% Check if alu expression is untagging of boxed (rX <- vX sub 2)
%%        case hipe_rtl:is_reg(Dst) andalso hipe_rtl:is_var(Src1) andalso
%%          (hipe_rtl:alu_op(I) =:= sub) andalso hipe_rtl:is_imm(Src2) of
%%          true ->
%%            case hipe_rtl:imm_value(Src2) of
%%              2 -> false; %% Tag for boxed. TODO: Should not be hardcoded...
%%              _ -> true
%%            end;
%%          false ->
%%            true
%%        end;

        #alub{} -> false; %% TODO: Split instruction to consider alu expression?
        #branch{} -> false;
        #call{} -> false; %% We cannot prove that a call has no side-effects
        #comment{} -> false;
        #enter{} -> false;
        %% #fail_to{} -> false; %% Deprecated?
        #fconv{} -> true;
        #fixnumop{} -> true;
        #fload{} -> true;
        #fmove{} -> false;
        #fp{} -> true;
        #fp_unop{} -> true;
        #fstore{} -> false;
        #goto{} -> false;
        #goto_index{} -> false;
        #gctest{} -> false;
        #label{} -> false;
        #load{} -> true;
        #load_address{} ->
          case hipe_rtl:load_address_type(I) of
            c_const -> false;
            closure -> false; %% not sure whether safe to move;
                              %% also probably not worth it
            constant -> true
          end;
        #load_atom{} -> true;
        #load_word_index{} -> true;
        #move{} -> false;
        #multimove{} -> false;
        #phi{} -> false;
        #return{} -> false;
        #store{} -> false;
        #switch{} -> false
      end;
    false ->
      false
  end.
%%=============================================================================
%% Sets the destination of an RTL expression. The second argument is a
%% non-empty destination list: a call receives the whole list, every other
%% supported instruction receives its first element. Unsupported
%% instructions make the process exit with "bad expression".
%%
expr_set_dst(#alu{} = I, [Dst|_]) ->
  hipe_rtl:alu_dst_update(I, Dst);
expr_set_dst(#call{} = I, [_|_] = DstList) ->
  hipe_rtl:call_dstlist_update(I, DstList);
expr_set_dst(#fconv{} = I, [Dst|_]) ->
  hipe_rtl:fconv_dst_update(I, Dst);
expr_set_dst(#fixnumop{} = I, [Dst|_]) ->
  hipe_rtl:fixnumop_dst_update(I, Dst);
expr_set_dst(#fload{} = I, [Dst|_]) ->
  hipe_rtl:fload_dst_update(I, Dst);
%% expr_set_dst(#fmove{} = I, [Dst|_]) -> hipe_rtl:fmove_dst_update(I, Dst);
expr_set_dst(#fp{} = I, [Dst|_]) ->
  hipe_rtl:fp_dst_update(I, Dst);
expr_set_dst(#fp_unop{} = I, [Dst|_]) ->
  hipe_rtl:fp_unop_dst_update(I, Dst);
expr_set_dst(#load{} = I, [Dst|_]) ->
  hipe_rtl:load_dst_update(I, Dst);
expr_set_dst(#load_address{} = I, [Dst|_]) ->
  hipe_rtl:load_address_dst_update(I, Dst);
expr_set_dst(#load_atom{} = I, [Dst|_]) ->
  hipe_rtl:load_atom_dst_update(I, Dst);
expr_set_dst(#load_word_index{} = I, [Dst|_]) ->
  hipe_rtl:load_word_index_dst_update(I, Dst);
%% expr_set_dst(#move{} = I, [Dst|_]) -> hipe_rtl:move_dst_update(I, Dst);
expr_set_dst(_Other, [_|_]) ->
  exit({?MODULE, expr_set_dst, "bad expression"}).

%%=============================================================================
%% Replaces the destination of an RTL expression with the atom 'nil', so
%% that expressions differing only in their destination compare equal.
%% Unsupported instructions make the process exit with "bad expression".
%%
expr_clear_dst(#alu{} = I) ->
  hipe_rtl:alu_dst_update(I, nil);
expr_clear_dst(#call{} = I) ->
  hipe_rtl:call_dstlist_update(I, nil);
expr_clear_dst(#fconv{} = I) ->
  hipe_rtl:fconv_dst_update(I, nil);
expr_clear_dst(#fixnumop{} = I) ->
  hipe_rtl:fixnumop_dst_update(I, nil);
expr_clear_dst(#fload{} = I) ->
  hipe_rtl:fload_dst_update(I, nil);
%% expr_clear_dst(#fmove{} = I) -> hipe_rtl:fmove_dst_update(I, nil);
expr_clear_dst(#fp{} = I) ->
  hipe_rtl:fp_dst_update(I, nil);
expr_clear_dst(#fp_unop{} = I) ->
  hipe_rtl:fp_unop_dst_update(I, nil);
expr_clear_dst(#load{} = I) ->
  hipe_rtl:load_dst_update(I, nil);
expr_clear_dst(#load_address{} = I) ->
  hipe_rtl:load_address_dst_update(I, nil);
expr_clear_dst(#load_atom{} = I) ->
  hipe_rtl:load_atom_dst_update(I, nil);
expr_clear_dst(#load_word_index{} = I) ->
  hipe_rtl:load_word_index_dst_update(I, nil);
%% expr_clear_dst(#move{} = I) -> hipe_rtl:move_dst_update(I, nil);
expr_clear_dst(_Other) ->
  exit({?MODULE, expr_clear_dst, "bad expression"}).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%% PRECALC FUNCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%=============================================================================
%% Pre-calculates the flow analysis and puts the calculated sets in maps for
%% easy access later.
%%
%% Returns {NodeInfo, EdgeInfo, AllExpr, ExprMap, IdMap, Labels}.
%% The steps are order dependent: each one reads the sets stored by the
%% previous ones. Every ?option_time(...) call also binds the variable that
%% appears on the left-hand side of the match passed as its first argument.
lcm_precalc(CFG, Options) ->
  %% Calculate use map and expression map.
  ?option_time({ExprMap, IdMap} = mk_expr_map(CFG),
               "RTL LCM mk_expr_map", Options),
  ?option_time(UseMap = mk_use_map(CFG, ExprMap),
               "RTL LCM mk_use_map", Options),
  %% Labels = hipe_rtl_cfg:reverse_postorder(CFG),
  Labels = hipe_rtl_cfg:labels(CFG),
  %% StartLabel = hipe_rtl_cfg:start_label(CFG),
  %% AllExpr = all_exprs(CFG, Labels),
  %% AllExpr is the set of the keys of IdMap, i.e. the universe used by the
  %% flow analyses below is made of expression ids.
  AllExpr = ?SETS:from_list(gb_trees:keys(IdMap)),

  %% Calculate the data sets.
  ?option_time(NodeInfo0 = mk_node_info(Labels), "RTL LCM mk_node_info",
               Options),
  %% ?option_time(EdgeInfo0 = mk_edge_info(), "RTL LCM mk_edge_info",
  %%           Options),
  EdgeInfo0 = mk_edge_info(),
  ?option_time(NodeInfo1 = calc_up_exp(CFG, ExprMap, NodeInfo0, Labels),
               "RTL LCM calc_up_exp", Options),
  ?option_time(NodeInfo2 = calc_down_exp(CFG, ExprMap, NodeInfo1, Labels),
               "RTL LCM calc_down_exp", Options),
  ?option_time(NodeInfo3 = calc_killed_expr(CFG, NodeInfo2, UseMap, AllExpr,
                                            Labels),
               "RTL LCM calc_killed_exp", Options),
  ?option_time(NodeInfo4 = calc_avail(CFG, NodeInfo3),
               "RTL LCM calc_avail", Options),
  ?option_time(NodeInfo5 = calc_antic(CFG, NodeInfo4, AllExpr),
               "RTL LCM calc_antic", Options),
  ?option_time(EdgeInfo1 = calc_earliest(CFG, NodeInfo5, EdgeInfo0, Labels),
               "RTL LCM calc_earliest", Options),
  ?option_time({NodeInfo6, EdgeInfo2} = calc_later(CFG, NodeInfo5, EdgeInfo1),
               "RTL LCM calc_later", Options),
  ?option_time(NodeInfo7 = calc_delete(CFG, NodeInfo6, Labels),
               "RTL LCM calc_delete", Options),
  {NodeInfo7, EdgeInfo2, AllExpr, ExprMap, IdMap, Labels}.

%%%%%%%%%%%%%%%%%%% AVAILABLE IN/OUT FLOW ANALYSIS %%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Fixpoint calculation of available in/out sets.
%% Uses a worklist algorithm.
%% Performs the avail in/out flow analysis.

%%=============================================================================
%% Calculates the available in/out sets, and returns an updated NodeInfo.
%% The analysis runs forward from the start label, whose avail in set is
%% the empty set.

calc_avail(CFG, NodeInfo) ->
  StartLabel = hipe_rtl_cfg:start_label(CFG),
  Work = init_work([StartLabel]),
  %% Initialize start node
  NewNodeInfo = set_avail_in(NodeInfo, StartLabel, ?SETS:new()),
  calc_avail_fixpoint(Work, CFG, NewNodeInfo).

%% Takes labels off the worklist until it is empty, processing one node per
%% iteration and queueing the labels that the node reports back.
calc_avail_fixpoint(Work, CFG, NodeInfo) ->
  case get_work(Work) of
    fixpoint ->
      NodeInfo;
    {Label, NewWork} ->
      {NewNodeInfo, NewLabels} = calc_avail_node(Label, CFG, NodeInfo),
      NewWork2 = add_work(NewWork, NewLabels),
      calc_avail_fixpoint(NewWork2, CFG, NewNodeInfo)
  end.

%% Recomputes avail out for one node as
%%   down_exp U (avail_in - killed_expr).
%% Returns {UpdatedNodeInfo, LabelsToRevisit}; the second element is the
%% list of successors when avail out changed, and [] otherwise.
calc_avail_node(Label, CFG, NodeInfo) ->
  %% Get avail in
  AvailIn = avail_in(NodeInfo, Label),

  %% Calculate avail out
  AvailOut = ?SETS:union(down_exp(NodeInfo, Label),
                         ?SETS:subtract(AvailIn,
                                        killed_expr(NodeInfo, Label))),

  {Changed, NodeInfo2} =
    case avail_out(NodeInfo, Label) of
      none ->
        %% If there weren't any old avail out we use this one.
        {true, set_avail_out(NodeInfo, Label, AvailOut)};
      OldAvailOut ->
        %% Check if the avail outs are equal.
        case AvailOut =:= OldAvailOut of
          true ->
            {false, NodeInfo};
          false ->
            {true, set_avail_out(NodeInfo, Label, AvailOut)}
        end
    end,

  case Changed of
    true ->
      %% Update AvailIn-sets of successors and add them to worklist
      Succs = hipe_rtl_cfg:succ(CFG, Label),
      NodeInfo3 =
        lists:foldl
          (fun(Succ, NewNodeInfo) ->
               case avail_in(NewNodeInfo, Succ) of
                 none ->
                   %% No avail in recorded yet for this successor:
                   %% start from this node's avail out.
                   set_avail_in(NewNodeInfo, Succ, AvailOut);
                 OldAvailIn ->
                   %% Otherwise narrow it to what is also available here.
                   set_avail_in(NewNodeInfo, Succ,
                                ?SETS:intersection(OldAvailIn, AvailOut))
               end
           end,
           NodeInfo2, Succs),
      {NodeInfo3, Succs};
    false ->
      {NodeInfo2, []}
  end.

%%%%%%%%%%%%%%%%%% ANTICIPATED IN/OUT FLOW ANALYSIS %%%%%%%%%%%%%%%%%%%%%%%%%%
%% Fixpoint calculation of anticipated in/out sets.
%% Uses a worklist algorithm.

%%=============================================================================
%% Calculates the anticipated in/out sets, and returns an updated NodeInfo.
calc_antic(CFG, NodeInfo, AllExpr) ->
  %% Initialize worklist with all nodes in postorder
  Labels = hipe_rtl_cfg:postorder(CFG),
  Work = init_work(Labels),
  calc_antic_fixpoint(Work, CFG, NodeInfo, AllExpr).

%% Takes labels off the worklist until it is empty, processing one node per
%% iteration and queueing the labels that the node reports back.
calc_antic_fixpoint(Work, CFG, NodeInfo, AllExpr) ->
  case get_work(Work) of
    fixpoint ->
      NodeInfo;
    {Label, NewWork} ->
      {NewNodeInfo, NewLabels} = calc_antic_node(Label, CFG, NodeInfo, AllExpr),
      NewWork2 = add_work(NewWork, NewLabels),
      calc_antic_fixpoint(NewWork2, CFG, NewNodeInfo, AllExpr)
  end.

%% Recomputes antic in for one node as
%%   up_exp U (antic_out - killed_expr).
%% Returns {UpdatedNodeInfo, LabelsToRevisit}; the second element is the
%% list of predecessors when antic in changed, and [] otherwise.
calc_antic_node(Label, CFG, NodeInfo, AllExpr) ->
  %% Antic out as recorded so far. When nothing is recorded yet, an exit
  %% block starts from the empty set and any other block from AllExpr.
  AnticOut =
    case antic_out(NodeInfo, Label) of
      none ->
        case is_exit_label(CFG, Label) of
          true -> ?SETS:new();
          false -> AllExpr
        end;
      Recorded ->
        Recorded
    end,
  UpExp = up_exp(NodeInfo, Label),
  Surviving = ?SETS:subtract(AnticOut, killed_expr(NodeInfo, Label)),
  AnticIn = ?SETS:union(UpExp, Surviving),
  case antic_in(NodeInfo, Label) of
    AnticIn ->
      %% Same set as before: nothing to store, nothing to revisit.
      {NodeInfo, []};
    _MissingOrStale ->
      Stored = set_antic_in(NodeInfo, Label, AnticIn),
      %% Push the new antic in to every predecessor's antic out and ask for
      %% the predecessors to be processed again.
      Preds = hipe_rtl_cfg:pred(CFG, Label),
      Propagate =
        fun(Pred, Acc) ->
            PredOut =
              case antic_out(Acc, Pred) of
                none ->
                  %% First contribution to this predecessor.
                  AnticIn;
                OldPredOut ->
                  ?SETS:intersection(OldPredOut, AnticIn)
              end,
            set_antic_out(Acc, Pred, PredOut)
        end,
      {lists:foldl(Propagate, Stored, Preds), Preds}
  end.

%%%%%%%%%%%%%%%%%%%%% LATER / LATER IN FLOW ANALYSIS %%%%%%%%%%%%%%%%%%%%%%%%%%
%% Fixpoint calculations of Later and LaterIn sets.
%% Uses a worklist algorithm.
%% Note that the Later set is calculated on edges.

%%=============================================================================
%% Calculates the Later and LaterIn sets, and returns updates of both
%% NodeInfo (with LaterIn sets) and EdgeInfo (with Later sets).

%% Entry point of the Later/LaterIn analysis. The start label gets an empty
%% LaterIn set and is the only initial work item.
%% Returns {NodeInfo, EdgeInfo}.
calc_later(CFG, NodeInfo, EdgeInfo) ->
  Start = hipe_rtl_cfg:start_label(CFG),
  Worklist = init_work([{node, Start}]),
  Seeded = set_later_in(NodeInfo, Start, ?SETS:new()),
  calc_later_fixpoint(Worklist, CFG, Seeded, EdgeInfo).

%% Drains the worklist. Work items are either {node, Label}, which expands
%% into the outgoing edges of Label, or {edge, From, To}, which recomputes
%% the Later set of that edge.
calc_later_fixpoint(Work, CFG, NodeInfo, EdgeInfo) ->
  case get_work(Work) of
    fixpoint ->
      {NodeInfo, EdgeInfo};
    {{node, Label}, Rest} ->
      Edges = calc_later_node(Label, CFG),
      calc_later_fixpoint(add_work(Rest, Edges), CFG, NodeInfo, EdgeInfo);
    {{edge, From, To}, Rest} ->
      {NodeInfo1, EdgeInfo1, Pending} =
        calc_later_edge(From, To, CFG, NodeInfo, EdgeInfo),
      calc_later_fixpoint(add_work(Rest, Pending), CFG, NodeInfo1, EdgeInfo1)
  end.

%% Work items for all edges leaving Label.
calc_later_node(Label, CFG) ->
  lists:map(fun(Succ) -> {edge, Label, Succ} end,
            hipe_rtl_cfg:succ(CFG, Label)).

%% Recomputes Later for the edge From -> To as
%%   earliest(From, To) U (later_in(From) - up_exp(From)).
%% Returns {NodeInfo, EdgeInfo, NewWorkItems}. The target node is queued
%% only when its LaterIn set was created or changed.
calc_later_edge(From, To, _CFG, NodeInfo, EdgeInfo) ->
  Edge = {From, To},
  Earliest = earliest(EdgeInfo, Edge),
  LaterIn = later_in(NodeInfo, From),
  UpExp = up_exp(NodeInfo, From),
  Later = ?SETS:union(Earliest, ?SETS:subtract(LaterIn, UpExp)),
  case lookup_later(EdgeInfo, Edge) of
    Later ->
      %% The edge already carries exactly this set.
      {NodeInfo, EdgeInfo, []};
    _MissingOrStale ->
      EdgeInfo1 = set_later(EdgeInfo, Edge, Later),
      case lookup_later_in(NodeInfo, To) of
        none ->
          %% First edge reaching To: its LaterIn starts as this Later set.
          {set_later_in(NodeInfo, To, Later), EdgeInfo1, [{node, To}]};
        OldLaterIn ->
          %% FIXME: Implement faster equality test?
          case ?SETS:intersection(OldLaterIn, Later) of
            OldLaterIn ->
              {NodeInfo, EdgeInfo1, []};
            Narrowed ->
              {set_later_in(NodeInfo, To, Narrowed), EdgeInfo1, [{node, To}]}
          end
      end
  end.

%%%%%%%%%%%%%%%%%% UPWARDS/DOWNWARDS EXPOSED EXPRESSIONS %%%%%%%%%%%%%%%%%%%%%%
%% Calculates upwards and downwards exposed expressions.

%%=============================================================================
%% Stores, for every given label, the set of ids of the expressions that are
%% downwards exposed in its basic block (the block is scanned front to back).
calc_down_exp(CFG, ExprMap, NodeInfo, Labels) ->
  lists:foldl(
    fun(Label, Acc) ->
        Code = hipe_bb:code(hipe_rtl_cfg:bb(CFG, Label)),
        Ids = get_expr_ids(ExprMap, exp_work(Code)),
        set_down_exp(Acc, Label, ?SETS:from_list(Ids))
    end, NodeInfo, Labels).

%%=============================================================================
%% Stores, for every given label, the set of ids of the expressions that are
%% upwards exposed in its basic block (the block is scanned back to front).
calc_up_exp(CFG, ExprMap, NodeInfo, Labels) ->
  lists:foldl(
    fun(Label, Acc) ->
        Block = hipe_rtl_cfg:bb(CFG, Label),
        Backwards = lists:reverse(hipe_bb:code(Block)),
        Ids = get_expr_ids(ExprMap, exp_work(Backwards)),
        set_up_exp(Acc, Label, ?SETS:from_list(Ids))
    end, NodeInfo, Labels).

%%=============================================================================
%% Maps expression instructions to their ids: each instruction has its
%% destination cleared and is then looked up in the expression map.
get_expr_ids(ExprMap, Instrs) ->
  lists:map(fun(Instr) ->
                expr_map_get_id(ExprMap, expr_clear_dst(Instr))
            end, Instrs).

%%=============================================================================
%% Does the work of the calc_*_exp functions: scans the code in the given
%% order, starting with no exposed expressions.
exp_work(Code) ->
  exp_work([], Code).

%% Scans the instructions in order while maintaining the list of expressions
%% that are still exposed (most recent first).
%% While that list is empty, an expression instruction is simply recorded.
%% Once it is non-empty, every instruction is first recorded (if it is an
%% expression) and then given the chance to kill recorded expressions.
exp_work(Exposed, []) ->
  Exposed;
exp_work([], [Instr|Rest]) ->
  case is_expr(Instr) of
    true -> exp_work([Instr], Rest);
    false -> exp_work([], Rest)
  end;
exp_work(Exposed, [Instr|Rest]) ->
  Candidates =
    case is_expr(Instr) of
      true -> [Instr|Exposed];
      false -> Exposed
    end,
  exp_work(exp_kill_expr(Instr, Candidates), Rest).

%%=============================================================================
%% Removes from the expression list every expression that the given
%% instruction invalidates and returns the remaining ones in their original
%% order:
%%  - calls, gctests and stores invalidate everything;
%%  - an fstore invalidates the expressions whose first define is an fp
%%    register;
%%  - any other instruction invalidates the expressions using something that
%%    the instruction defines.
exp_kill_expr(_Instr, []) ->
  [];
exp_kill_expr(#call{}, _Exprs) ->
  [];
exp_kill_expr(#gctest{}, _Exprs) ->
  [];
exp_kill_expr(#store{}, _Exprs) ->
  %% FIXME: Only regs and vars clobbered, not fregs...
  [];
exp_kill_expr(#fstore{}, Exprs) ->
  %% fstore potentially clobbers float expressions
  lists:filter(fun(Expr) ->
                   [FirstDefine|_] = hipe_rtl:defines(Expr),
                   not hipe_rtl:is_fpreg(FirstDefine)
               end, Exprs);
exp_kill_expr(Instr, Exprs) ->
  Redefined = hipe_rtl:defines(Instr),
  UsesRedefined =
    fun(Expr) ->
        lists:any(fun(Use) -> lists:member(Use, Redefined) end,
                  hipe_rtl:uses(Expr))
    end,
  lists:filter(fun(Expr) -> not UsesRedefined(Expr) end, Exprs).

%%%%%%%%%%%%%%%%%%%%%%%% KILLED EXPRESSIONS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%=============================================================================
%% Calculates the killed expression sets for all given labels.
%% Stores, for every given label, the set of expressions killed by the code
%% of its basic block. Returns the updated NodeInfo.
calc_killed_expr(_, NodeInfo, _, _, []) ->
  NodeInfo;
calc_killed_expr(CFG, NodeInfo, UseMap, AllExpr, [Label|Labels]) ->
  Code = hipe_bb:code(hipe_rtl_cfg:bb(CFG, Label)),
  KilledExprs = calc_killed_expr_bb(Code, UseMap, AllExpr, ?SETS:new()),
  NewNodeInfo = set_killed_expr(NodeInfo, Label, KilledExprs),
  calc_killed_expr(CFG, NewNodeInfo, UseMap, AllExpr, Labels).

%%=============================================================================
%% Calculates the killed expressions set for one basic block.
%% KilledExprs accumulates the kills of the instructions seen so far.
%%  - A call, gctest or store kills everything, so the scan stops with
%%    AllExpr.
%%  - An fstore kills the float expressions; these are added to the
%%    accumulated kills and the scan continues with the rest of the block.
%%  - Any other instruction kills the expressions that use any of the
%%    registers/variables it defines (all of its defines, not just the
%%    first one).
calc_killed_expr_bb([], _UseMap, _AllExpr, KilledExprs) ->
  KilledExprs;
calc_killed_expr_bb([Instr|Instrs], UseMap, AllExpr, KilledExprs) ->
  %% Calls, gctests and stores potentially clobber everything
  case Instr of
    #call{} -> AllExpr;
    #gctest{} -> AllExpr;
    #store{} -> AllExpr; %% FIXME: Only regs and vars clobbered, not fregs...
    #fstore{} ->
      %% Kill all float expressions
      %% FIXME: Make separate function is_fp_expr
      %% NOTE(review): lcm_precalc builds AllExpr from the keys of IdMap
      %% (expression ids), yet the elements are handed to hipe_rtl:defines/1
      %% here as if they were instructions. Confirm what AllExpr holds;
      %% translating ids back to expressions would need IdMap, which this
      %% function does not receive.
      IsFloatExpr =
        fun(Expr) ->
            [Define|_] = hipe_rtl:defines(Expr),
            hipe_rtl:is_fpreg(Define)
        end,
      FloatExprs =
        ?SETS:from_list(lists:filter(IsFloatExpr, ?SETS:to_list(AllExpr))),
      calc_killed_expr_bb(Instrs, UseMap, AllExpr,
                          ?SETS:union(FloatExprs, KilledExprs));
    _ ->
      NewKilledExprs =
        lists:foldl(fun(Define, Acc) ->
                        ?SETS:union(use_map_get_expr_uses(UseMap, Define), Acc)
                    end, KilledExprs, hipe_rtl:defines(Instr)),
      calc_killed_expr_bb(Instrs, UseMap, AllExpr, NewKilledExprs)
  end.
%%%%%%%%%%%%%%%%%%%%%%%%%%%% EARLIEST %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%=============================================================================
%% Calculates the earliest set for all edges in the CFG.
%% The given labels are treated as edge targets: for each of them, one
%% earliest set is stored per incoming edge. Returns the updated EdgeInfo.

calc_earliest(_, _, EdgeInfo, []) ->
  EdgeInfo;
calc_earliest(CFG, NodeInfo, EdgeInfo, [To|Labels]) ->
  EmptySet = ?SETS:new(),
  Preds = hipe_rtl_cfg:pred(CFG, To),
  NewEdgeInfo =
    case EmptySet =:= antic_in(NodeInfo, To) of
      true ->
        %% Earliest is empty for all edges into this block.
        lists:foldl(fun(From, EdgeInfoAcc) ->
                        set_earliest(EdgeInfoAcc, {From, To}, EmptySet)
                    end, EdgeInfo, Preds);
      false ->
        %% The start label does not depend on the edge; look it up once.
        StartLabel = hipe_rtl_cfg:start_label(CFG),
        lists:foldl(fun(From, EdgeInfoAcc) ->
                        IsStartLabel = (From =:= StartLabel),
                        Earliest =
                          calc_earliest_edge(NodeInfo, IsStartLabel, From, To),
                        set_earliest(EdgeInfoAcc, {From, To}, Earliest)
                    end, EdgeInfo, Preds)
    end,
  calc_earliest(CFG, NodeInfo, NewEdgeInfo, Labels).

%%=============================================================================
%% Calculates the earliest set for one edge.
%% For an edge leaving the start label this is antic_in(To) - avail_out(From).
%% For any other edge, (antic_out(From) - killed_expr(From)) is subtracted
%% from that as well.

calc_earliest_edge(NodeInfo, IsStartLabel, From, To) ->
  AnticIn = antic_in(NodeInfo, To),
  AvailOut = avail_out(NodeInfo, From),

  case IsStartLabel of
    true ->
      ?SETS:subtract(AnticIn, AvailOut);
    false ->
      AnticOut = antic_out(NodeInfo, From),
      ExprKill = killed_expr(NodeInfo, From),
      ?SETS:subtract(?SETS:subtract(AnticIn, AvailOut),
                     ?SETS:subtract(AnticOut, ExprKill))
  end.
%% The above used to be:
%%
%%   ?SETS:intersection(?SETS:subtract(AnticIn, AvailOut),
%%                      ?SETS:union(ExprKill, ?SETS:subtract(AllExpr, AnticOut)))
%%
%% But it is costly to use the AllExpr, so let's do some tricky set algebra.
%%
%% Let A = AnticIn, B = AvailOut, C = ExprKill, D = AnticOut, U = AllExpr
%% Let n = intersection, u = union, ' = inverse
%%
%% Then
%%  (A - B) n (C u (U - D)) = <Remove D unless it is in C>
%%  = (A - B) n ((C u U) - (D - C)) = <But U is the whole universe>
%%  = (A - B) n (U - (D - C)) = <We are really meaning the complement>
%%  = (A - B) n (D - C)' = <Intersection w complement is subtraction>
%%  = (A - B) - (D - C) <Simple enough, let's stop>
%%
%% or in other words
%%   ?SETS:subtract(?SETS:subtract(AnticIn, AvailOut),
%%                  ?SETS:subtract(AnticOut, ExprKill))



%%%%%%%%%%%%%%%%%%%%%%%% INSERT / DELETE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%=============================================================================
%% Computes the insert set of the edge From -> To, i.e.
%%   later(From, To) - later_in(To),
%% and returns it.
%% NOTE EdgeInfo is left untouched: unlike the other sets, the insert set is
%% not pre-calculated and stored but is handed straight back to the caller.
calc_insert_edge(NodeInfo, EdgeInfo, From, To) ->
  ?SETS:subtract(later(EdgeInfo, {From, To}), later_in(NodeInfo, To)).

%%=============================================================================
%% Stores the delete set of every given label and returns the updated
%% NodeInfo. The start label always gets the empty set; every other label
%% gets up_exp - later_in.
calc_delete(CFG, NodeInfo, Labels) ->
  lists:foldl(
    fun(Label, Acc) ->
        Delete =
          case hipe_rtl_cfg:start_label(CFG) of
            Label ->
              ?SETS:new();
            _Other ->
              UpExp = up_exp(Acc, Label),
              ?SETS:subtract(UpExp, later_in(Acc, Label))
          end,
        set_delete(Acc, Label, Delete)
    end, NodeInfo, Labels).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%% FIXPOINT FUNCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%=============================================================================
%% Worklist used by the fixpoint calculations.
%%
%% A worklist is a triple {Front, Back, Queued}: items are taken from Front,
%% new items are pushed onto Back (which is reversed into Front when Front
%% runs out), and Queued is a gb_set of the items currently waiting, used to
%% avoid queueing the same item twice.

%% Creates a worklist holding the given items, in the given order.
init_work(Labels) ->
  Queued = gb_sets:from_list(Labels),
  {Labels, [], Queued}.

%% Takes the next item off the worklist.
%% Returns {Item, RemainingWork}, or the atom fixpoint when nothing is left.
get_work({[], [], _Queued}) ->
  fixpoint;
get_work({[], Back, Queued}) ->
  get_work({lists:reverse(Back), [], Queued});
get_work({[Next|Front], Back, Queued}) ->
  {Next, {Front, Back, gb_sets:delete(Next, Queued)}}.

%% Appends the given items to the worklist, skipping those already waiting.
add_work(Work, Labels) ->
  lists:foldl(
    fun(Label, {Front, Back, Queued} = Acc) ->
        case gb_sets:is_member(Label, Queued) of
          true -> Acc;
          false -> {Front, [Label|Back], gb_sets:insert(Label, Queued)}
        end
    end, Work, Labels).

%%=============================================================================
%% Return true if label is an exit label,
%% i.e. its bb has no successors or itself as only successor.
%% FIXME We do not detect dead-end loops spanning more than one block.
%% This could potentially cause a bug in the future...
is_exit_label(CFG, Label) ->
  Succs = hipe_rtl_cfg:succ(CFG, Label),
  Succs =:= [] orelse Succs =:= [Label].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%% DATASET FUNCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% The dataset is a collection of data about the CFG.
%% It is divided into two parts, NodeInfo and EdgeInfo.
%% The pre-calculation step stores the calculated sets here.
%% Both parts are gb_trees; a field that has not been calculated yet holds
%% the atom none.

%% Per-label data (NodeInfo values).
-record(node_data, {up_exp      = none,
                    down_exp    = none,
                    killed_expr = none,
                    avail_in    = none,
                    avail_out   = none,
                    antic_in    = none,
                    antic_out   = none,
                    later_in    = none,
                    delete      = none}).

%% Per-edge data (EdgeInfo values).
-record(edge_data, {earliest = none,
                    later    = none,
                    insert   = none}).

%%=============================================================================
%% Creates a node info with one blank entry for each of the given labels.
%% The labels must be distinct, since gb_trees:insert/3 rejects existing keys.
mk_node_info(Labels) ->
  Blank = #node_data{},
  lists:foldl(fun(Label, Tree) -> gb_trees:insert(Label, Blank, Tree) end,
              gb_trees:empty(), Labels).

%% Creates an empty edge info; entries are added as edges get their sets.
%% FIXME Should we traverse cfg and initialize edges?
mk_edge_info() ->
  gb_trees:empty().

%%=============================================================================
%% Get methods
%% Each getter fails if the label has no entry in NodeInfo.
up_exp(NodeInfo, Label) ->
  (gb_trees:get(Label, NodeInfo))#node_data.up_exp.

down_exp(NodeInfo, Label) ->
  (gb_trees:get(Label, NodeInfo))#node_data.down_exp.

killed_expr(NodeInfo, Label) ->
  (gb_trees:get(Label, NodeInfo))#node_data.killed_expr.

avail_in(NodeInfo, Label) ->
  (gb_trees:get(Label, NodeInfo))#node_data.avail_in.

avail_out(NodeInfo, Label) ->
  (gb_trees:get(Label, NodeInfo))#node_data.avail_out.

antic_in(NodeInfo, Label) ->
  (gb_trees:get(Label, NodeInfo))#node_data.antic_in.

antic_out(NodeInfo, Label) ->
  (gb_trees:get(Label, NodeInfo))#node_data.antic_out.

%% Getters that use gb_trees:get/2 fail when the key has no entry; the
%% lookup_* variants return none in that case instead.
later_in(NodeInfo, Label) ->
  (gb_trees:get(Label, NodeInfo))#node_data.later_in.

lookup_later_in(NodeInfo, Label) ->
  case gb_trees:lookup(Label, NodeInfo) of
    {value, #node_data{later_in = LaterIn}} ->
      LaterIn;
    none ->
      none
  end.

delete(NodeInfo, Label) ->
  (gb_trees:get(Label, NodeInfo))#node_data.delete.

earliest(EdgeInfo, Edge) ->
  (gb_trees:get(Edge, EdgeInfo))#edge_data.earliest.

-ifdef(LOOKUP_EARLIEST_NEEDED).
lookup_earliest(EdgeInfo, Edge) ->
  case gb_trees:lookup(Edge, EdgeInfo) of
    {value, #edge_data{earliest = Earliest}} ->
      Earliest;
    none ->
      none
  end.
-endif.

later(EdgeInfo, Edge) ->
  (gb_trees:get(Edge, EdgeInfo))#edge_data.later.

lookup_later(EdgeInfo, Edge) ->
  case gb_trees:lookup(Edge, EdgeInfo) of
    {value, #edge_data{later = Later}} ->
      Later;
    none ->
      none
  end.

%%=============================================================================
%% Set methods
%% Each setter replaces one field of the entry stored for the label and
%% returns the updated tree. A label without an entry gets a fresh record
%% in which only that field is set.
set_up_exp(NodeInfo, Label, Data) ->
  Entry =
    case gb_trees:lookup(Label, NodeInfo) of
      {value, Existing} -> Existing#node_data{up_exp = Data};
      none -> #node_data{up_exp = Data}
    end,
  gb_trees:enter(Label, Entry, NodeInfo).

set_down_exp(NodeInfo, Label, Data) ->
  Entry =
    case gb_trees:lookup(Label, NodeInfo) of
      {value, Existing} -> Existing#node_data{down_exp = Data};
      none -> #node_data{down_exp = Data}
    end,
  gb_trees:enter(Label, Entry, NodeInfo).

set_killed_expr(NodeInfo, Label, Data) ->
  Entry =
    case gb_trees:lookup(Label, NodeInfo) of
      {value, Existing} -> Existing#node_data{killed_expr = Data};
      none -> #node_data{killed_expr = Data}
    end,
  gb_trees:enter(Label, Entry, NodeInfo).

%% Setters for the remaining node fields and for the earliest set of an
%% edge. Each one replaces a single field of the entry stored under the key
%% and returns the updated tree; a key without an entry gets a fresh record
%% in which only that field is set.
set_avail_in(NodeInfo, Label, Data) ->
  update_info_entry(NodeInfo, Label, #node_data{},
                    fun(Node) -> Node#node_data{avail_in = Data} end).

set_avail_out(NodeInfo, Label, Data) ->
  update_info_entry(NodeInfo, Label, #node_data{},
                    fun(Node) -> Node#node_data{avail_out = Data} end).

set_antic_in(NodeInfo, Label, Data) ->
  update_info_entry(NodeInfo, Label, #node_data{},
                    fun(Node) -> Node#node_data{antic_in = Data} end).

set_antic_out(NodeInfo, Label, Data) ->
  update_info_entry(NodeInfo, Label, #node_data{},
                    fun(Node) -> Node#node_data{antic_out = Data} end).

set_later_in(NodeInfo, Label, Data) ->
  update_info_entry(NodeInfo, Label, #node_data{},
                    fun(Node) -> Node#node_data{later_in = Data} end).

set_delete(NodeInfo, Label, Data) ->
  update_info_entry(NodeInfo, Label, #node_data{},
                    fun(Node) -> Node#node_data{delete = Data} end).

set_earliest(EdgeInfo, Edge, Data) ->
  update_info_entry(EdgeInfo, Edge, #edge_data{},
                    fun(EdgeData) -> EdgeData#edge_data{earliest = Data} end).

%% Applies Update to the record stored under Key (or to Blank when the key
%% has no entry yet) and stores the result under Key.
update_info_entry(Tree, Key, Blank, Update) ->
  Current =
    case gb_trees:lookup(Key, Tree) of
      {value, Stored} -> Stored;
      none -> Blank
    end,
  gb_trees:enter(Key, Update(Current), Tree).

%% Replaces the later set stored for the edge and returns the updated tree.
%% An edge without an entry gets a fresh record with only that field set.
set_later(EdgeInfo, Edge, Data) ->
  Entry =
    case gb_trees:lookup(Edge, EdgeInfo) of
      {value, Existing} -> Existing#edge_data{later = Data};
      none -> #edge_data{later = Data}
    end,
  gb_trees:enter(Edge, Entry, EdgeInfo).

%%%%%%%%%%%%%%%%%%%%%%%%%%%% USE MAP %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% The use map is a mapping from "use" (which is an rtl register/variable)
%% to a set of expressions (IDs) where that register/variable is used.
%% It is used by calc_killed_expr to know what expressions are affected by
%% a definition.

%%=============================================================================
%% Creates and calculates the use map for a CFG.
%% It uses ExprMap to lookup the expression IDs.
%% The blocks are visited in reverse postorder and the resulting tree is
%% balanced before it is returned.
mk_use_map(CFG, ExprMap) ->
  Order = hipe_rtl_cfg:reverse_postorder(CFG),
  gb_trees:balance(mk_use_map(gb_trees:empty(), CFG, ExprMap, Order)).

%% Extends the use map with the code of each of the given labels, in order.
mk_use_map(Map, CFG, ExprMap, Labels) ->
  lists:foldl(
    fun(Label, Acc) ->
        Block = hipe_rtl_cfg:bb(CFG, Label),
        mk_use_map_bb(Acc, ExprMap, hipe_bb:code(Block))
    end, Map, Labels).

%% Extends the use map with the expressions found in one instruction list.
%% Instructions that are not expressions are skipped.
mk_use_map_bb(UseMap, ExprMap, Code) ->
  lists:foldl(
    fun(Instr, Acc) ->
        case is_expr(Instr) of
          false ->
            Acc;
          true ->
            Uses = hipe_rtl:uses(Instr),
            ExprId = expr_map_get_id(ExprMap, expr_clear_dst(Instr)),
            mk_use_map_insert_uses(Acc, ExprId, Uses)
        end
    end, UseMap, Code).

%%=============================================================================
%% Worker function for mk_use_map: records the expression id under every
%% rtl register in the given list of uses and returns the updated use map.
%% A register seen for the first time starts with an empty set.
mk_use_map_insert_uses(Map, Expr, Uses) ->
  lists:foldl(
    fun(Use, Acc) ->
        Users = ?SETS:add_element(Expr, use_map_get_expr_uses(Acc, Use)),
        gb_trees:enter(Use, Users, Acc)
    end, Map, Uses).

%%=============================================================================
%% Gets the set of expressions where the given rtl register is used.
%% A register that is absent from the map yields the empty set.
use_map_get_expr_uses(Map, Reg) ->
  case gb_trees:lookup(Reg, Map) of
    none ->
      ?SETS:new();
    {value, Users} ->
      Users
  end.

%%%%%%%%%%%%%%%%%%%%%% EXPRESSION MAP %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% The expression map is a mapping from expression to
%% (1) Expression Id (Integer used to speed up set operations)
%% (2) List of definitions (labels where the expression is defined and the
%%     list of registers or variables defined by an instruction in that label,
%%     represented as a tuple {Label, Defines})
%% (3) The list of replacement registers created for the expression

%%=============================================================================
%% Creates and calculates the expression map for a CFG.
%% Returns {ExprMap, IdMap}, both balanced. The expression id counter is
%% reset first, and the blocks are visited in reverse postorder.
mk_expr_map(CFG) ->
  init_expr_id(),
  Order = hipe_rtl_cfg:reverse_postorder(CFG),
  {Exprs, Ids} = mk_expr_map(gb_trees:empty(), gb_trees:empty(), CFG, Order),
  {gb_trees:balance(Exprs), gb_trees:balance(Ids)}.

%% Extends both maps with the code of each of the given labels, in order.
%% Returns {ExprMap, IdMap}.
mk_expr_map(ExprMap, IdMap, CFG, Labels) ->
  lists:foldl(
    fun(Label, {Exprs, Ids}) ->
        Code = hipe_bb:code(hipe_rtl_cfg:bb(CFG, Label)),
        mk_expr_map_bb(Exprs, Ids, Label, Code)
    end, {ExprMap, IdMap}, Labels).

%% Extends both maps with the expressions of one basic block.
%% An expression seen for the first time gets a new id and a list of
%% replacement registers; an expression already present only gets another
%% {Label, Defines} entry prepended to its definition list.
mk_expr_map_bb(ExprMap, IdMap, Label, Code) ->
  Register =
    fun(Instr, {Exprs, Ids} = Maps) ->
        case is_expr(Instr) of
          false ->
            Maps;
          true ->
            Expr = expr_clear_dst(Instr),
            Defines = hipe_rtl:defines(Instr),
            case gb_trees:lookup(Expr, Exprs) of
              none ->
                Id = new_expr_id(),
                ReplRegs = mk_replacement_regs(Defines),
                Entry = {Id, [{Label, Defines}], ReplRegs},
                {gb_trees:insert(Expr, Entry, Exprs),
                 gb_trees:insert(Id, Expr, Ids)};
              {value, {Id, Sites, ReplRegs}} ->
                Entry = {Id, [{Label, Defines}|Sites], ReplRegs},
                {gb_trees:update(Expr, Entry, Exprs), Ids}
            end
        end
    end,
  lists:foldl(Register, {ExprMap, IdMap}, Code).

%%=============================================================================
%% Creates new temporaries to replace defines in moved expressions.
%% The result has one fresh temporary per define, in the same order.
mk_replacement_regs([]) ->
  [];
mk_replacement_regs(Defines) ->
  mk_replacement_regs(Defines, []).

mk_replacement_regs([], NewRegs) ->
  lists:reverse(NewRegs);
mk_replacement_regs([Define|Defines], NewRegs) ->
  mk_replacement_regs(Defines, [mk_replacement_reg(Define)|NewRegs]).

%% Creates a fresh temporary of the same kind as Define: a register (gc-safe
%% if Define is), a variable, or otherwise an fp register. Anything that is
%% none of the three fails the is_fpreg assertion.
mk_replacement_reg(Define) ->
  case hipe_rtl:is_reg(Define) of
    true ->
      case hipe_rtl:reg_is_gcsafe(Define) of
        true -> hipe_rtl:mk_new_reg_gcsafe();
        false -> hipe_rtl:mk_new_reg()
      end;
    false ->
      case hipe_rtl:is_var(Define) of
        true ->
          hipe_rtl:mk_new_var();
        false ->
          true = hipe_rtl:is_fpreg(Define),
          hipe_rtl:mk_new_fpreg()
      end
  end.

%%=============================================================================
%% Performs a lookup, which returns none or {value, Entry} where Entry is
%% the tuple
%% {expression ID, list of definitions, list of replacement registers}
expr_map_lookup(Map, Expr) ->
  gb_trees:lookup(Expr, Map).

%%=============================================================================
%% Gets the actual RTL instruction to be generated for insertions of an
%% expression: the expression with its replacement registers installed as
%% destination. Exits if the expression is not in the map.
expr_map_get_instr(Map, Expr) ->
  case gb_trees:lookup(Expr, Map) of
    {value, {_, _, Regs}} ->
      expr_set_dst(Expr, Regs);
    none ->
      exit({?MODULE, expr_map_get_instr, "expression missing"})
  end.

%%=============================================================================
%% Gets expression id. Exits if the expression is not in the map.
expr_map_get_id(Map, Expr) ->
  case gb_trees:lookup(Expr, Map) of
    {value, {ExprId, _, _}} ->
      ExprId;
    none ->
      %% The exit reason names this function (it used to name
      %% expr_map_get_instr, which made failures point at the wrong place).
      exit({?MODULE, expr_map_get_id, "expression missing"})
  end.

%%=============================================================================
%% Creates an rtl instruction that moves a value.
%% A single register/define pair gives an fmove when the register is an fp
%% register and a move otherwise; a longer register list gives a multimove.
mk_expr_move_instr([Reg], [Define]) ->
  case hipe_rtl:is_fpreg(Reg) of
    true ->
      hipe_rtl:mk_fmove(Reg, Define);
    false ->
      %% FIXME Check is_var() orelse is_reg() ?
      hipe_rtl:mk_move(Reg, Define)
  end;
mk_expr_move_instr([_Reg|_Regs] = RegList, Defines) ->
  %% FIXME Does this really work? What about floats...
  %% (Multiple defines does not seem to be used by any of the
  %%  instructions considered by rtl_lcm at the moment so this is pretty much
  %%  untested/unused.)
  hipe_rtl:mk_multimove(RegList, Defines);
mk_expr_move_instr(_, []) ->
  exit({?MODULE, mk_expr_move_instr, "bad match"}).

%%=============================================================================
%% Returns a set of all expressions in the code.
%% all_exprs(_CFG, []) ->
%%   ?SETS:new();
%% all_exprs(CFG, [Label|Labels]) ->
%%   BB = hipe_rtl_cfg:bb(CFG, Label),
%%   Code = hipe_bb:code(BB),
%%   ?SETS:union(all_exprs_bb(Code),
%%               all_exprs(CFG, Labels)).

%%=============================================================================
%% Returns a set of expressions in a basic block.
%% all_exprs_bb([]) ->
%%   ?SETS:new();
%% all_exprs_bb([Instr|Instrs]) ->
%%   case is_expr(Instr) of
%%     true ->
%%       Expr = expr_clear_dst(Instr),
%%       ExprSet = all_exprs_bb(Instrs),
%%       ?SETS:add_element(Expr, ExprSet);
%%     false ->
%%       all_exprs_bb(Instrs)
%%   end.

%%%%%%%%%%%%%%%%%% EXPRESSION ID -> EXPRESSION MAP %%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Map from expression IDs to expressions.
%%=============================================================================
%% mk_expr_id_map() ->
%%   gb_trees:empty().

%% expr_id_map_insert(Map, ExprId, Expr) ->
%%   gb_trees:insert(ExprId, Expr, Map).

%% expr_id_map_lookup(Map, ExprId) ->
%%   gb_trees:lookup(ExprId, Map).

%%=============================================================================
%% Translates an expression id back into its expression.
%% Exits if the id is not present in the map.
expr_id_map_get_expr(Map, ExprId) ->
  case gb_trees:lookup(ExprId, Map) of
    none ->
      exit({?MODULE, expr_id_map_get_expr, "expression id missing"});
    {value, Expr} ->
      Expr
  end.

%%=============================================================================
%% Expression ID counter, kept in the process dictionary under the key
%% {rtl_lcm, expr_id_count}.

%% Resets the counter so that the next id handed out is 0.
init_expr_id() ->
  _ = put({rtl_lcm,expr_id_count}, 0),
  ok.

%% Hands out the current counter value and advances the counter by one.
-spec new_expr_id() -> non_neg_integer().
new_expr_id() ->
  Key = {rtl_lcm, expr_id_count},
  %% put/2 returns the value previously stored under Key, which is exactly
  %% the id to hand out.
  put(Key, get(Key) + 1).

%%%%%%%%%%%%%%%%%% EDGE BB (INSERT BETWEEN) MAP %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Map from edges to labels.
%% This is used by insert_expr_between to remember what new bbs it has created
%% for insertions on edges, and thus for multiple insertions on the same edge
%% to end up in the same bb.
%%=============================================================================
%% Creates an empty edge -> label map.
mk_edge_bb_map() ->
  gb_trees:empty().

%% Associates Edge with Label, replacing any label already stored for Edge.
edge_bb_map_insert(EdgeMap, Edge, Label) ->
  gb_trees:enter(Edge, Label, EdgeMap).

%% Returns {value, Label} for a known Edge and 'none' otherwise.
edge_bb_map_lookup(EdgeMap, Edge) ->
  gb_trees:lookup(Edge, EdgeMap).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% PRETTY-PRINTING %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%=============================================================================
%% Debug output. When the LCM_DEBUG macro is defined, its value decides at
%% run time whether anything is printed; when it is undefined both
%% functions do nothing and return ok.
-ifdef(LCM_DEBUG).

%% Prints a formatted debug message on standard_io.
pp_debug(Format, Data) ->
  case ?LCM_DEBUG of
    false ->
      ok;
    true ->
      io:format(standard_io, Format, Data)
  end.

%% Pretty-prints one RTL instruction on standard_io.
pp_debug_instr(Instr) ->
  case ?LCM_DEBUG of
    false ->
      ok;
    true ->
      hipe_rtl:pp_instr(standard_io, Instr)
  end.

-else.

pp_debug(_Format, _Data) ->
  ok.

pp_debug_instr(_Instr) ->
  ok.

-endif.	%% DEBUG

%%=============================================================================
%% Pretty-prints the calculated sets for the lazy code motion.
%% For every label in the list, prints the label with its predecessors and
%% successors, followed by each non-empty set computed for it: the per-node
%% sets from NodeInfo and the per-edge Earliest/Later sets from EdgeInfo.
%% Sets that are missing ('none') or empty are skipped. Returns ok.
%% AllExpr is not used for printing; it is only passed along.
pp_sets(_, _, _, _, _, _, []) ->
  ok;
pp_sets(ExprMap, IdMap, NodeInfo, EdgeInfo, AllExpr, CFG, [Label|Labels]) ->
  Preds = hipe_rtl_cfg:pred(CFG, Label),
  Succs = hipe_rtl_cfg:succ(CFG, Label),

  io:format(standard_io, "Label ~w~n", [Label]),
  io:format(standard_io, " Preds: ~w~n", [Preds]),
  io:format(standard_io, " Succs: ~w~n", [Succs]),

  pp_titled_expr_set(ExprMap, IdMap, " UEExpr: ~n", [],
                     up_exp(NodeInfo, Label)),
  pp_titled_expr_set(ExprMap, IdMap, " DEExpr: ~n", [],
                     down_exp(NodeInfo, Label)),
  pp_titled_expr_set(ExprMap, IdMap, " ExprKill: ~n", [],
                     killed_expr(NodeInfo, Label)),
  pp_titled_expr_set(ExprMap, IdMap, " AvailIn: ~n", [],
                     avail_in(NodeInfo, Label)),
  pp_titled_expr_set(ExprMap, IdMap, " AvailOut: ~n", [],
                     avail_out(NodeInfo, Label)),
  pp_titled_expr_set(ExprMap, IdMap, " AnticIn: ~n", [],
                     antic_in(NodeInfo, Label)),
  pp_titled_expr_set(ExprMap, IdMap, " AnticOut: ~n", [],
                     antic_out(NodeInfo, Label)),
  pp_titled_expr_set(ExprMap, IdMap, " LaterIn: ~n", [],
                     later_in(NodeInfo, Label)),

  pp_earliest(ExprMap, IdMap, EdgeInfo, Label, Succs),
  pp_later(ExprMap, IdMap, EdgeInfo, Label, Succs),

  pp_titled_expr_set(ExprMap, IdMap, " Delete: ~n", [],
                     delete(NodeInfo, Label)),
  pp_sets(ExprMap, IdMap, NodeInfo, EdgeInfo, AllExpr, CFG, Labels).

%%=============================================================================
%% Prints one set of expression ids under a title line.
%% Nothing is printed when the set is 'none' (not computed) or empty;
%% otherwise the title (an io:format/3 format string with its arguments) is
%% written to standard_io, followed by the expressions of the set.
pp_titled_expr_set(_ExprMap, _IdMap, _Title, _TitleArgs, none) ->
  ok;
pp_titled_expr_set(ExprMap, IdMap, Title, TitleArgs, Set) ->
  case ?SETS:size(Set) =:= 0 of
    true ->
      ok;
    false ->
      io:format(standard_io, Title, TitleArgs),
      pp_exprs(ExprMap, IdMap, ?SETS:to_list(Set))
  end.

%%=============================================================================
%% Pretty-prints the later set of every edge from Pred to one of its
%% successors.
pp_later(_, _, _, _, []) ->
  ok;
pp_later(ExprMap, IdMap, EdgeInfo, Pred, [Succ|Succs]) ->
  pp_titled_expr_set(ExprMap, IdMap, " Later(~w->~w): ~n", [Pred,Succ],
                     later(EdgeInfo, {Pred, Succ})),
  pp_later(ExprMap, IdMap, EdgeInfo, Pred, Succs).

%%=============================================================================
%% Pretty-prints the earliest set of every edge from Pred to one of its
%% successors.
pp_earliest(_, _, _, _, []) ->
  ok;
pp_earliest(ExprMap, IdMap, EdgeInfo, Pred, [Succ|Succs]) ->
  pp_titled_expr_set(ExprMap, IdMap, " Earliest(~w->~w): ~n", [Pred,Succ],
                     earliest(EdgeInfo, {Pred, Succ})),
  pp_earliest(ExprMap, IdMap, EdgeInfo, Pred, Succs).

%%=============================================================================
%% Pretty-prints an expression: maps the id to its expression, builds the
%% instruction that would be inserted for it, and prints that instruction.
pp_expr(ExprMap, IdMap, ExprId) ->
  Expr = expr_id_map_get_expr(IdMap, ExprId),
  hipe_rtl:pp_instr(standard_io, expr_map_get_instr(ExprMap, Expr)).
%% Pretty-prints every expression id in the list, in order. Returns ok.
pp_exprs(ExprMap, IdMap, ExprIds) ->
  lists:foreach(fun(ExprId) -> pp_expr(ExprMap, IdMap, ExprId) end, ExprIds).
diff --git a/lib/hipe/rtl/hipe_rtl_liveness.erl b/lib/hipe/rtl/hipe_rtl_liveness.erl
new file mode 100644
index 0000000000..3cfada9d6c
--- /dev/null
+++ b/lib/hipe/rtl/hipe_rtl_liveness.erl
@@ -0,0 +1,145 @@
%% $Id$
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% LIVENESS ANALYSIS
%%
%% Exports:
%% ~~~~~~~
%% analyze(CFG) - returns a liveness analysis of CFG.
%% liveout(Liveness, Label) - returns a set of variables that are live on
%%      exit from basic block named Label.
%% livein(Liveness, Label) - returns a set of variables that are live on
%%      entry to the basic block named Label.
%% list(Instructions, LiveOut) - Given a list of instructions and a liveout
%%      set, returns a set of variables live at the first instruction.
%%

-module(hipe_rtl_liveness).

%% -define(LIVEOUT_NEEDED,true). % needed for liveness.inc below.
-define(PRETTY_PRINT,false).

-include("hipe_rtl.hrl").
-include("../flow/liveness.inc").

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Interface to CFG and RTL.
%%

%% Fetches the basic block named Label from the RTL control flow graph.
cfg_bb(Cfg, Label) ->
  hipe_rtl_cfg:bb(Cfg, Label).
%% Labels of the control flow graph in postorder.
cfg_postorder(Cfg) ->
  hipe_rtl_cfg:postorder(Cfg).

%% Successor labels of the basic block named Label.
cfg_succ(Cfg, Label) ->
  hipe_rtl_cfg:succ(Cfg, Label).

%% Operands used by an RTL instruction.
uses(Insn) ->
  hipe_rtl:uses(Insn).

%% Operands defined by an RTL instruction.
defines(Insn) ->
  hipe_rtl:defines(Insn).

%%
%% This is the list of registers that are live at exit from a function
%%

liveout_no_succ() ->
  hipe_rtl_arch:live_at_return().

%%
%% The following are used only if annotation of the code is requested.
%%

%% Labels of the control flow graph in reverse postorder.
cfg_labels(Cfg) ->
  hipe_rtl_cfg:reverse_postorder(Cfg).

%% Pretty-prints the code of the basic block named Label.
pp_block(Label, Cfg) ->
  hipe_rtl:pp_block(hipe_bb:code(hipe_rtl_cfg:bb(Cfg, Label))).

%% Prints a list of live temporaries after filtering it through
%% remove_precoloured/1.
pp_liveness_info(LiveList) ->
  print_live_list(remove_precoloured(LiveList)).

%% Prints the temporaries separated by commas and ends the line;
%% an empty list is printed as "none".
print_live_list([]) ->
  io:format(" none~n", []);
print_live_list([Temp|Temps]) ->
  io:format(" ", []),
  print_var(Temp),
  case Temps of
    [] ->
      io:format("~n", []);
    _ ->
      io:format(",", []),
      print_live_list(Temps)
  end.

%% Prints one temporary according to its kind: var, reg or fpreg.
%% Anything else is printed with an "unknown:" prefix.
print_var(Temp) ->
  case hipe_rtl:is_var(Temp) of
    true ->
      pp_var(Temp);
    false ->
      case hipe_rtl:is_reg(Temp) of
        true ->
          pp_reg(Temp);
        false ->
          case hipe_rtl:is_fpreg(Temp) of
            true ->
              io:format("f~w", [hipe_rtl:fpreg_index(Temp)]);
            false ->
              io:format("unknown:~w", [Temp])
          end
      end
  end.

%% Prints the architecture's name for hard register number Index.
pp_hard_reg(Index) ->
  io:format("~s", [hipe_rtl_arch:reg_name(Index)]).

%% Prints a reg: by hard register name if precoloured, else as r<index>.
pp_reg(Reg) ->
  case hipe_rtl_arch:is_precoloured(Reg) of
    false ->
      io:format("r~w", [hipe_rtl:reg_index(Reg)]);
    true ->
      pp_hard_reg(hipe_rtl:reg_index(Reg))
  end.

%% Prints a var: by hard register name if precoloured, else as v<index>.
pp_var(Var) ->
  case hipe_rtl_arch:is_precoloured(Var) of
    false ->
      io:format("v~w", [hipe_rtl:var_index(Var)]);
    true ->
      pp_hard_reg(hipe_rtl:var_index(Var))
  end.

%% Currently the identity function; the filtering of precoloured
%% temporaries is disabled (see the commented-out comprehension).
remove_precoloured(Temps) ->
  Temps.
  %% [X || X <- List, not hipe_rtl_arch:is_precoloured(X)].

-ifdef(DEBUG_LIVENESS).
%% Stores NewBB as the basic block named Label in the control flow graph.
cfg_bb_add(Cfg, Label, NewBB) ->
  hipe_rtl_cfg:bb_add(Cfg, Label, NewBB).

%% Creates an RTL comment instruction carrying Text.
mk_comment(Text) ->
  hipe_rtl:mk_comment(Text).
-endif.
diff --git a/lib/hipe/rtl/hipe_rtl_mk_switch.erl b/lib/hipe/rtl/hipe_rtl_mk_switch.erl new file mode 100644 index 0000000000..e5175217d6 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_mk_switch.erl @@ -0,0 +1,985 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved +%% ==================================================================== +%% Filename : hipe_rtl_mk_switch.erl +%% Module : hipe_rtl_mk_switch +%% Purpose : Implements switching on Erlang values. +%% Notes : Only fixnums are supported well, +%% atoms work with table search, +%% the inline search of atoms might have some bugs. +%% Should be extended to handle bignums and floats. +%% +%% History : * 2001-02-28 Erik Johansson (happi@it.uu.se): +%% Created. +%% * 2001-04-01 Erik Trulsson (ertr1013@csd.uu.se): +%% Stefan Lindström (stli3993@csd.uu.se): +%% Added clustering and inlined binary search trees. +%% * 2001-07-30 EJ (happi@it.uu.se): +%% Fixed some bugs and started cleanup. 
%% ====================================================================
%%  Exports  :
%%    gen_switch_val(I, VarMap, ConstTab, Options)
%%    gen_switch_tuple(I, Map, ConstTab, Options)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-module(hipe_rtl_mk_switch).

-export([gen_switch_val/4, gen_switch_tuple/4]).

%%-------------------------------------------------------------------------

-include("../main/hipe.hrl").

%%-------------------------------------------------------------------------

-define(MINFORJUMPTABLE,9).
	% Minimum number of integers needed to use something other than an
	% inline search.
-define(MINFORINTSEARCHTREE,65).  % Must be at least 3
	% Minimum number of integer elements needed to use a non-inline binary
	% search.

-define(MININLINEATOMSEARCH,8).
	% Minimum number of atoms needed to use an inline binary search instead
	% of a fast linear search.

-define(MINFORATOMSEARCHTREE,20).  % Must be at least 3
	% Minimum number of atoms needed to use a non-inline binary search
	% instead of a linear search.

-define(MAXINLINEATOMSEARCH,64).  % Must be at least 3
	% The cutoff point between inlined and non-inlined binary search for
	% atoms.

-define(WORDSIZE, hipe_rtl_arch:word_size()).
-define(MINDENSITY, 0.5).
	% Minimum density required to use a jumptable instead of a binary search.

%% The reason why MINFORINTSEARCHTREE and MINFORATOMSEARCHTREE must be
%% at least 3 is that the function tab/5 will enter an infinite loop
%% and hang when faced with a switch of size 1 or 2.


%% Options used by this module:
%%
%% [no_]use_indexing
%%     Determines if any indexing should be done at all. Turned on
%%     by default at optimization level o2 and higher.
%%
%% [no_]use_clusters
%%     Controls whether we attempt to divide sparse integer switches
%%     into smaller dense clusters for which jumptables are practical.
%%     Turned off by default since it can increase compilation time
%%     considerably and most programs will gain little benefit from it.
%%
%% [no_]use_inline_atom_search
%%     Controls whether we use an inline binary search for small number
%%     of atoms. Turned off by default since this is currently only
%%     supported on SPARC (and not on x86) and probably needs a bit
%%     more testing before it can be turned on by default.

%% Translates the Icode switch_val instruction I.
%% The use_indexing option selects between the indexed ("fast") translation
%% and the plain chain of equality tests ("slow").
gen_switch_val(I, VarMap, ConstTab, Options) ->
  case proplists:get_bool(use_indexing, Options) of
    true -> gen_fast_switch_val(I, VarMap, ConstTab, Options);
    false -> gen_slow_switch_val(I, VarMap, ConstTab, Options)
  end.

%% Indexed translation: the cases are sorted, checked for duplicates,
%% partitioned by the type of their constants, and code is generated for
%% each partition in turn. Returns {Code, VarMap, ConstTab}.
gen_fast_switch_val(I, VarMap, ConstTab, Options) ->
  Term = hipe_icode:switch_val_term(I),
  {Arg, VarMapWithArg} = hipe_rtl_varmap:icode_var2rtl_var(Term, VarMap),
  IcodeFail = hipe_icode:switch_val_fail_label(I),
  {Fail, VarMapWithFail} =
    hipe_rtl_varmap:icode_label2rtl_label(IcodeFail, VarMapWithArg),
  %% Important that the list of cases is sorted when handling integers.
  SortedCases = lists:sort(hipe_icode:switch_val_cases(I)),

  %% This check is currently not really necessary.  The checking
  %% happens at an earlier phase of the compilation.
  check_duplicates(SortedCases),
  {Types, InitCode} = split_types(SortedCases, Arg),
  handle_types(Types, InitCode, VarMapWithFail, ConstTab, Arg,
               {I, Fail, Options}).

%% Generates the switch code of every {Type, Label, Cases} partition and
%% appends it, preceded by the partition's label, to the code so far.
%% VarMap and ConstTab are threaded through the partitions.
%% Returns {Code, VarMap, ConstTab}.
handle_types(Types, Code, VarMap, ConstTab, Arg, Info) ->
  EmitPartition =
    fun({Type, Lbl, Cases}, {CodeAcc, VarMapAcc, ConstTabAcc}) ->
        {TypeCode, VarMapNext, ConstTabNext} =
          gen_fast_switch_on(Type, Cases, VarMapAcc, ConstTabAcc, Arg, Info),
        {[CodeAcc, Lbl, TypeCode], VarMapNext, ConstTabNext}
    end,
  lists:foldl(EmitPartition, {Code, VarMap, ConstTab}, Types).
%% Selects the code generation strategy for one type partition.
%%
%% integer: fewer than ?MINFORJUMPTABLE cases get an inline search.
%%          Otherwise, without use_clusters, a jump table is used when the
%%          cases are dense enough (?MINDENSITY) and a table search when they
%%          are not; with use_clusters the cases are split into clusters
%%          that are handled one by one.
%% atom:    linear search, inline binary search or table search, depending
%%          on the number of cases and the use_inline_atom_search option.
%% other:   falls back to the slow translation of the whole instruction.
gen_fast_switch_on(integer, Cases, VarMap, ConstTab, Arg, {I, Fail, Options}) ->
  {SmallestConst, _} = hd(Cases),
  Min = hipe_icode:const_value(SmallestConst),
  if
    length(Cases) < ?MINFORJUMPTABLE ->
      gen_small_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options);
    true ->
      case proplists:get_bool(use_clusters, Options) of
        true ->
          Clusters =
            cluster_merge(cluster_split(Cases, minclusters(Cases))),
          find_cluster(Clusters, VarMap, ConstTab, Options, Arg, Fail,
                       hipe_icode:switch_val_fail_label(I));
        false ->
          CaseTuple = list_to_tuple(Cases),
          case density(CaseTuple, 1, tuple_size(CaseTuple)) >= ?MINDENSITY of
            true ->
              gen_jump_table(Arg, Fail, hipe_icode:switch_val_fail_label(I),
                             VarMap, ConstTab, Cases, Min);
            false ->
              gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab,
                                    Options)
          end
      end
  end;
gen_fast_switch_on(atom, Cases, VarMap, ConstTab, Arg, {_I, Fail, Options}) ->
  case proplists:get_bool(use_inline_atom_search, Options) of
    false ->
      if
        length(Cases) < ?MINFORATOMSEARCHTREE ->
          gen_linear_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options);
        true ->
          gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options)
      end;
    true ->
      if
        length(Cases) < ?MININLINEATOMSEARCH ->
          gen_linear_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options);
        length(Cases) > ?MAXINLINEATOMSEARCH ->
          gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options);
        true ->
          gen_atom_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options)
      end
  end;
gen_fast_switch_on(_, _, VarMap, ConstTab, _, {I,_Fail,Options}) ->
  %% We can only handle smart indexing of integers and atoms
  %% TODO: Consider bignum
  gen_slow_switch_val(I, VarMap, ConstTab, Options).


%% Split different types into separate switches.
%% Groups the (sorted) cases into runs of equal type and generates the
%% type tests that dispatch to them.
split_types([], _) ->
  %% Can't happen.
  ?EXIT({empty_caselist});
split_types([FirstCase|OtherCases], Arg) ->
  Groups = split(OtherCases, casetype(FirstCase), [FirstCase], []),
  switch_on_types(Groups, [], [], Arg).
%% Generates the type dispatch for a list of {Type, Cases} groups.
%% Every group but the last gets a type test that branches to the group's
%% label on success and falls through to the next test on failure; the last
%% group is reached with an unconditional goto.
%% Returns {[{Type, Label, Cases}], Code}; the Cases of each entry are the
%% group's cases reversed (split/4 collects them in reverse order).
switch_on_types([{Type,Cases}], AccCode, AccCases, _Arg) ->
  Lbl = hipe_rtl:mk_new_label(),
  I = hipe_rtl:mk_goto(hipe_rtl:label_name(Lbl)),
  {[{Type,Lbl,lists:reverse(Cases)} | AccCases], lists:reverse([I|AccCode])};
switch_on_types([{other,Cases} | Rest], AccCode, AccCases, Arg) ->
  %% Make sure the general case is handled last.
  switch_on_types(Rest ++ [{other,Cases}], AccCode, AccCases, Arg);
switch_on_types([{Type,Cases} | Rest], AccCode, AccCases, Arg) ->
  TLab = hipe_rtl:mk_new_label(),
  FLab = hipe_rtl:mk_new_label(),
  TestCode =
    case Type of
      integer ->
	hipe_tagscheme:test_fixnum(Arg, hipe_rtl:label_name(TLab),
				   hipe_rtl:label_name(FLab), 0.5);
      atom ->
	hipe_tagscheme:test_atom(Arg, hipe_rtl:label_name(TLab),
				 hipe_rtl:label_name(FLab), 0.5);
      bignum ->
	hipe_tagscheme:test_bignum(Arg, hipe_rtl:label_name(TLab),
				   hipe_rtl:label_name(FLab), 0.5);
      _ -> ?EXIT({ooops, type_not_handled, Type})
    end,
  switch_on_types(Rest, [[TestCode,FLab] | AccCode],
		  [{Type,TLab,lists:reverse(Cases)} | AccCases], Arg).

%% Groups consecutive cases of the same type.
%% Type is the type of the cases collected (in reverse order) in Current.
%% Returns a list of {Type, ReversedCases} with the last group first.
split([Case|Cases], Type, Current, Rest) ->
  case casetype(Case) of
    Type ->
      split(Cases, Type, [Case|Current],Rest);
    Other ->
      split(Cases, Other, [Case], [{Type,Current}|Rest])
  end;
split([], Type, Current, Rest) ->
  [{Type, Current} | Rest].

%% Determine what type an entry in the caselist has.
%% An entry is a {Const, Label} pair and is classified by the value of its
%% constant; any other term is classified as a value directly.

casetype({Const,_}) ->
  %% The value is classified by a separate function.  Recursing into
  %% casetype/1 here would treat a constant whose value is itself a 2-tuple
  %% as another {Const, Label} entry and call hipe_icode:const_value/1 on
  %% something that is not a constant.
  const_value_type(hipe_icode:const_value(Const));
casetype(A) ->
  const_value_type(A).

%% Classifies a constant value as integer (fixnum), bignum, float, atom
%% or other.
const_value_type(A) ->
  if
    is_integer(A) ->
      case hipe_tagscheme:is_fixnum(A) of
	true -> integer;
	false -> bignum
      end;
    is_float(A) -> float;
    is_atom(A) -> atom;
    true -> other
  end.

%% check that no duplicate values occur in the case list and also
%% check that all case values have the same type.
%% Cases must be sorted, so equal values are adjacent.  Returns true when
%% no two neighbouring cases have the same value and exits otherwise.
%% (The check that neighbouring values have the same type is disabled.)
check_duplicates([{Const1,_} | [{Const2,_}|_] = Rest]) ->
  Value = hipe_icode:const_value(Const1),
  NextValue = hipe_icode:const_value(Const2),
  case Value =/= NextValue of
    true ->
      check_duplicates(Rest);
    false ->
      ?EXIT({bad_values_in_switchval,Value})
  end;
check_duplicates([_]) -> true;
check_duplicates([]) -> true.

%%
%% Determine the optimal way to divide Cases into clusters such that each
%% cluster is dense.
%%
%% See:
%%  Producing Good Code for the Case Statement, Robert L. Bernstein
%%  Software - Practice and Experience vol 15, 1985, no 10, pp 1021--1024
%% And
%%  Correction to "Producing Good Code for the Case Statement"
%%  Sampath Kannan and Todd A. Proebsting,
%%  Software - Practice and Experience vol 24, 1994, no 2, p 233
%%
%% (The latter is where the algorithm comes from.)

%% This function will return a tuple with the first element being 0
%% The rest of the elements being integers. A value of M at index N
%% (where the first element is considered to have index 0) means that
%% the first N cases can be divided into M (but no fewer) clusters where
%% each cluster is dense.

minclusters(Cases) when is_list(Cases) ->
  minclusters(list_to_tuple(Cases));
minclusters(CaseTuple) when is_tuple(CaseTuple) ->
  Size = tuple_size(CaseTuple),
  %% Entry 0 needs no clusters; all others start out as "unknown" (inf).
  Initial = list_to_tuple([0 | n_list(Size, inf)]),
  i_loop(1, Size, Initial, CaseTuple).

%% Create a list with N elements initialized to Init
n_list(0, _Init) ->
  [];
n_list(N, Init) ->
  [Init | n_list(N - 1, Init)].

%% Do the dirty work of minclusters: for every prefix length I from the
%% given start up to N, let j_loop/4 update the table.
i_loop(I, N, MinClusters, Cases) when I =< N ->
  Updated = j_loop(0, I - 1, MinClusters, Cases),
  i_loop(I + 1, N, Updated, Cases);
i_loop(I, N, MinClusters, _Cases) when I > N ->
  MinClusters.
%% More dirty work: considers, for every J from the given start up to I1,
%% a last cluster made of cases J+1..I1+1.  If that cluster is dense and
%% gives fewer clusters than currently recorded for the first I1+1 cases,
%% the record is updated.
j_loop(J, I1, MinClusters, Cases) when J =< I1 ->
  Density = density(Cases, J + 1, I1 + 1),
  Candidate =
    case element(J + 1, MinClusters) of
      Known when is_number(Known) -> Known + 1;
      Unknown -> Unknown               % still 'inf'
    end,
  Current = element(I1 + 2, MinClusters),
  Updated =
    case Density >= ?MINDENSITY andalso Candidate < Current of
      true -> setelement(I1 + 2, MinClusters, Candidate);
      false -> MinClusters
    end,
  j_loop(J + 1, I1, Updated, Cases);
j_loop(J, I1, MinClusters, _Cases) when J > I1 ->
  MinClusters.


%% Determine the density of a (subset of a) case list
%% A is a tuple with the cases in order from smallest to largest
%% I is the index of the first element and J of the last
%% The density is the number of cases divided by the size of the value
%% range they span.

density(A, I, J) ->
  {FirstConst, _} = element(I, A),
  {LastConst, _} = element(J, A),
  Largest = hipe_icode:const_value(LastConst),
  Smallest = hipe_icode:const_value(FirstConst),
  (J - I + 1) / (Largest - Smallest + 1).


%% Split a case list into dense clusters
%% Returns a list of lists of cases.
%%
%% Cases is the case list and Clust is a list describing the optimal
%% clustering as returned by minclusters
%%
%% If the value in the last place in minclusters is M then we can
%% split the case list into M clusters. We then search for the last
%% (== right-most) occurrence of the value M-1 in minclusters.  That
%% indicates the largest number of cases that can be split into M-1
%% clusters. This means that the cases in between constitute one
%% cluster. Then we recurse on the remainder of the cases.
%%
%% The various calls to lists:reverse are just to ensure that the
%% cases remain in the correct, sorted order.

cluster_split(Cases, Clust) ->
  [_Zero | Counts] = tuple_to_list(Clust),
  NumClusters = element(tuple_size(Clust), Clust),
  cluster_split(NumClusters, [], [],
                lists:reverse(Cases), lists:reverse(Counts)).
%% Worker for cluster_split/2.
%%   N     - number of clusters still to be cut off
%%   Sofar - cases collected for the cluster being built (sorted)
%%   Res   - finished clusters, each {dense, Min, Max, Cases}
%%   Cases - remaining cases, largest first
%%   Clust - remaining minclusters entries, last entry first
%% Cases are moved to Sofar until the head of Clust equals N, which marks a
%% cluster boundary.  When N reaches 0 all remaining cases form the first
%% cluster.
cluster_split(0, [], Res, Cases, _Clust) ->
  L = lists:reverse(Cases),
  {H,_} = hd(L),
  {T,_} = hd(Cases),
  [{dense,hipe_icode:const_value(H),hipe_icode:const_value(T),L}|Res];
cluster_split(N, [], Res, Cases, [N|_] = Clust) ->
  cluster_split(N-1, [], Res, Cases, Clust);
cluster_split(N,Sofar,Res,Cases,[N|Clust]) ->
  {H,_} = hd(Sofar),
  {T,_} = lists:last(Sofar),
  cluster_split(N-1,[],[{dense,hipe_icode:const_value(H),hipe_icode:const_value(T),Sofar}|Res],Cases,[N|Clust]);
cluster_split(N,Sofar,Res,[C|Cases],[_|Clust]) ->
  cluster_split(N,[C|Sofar],Res,Cases,Clust).

%%
%% Merge adjacent small clusters into larger sparse clusters
%%
%% The input is a non-empty list of {Kind, Min, Max, Cases} clusters in
%% increasing order.  Dense clusters with at least ?MINFORJUMPTABLE cases
%% are kept as they are; neighbouring clusters that are sparse or too small
%% for a jump table are joined into one sparse cluster whose cases are those
%% of the left cluster followed by those of the right one.
cluster_merge([C]) -> [C];
cluster_merge([{dense,Min,Max,C}|T]) when length(C) >= ?MINFORJUMPTABLE ->
  C2 = cluster_merge(T),
  [{dense,Min,Max,C}|C2];
cluster_merge([{sparse,Min,_,C},{sparse,_,Max,D}|T]) ->
  R = {sparse,Min,Max,C ++ D},
  cluster_merge([R|T]);
cluster_merge([{sparse,Min,_,C},{dense,_,Max,D}|T]) when length(D) < ?MINFORJUMPTABLE ->
  R = {sparse,Min,Max,C ++ D},
  cluster_merge([R|T]);
cluster_merge([{dense,Min,_,C},{dense,_,Max,D}|T]) when length(C) < ?MINFORJUMPTABLE, length(D) < ?MINFORJUMPTABLE ->
  R = {sparse,Min,Max,C ++ D},
  cluster_merge([R|T]);
cluster_merge([{dense,Min,_,D},{sparse,_,Max,C}|T]) when length(D) < ?MINFORJUMPTABLE ->
  %% D is the left (smaller-valued) cluster here, so its cases must come
  %% first; appending them after C would leave the merged case list
  %% unsorted, which the search code generators rely on.
  R = {sparse,Min,Max,D ++ C},
  cluster_merge([R|T]);
cluster_merge([A,{dense,Min,Max,C}|T]) when length(C) >= ?MINFORJUMPTABLE ->
  R = cluster_merge([{dense,Min,Max,C}|T]),
  [A|R].
%% Generate code to search for the correct cluster
%%
%% A single cluster is translated directly: inline search for small ones,
%% table search for large sparse ones and a jump table for large dense
%% ones.  For several clusters, the argument is compared with the largest
%% value of the first cluster to decide between that cluster and the rest.
%% Returns {Code, VarMap, ConstTab}.

find_cluster([{sparse,_Min,_Max,Cases}],VarMap,ConstTab,Options,Arg,Fail,_IcodeFail) ->
  case length(Cases) < ?MINFORINTSEARCHTREE of
    true ->
      gen_small_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options);
    _ ->
      gen_search_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options)
  end;
find_cluster([{dense,Min,_Max,Cases}],VarMap,ConstTab,Options,Arg,Fail,IcodeFail) ->
  case length(Cases) < ?MINFORJUMPTABLE of
    true ->
      gen_small_switch_val(Arg, Cases, Fail, VarMap, ConstTab, Options);
    _ ->
      gen_jump_table(Arg, Fail, IcodeFail, VarMap, ConstTab, Cases, Min)
  end;
find_cluster([{_Density,_Min,Max,_Cases} = First|Others],VarMap,ConstTab,Options,Arg,Fail,IcodeFail) ->
  FirstLab = hipe_rtl:mk_new_label(),
  OthersLab = hipe_rtl:mk_new_label(),
  {FirstCode, VarMap1, ConstTab1} =
    find_cluster([First], VarMap, ConstTab, Options, Arg, Fail, IcodeFail),
  {OthersCode, VarMap2, ConstTab2} =
    find_cluster(Others, VarMap1, ConstTab1, Options, Arg, Fail, IcodeFail),
  %% Values above the first cluster's maximum belong to a later cluster.
  Dispatch =
    hipe_rtl:mk_branch(Arg, gt,
                       hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(Max)),
                       hipe_rtl:label_name(OthersLab),
                       hipe_rtl:label_name(FirstLab), 0.50),
  Code = [Dispatch, FirstLab | FirstCode ++ [OthersLab | OthersCode]],
  {Code, VarMap2, ConstTab2}.

%% Generate efficient code for a linear search through the case list.
%% Only works for atoms and integer.
%% Returns {Code, VarMap, ConstTab}; the constant table is not changed.
gen_linear_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) ->
  {Values, _} = split_cases(Cases),
  {Targets, NewVarMap} = lbls_from_cases(Cases, VarMap),
  {fast_linear_search(Arg, Values, Targets, Fail), NewVarMap, ConstTab}.
%% Emits one "load the constant, branch if equal" test per case value and
%% ends with a jump to the fail label.  Values and target labels are given
%% as two parallel lists.
fast_linear_search(_Arg,[],[],Fail) ->
  [hipe_rtl:mk_goto(hipe_rtl:label_name(Fail))];
fast_linear_search(Arg,[Value|Values],[Target|Targets],Fail) ->
  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
  NextTest = hipe_rtl:mk_new_label(),
  RemainingTests = fast_linear_search(Arg, Values, Targets, Fail),
  LoadValue =
    if
      is_integer(Value) ->
        Tagged = hipe_tagscheme:mk_fixnum(Value),
        hipe_rtl:mk_move(Tmp, hipe_rtl:mk_imm(Tagged));
      is_atom(Value) ->
        hipe_rtl:mk_load_atom(Tmp, Value);
      true ->  % This should never happen !
        ?EXIT({internal_error_in_switch_val,Value})
    end,
  Compare = hipe_rtl:mk_branch(Arg, eq, Tmp,
                               Target,
                               hipe_rtl:label_name(NextTest), 0.5),
  [[LoadValue, Compare, NextTest], RemainingTests].


%% Generate code to search through a small cluster of integers using
%% binary search
%% Returns {Code, VarMap, ConstTab}; the constant table is not changed.
gen_small_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) ->
  {Values, _} = split_cases(Cases),
  {Targets, NewVarMap} = lbls_from_cases(Cases, VarMap),
  %% Add tags to the values
  TaggedKeys = [hipe_tagscheme:mk_fixnum(Value) || Value <- Values],
  {inline_search(TaggedKeys, Targets, Arg, Fail), NewVarMap, ConstTab}.


%% Generate code to search through a small cluster of atoms
%% The atoms and their labels are stored as two sorted blocks in the
%% constant table and searched with an inlined binary search.
%% Returns {Code, VarMap, ConstTab}.
gen_atom_switch_val(Arg,Cases,Fail,VarMap,ConstTab,_Options) ->
  {Values, _} = split_cases(Cases),
  {Targets, NewVarMap} = lbls_from_cases(Cases, VarMap),
  LabelRefs = [{label, Target} || Target <- Targets],
  {ConstTabWithKeys, KeyBlock} =
    hipe_consttab:insert_sorted_block(ConstTab, Values),
  {ConstTabWithLabels, LabelBlock} =
    hipe_consttab:insert_sorted_block(ConstTabWithKeys, word, LabelRefs,
                                      Values),
  LastIndex = length(Cases) - 1,
  Code = inline_atom_search(0, LastIndex, KeyBlock, LabelBlock, Arg, Fail,
                            Targets),
  {Code, NewVarMap, ConstTabWithLabels}.


%% calculate the middle position of a list (+ 1 because of 1-indexing of lists)
get_middle(List) ->
  length(List) div 2 + 1.
%% get element [N1, N2] from a list
%% Skips the first Skip elements and returns the following elements up to
%% and including position Last (counting from 1).
get_cases(_, 0, 0) ->
  [];
get_cases([Elem|Elems], 0, Last) ->
  [Elem | get_cases(Elems, 0, Last - 1)];
get_cases([_|Elems], Skip, Last) ->
  get_cases(Elems, Skip - 1, Last - 1).


%% inline_search/4 creates RTL code for a inlined binary search.
%% It requires two sorted tables - one with the keys to search
%% through and one with the corresponding labels to jump to.
%%
%% Input:
%%  KeyList - A list of keys to search through.
%%  LabelList - A list of labels to jump to.
%%  KeyReg - A register containing the key to search for.
%%  Default - A label to jump to if the key is not found.
%%

inline_search([], _LabelList, _KeyReg, _Default) -> [];
inline_search(KeyList, LabelList, KeyReg, Default) ->
  %% Create some registers and labels that we need.
  Reg = hipe_rtl:mk_new_reg_gcsafe(),
  GreaterOrEqLab = hipe_rtl:mk_new_label(),
  LessLab = hipe_rtl:mk_new_label(),
  GreaterLab = hipe_rtl:mk_new_label(),

  case length(KeyList) of
    1 ->
      %% One key left: either it matches or the search fails.
      [hipe_rtl:mk_move(Reg, hipe_rtl:mk_imm(hd(KeyList))),
       hipe_rtl:mk_branch(KeyReg, eq, Reg,
                          hd(LabelList),
                          hipe_rtl:label_name(Default), 0.5)];

    2 ->
      %% Two keys left: test them one after the other.
      [FirstKey, SecondKey] = KeyList,
      [FirstLabel, SecondLabel | _] = LabelList,
      SecondTestLab = hipe_rtl:mk_new_label(),
      [hipe_rtl:mk_move(Reg, hipe_rtl:mk_imm(FirstKey)),
       hipe_rtl:mk_branch(KeyReg, eq, Reg,
                          FirstLabel,
                          hipe_rtl:label_name(SecondTestLab), 0.5),
       SecondTestLab,
       hipe_rtl:mk_move(Reg, hipe_rtl:mk_imm(SecondKey)),
       hipe_rtl:mk_branch(KeyReg, eq, Reg,
                          SecondLabel,
                          hipe_rtl:label_name(Default), 0.5)];

    Length when Length >= 3 ->
      %% Compare with the middle key and continue in the lower or the
      %% upper half.
      MiddlePos = get_middle(KeyList),
      MiddleKey = lists:nth(MiddlePos, KeyList),
      LowerKeys = get_cases(KeyList, 0, MiddlePos - 1),
      LowerLabels = get_cases(LabelList, 0, MiddlePos - 1),
      UpperKeys = get_cases(KeyList, MiddlePos, Length),
      UpperLabels = get_cases(LabelList, MiddlePos, Length),
      MiddleLabel = lists:nth(MiddlePos, LabelList),

      Compare =
        [hipe_rtl:mk_move(Reg, hipe_rtl:mk_imm(MiddleKey)),
         hipe_rtl:mk_branch(KeyReg, lt, Reg,
                            hipe_rtl:label_name(LessLab),
                            hipe_rtl:label_name(GreaterOrEqLab), 0.5),
         GreaterOrEqLab,
         hipe_rtl:mk_branch(KeyReg, gt, Reg,
                            hipe_rtl:label_name(GreaterLab),
                            MiddleLabel, 0.5),
         LessLab],
      %% build search tree for keys less than the middle element
      LowerCode = inline_search(LowerKeys, LowerLabels, KeyReg, Default),
      %% ...and for keys bigger than the middle element
      UpperCode = inline_search(UpperKeys, UpperLabels, KeyReg, Default),

      %% append the code and return it
      Compare ++ LowerCode ++ [GreaterLab] ++ UpperCode
  end.
%% Creates RTL code for an inlined binary search over the table entries
%% with indices Start..End.  Block is the constant table block holding the
%% keys and LBlock the one holding the corresponding labels; Labels is the
%% list of all labels that can be jumped to.  Control goes to Default when
%% KeyReg matches none of the keys.
inline_atom_search(Start, End, Block, LBlock, KeyReg, Default, Labels) ->
  Reg = hipe_rtl:mk_new_reg_gcsafe(),

  case (End - Start) + 1 of
    1 ->
      FoundLab = hipe_rtl:mk_new_label(),
      [
       hipe_rtl:mk_load_word_index(Reg, Block, Start),
       hipe_rtl:mk_branch(KeyReg, eq, Reg,
                          hipe_rtl:label_name(FoundLab),
                          hipe_rtl:label_name(Default), 0.9),
       FoundLab,
       hipe_rtl:mk_goto_index(LBlock, Start, Labels)
      ];

    2 ->
      FirstFoundLab = hipe_rtl:mk_new_label(),
      SecondTestLab = hipe_rtl:mk_new_label(),
      SecondFoundLab = hipe_rtl:mk_new_label(),
      [
       hipe_rtl:mk_load_word_index(Reg, Block, Start),
       hipe_rtl:mk_branch(KeyReg, eq, Reg,
                          hipe_rtl:label_name(FirstFoundLab),
                          hipe_rtl:label_name(SecondTestLab), 0.5),
       FirstFoundLab,
       hipe_rtl:mk_goto_index(LBlock, Start, Labels),

       SecondTestLab,
       hipe_rtl:mk_load_word_index(Reg, Block, End),
       hipe_rtl:mk_branch(KeyReg, eq, Reg,
                          hipe_rtl:label_name(SecondFoundLab),
                          hipe_rtl:label_name(Default), 0.5),
       SecondFoundLab,
       hipe_rtl:mk_goto_index(LBlock, End, Labels)
      ];

    Length when Length >= 3 ->
      GreaterOrEqLab = hipe_rtl:mk_new_label(),
      LessLab = hipe_rtl:mk_new_label(),
      GreaterLab = hipe_rtl:mk_new_label(),
      FoundLab = hipe_rtl:mk_new_label(),

      Mid = ((End - Start) div 2) + Start,
      Compare =
        [
         hipe_rtl:mk_load_word_index(Reg, Block, Mid),
         hipe_rtl:mk_branch(KeyReg, lt, Reg,
                            hipe_rtl:label_name(LessLab),
                            hipe_rtl:label_name(GreaterOrEqLab), 0.5),
         GreaterOrEqLab,
         hipe_rtl:mk_branch(KeyReg, gt, Reg,
                            hipe_rtl:label_name(GreaterLab),
                            hipe_rtl:label_name(FoundLab), 0.5),
         FoundLab,
         hipe_rtl:mk_goto_index(LBlock, Mid, Labels),
         LessLab
        ],
      %% Entries below the middle one, then entries above it.
      LowerCode =
        [inline_atom_search(Start, Mid - 1, Block, LBlock, KeyReg, Default,
                            Labels)],
      UpperCode =
        [inline_atom_search(Mid + 1, End, Block, LBlock, KeyReg, Default,
                            Labels)],
      Compare ++ LowerCode ++ [GreaterLab] ++ UpperCode
  end.


%% Create a jumptable
%% The argument is untagged, rebased to start at 0 when Min is not 0, and
%% range-checked with an unsigned comparison before it is used as index in
%% a switch over the dense table.  Returns {Code, VarMap, ConstTab}; the
%% constant table is not changed.
gen_jump_table(Arg,Fail,IcodeFail,VarMap,ConstTab,Cases,Min) ->
  %% Map is a rtl mapping of Dense
  {Max, DenseTbl} = dense_interval(Cases, Min, IcodeFail),
  {Map, NewVarMap} = lbls_from_cases(DenseTbl, VarMap),

  %% Make some labels and registers that we need.
  InRangeLab = hipe_rtl:mk_new_label(),
  UntaggedR = hipe_rtl:mk_new_reg_gcsafe(),
  StartR = hipe_rtl:mk_new_reg_gcsafe(),

  %% Generate the code to do the switch...
  %% Untag the index.
  Untag = hipe_tagscheme:untag_fixnum(UntaggedR, Arg),
  %% Check that the index is within Min and Max.
  Dispatch =
    case Min of
      0 -> %% First element is 0 this is simple.
        [hipe_rtl:mk_branch(UntaggedR, gtu, hipe_rtl:mk_imm(Max),
                            hipe_rtl:label_name(Fail),
                            hipe_rtl:label_name(InRangeLab), 0.01),
         InRangeLab,
         %% UntaggedR contains the index into the jumptable
         hipe_rtl:mk_switch(UntaggedR, Map)];
      _ -> %% First element is not 0
        [hipe_rtl:mk_alu(StartR, UntaggedR, sub,
                         hipe_rtl:mk_imm(Min)),
         hipe_rtl:mk_branch(StartR, gtu, hipe_rtl:mk_imm(Max-Min),
                            hipe_rtl:label_name(Fail),
                            hipe_rtl:label_name(InRangeLab), 0.01),
         InRangeLab,
         %% StartR contains the index into the jumptable
         hipe_rtl:mk_switch(StartR, Map)]
    end,
  {[Untag | Dispatch], NewVarMap, ConstTab}.


%% Generate the jumptable for Cases while filling in unused positions
%% with the fail label
%% Returns {Max, Table} where Max is the value of the last case and Table
%% has one {Const, Label} entry for every value from Min to Max.

dense_interval(Cases, Min, IcodeFail) ->
  dense_interval(Cases, Min, IcodeFail, 0, 0).

%% Pos is the value the next table entry stands for.  The last two
%% arguments are counters that do not influence the result.
dense_interval([], Pos, _, _, _) ->
  {Pos - 1, []};
dense_interval([{Const,_} = Entry|Entries], Pos, Fail, Range, NoEntries) ->
  case Pos < hipe_icode:const_value(Const) of
    true ->
      %% No case for this value: fill the gap with the fail label.
      {Max, Table} =
        dense_interval([Entry|Entries], Pos + 1, Fail, Range + 1, NoEntries),
      {Max, [{hipe_icode:mk_const(Pos), Fail} | Table]};
    false ->
      {Max, Table} =
        dense_interval(Entries, Pos + 1, Fail, Range + 1, NoEntries + 1),
      {Max, [Entry | Table]}
  end.


%%-------------------------------------------------------------------------
%% switch_val without jumptable
%%

%% Rewrites the switch into a sequence of Icode equality tests and
%% translates that sequence to RTL.
gen_slow_switch_val(I, VarMap, ConstTab, Options) ->
  Is = rewrite_switch_val(I),
  ?IF_DEBUG_LEVEL(3,?msg("Switch: ~w\n", [Is]), no_debug),
  hipe_icode2rtl:translate_instrs(Is, VarMap, ConstTab, Options).

%% Expands a switch_val instruction into a linear sequence of Icode
%% instructions that compares the switched term with each case in turn.
rewrite_switch_val(I) ->
  Term = hipe_icode:switch_val_term(I),
  FailLab = hipe_icode:switch_val_fail_label(I),
  CaseList = hipe_icode:switch_val_cases(I),
  rewrite_switch_val_cases(CaseList, FailLab, Term).

%% For every {Const, Label} case: move the constant into a fresh variable,
%% test it for exact equality against Arg and jump to Label on success,
%% otherwise fall through to a fresh label.  After the last case control
%% goes to Fail.
rewrite_switch_val_cases(Cases, Fail, Arg) ->
  Tests =
    lists:flatmap(
      fun({Const, Target}) ->
          Tmp = hipe_icode:mk_new_var(),
          NextLab = hipe_icode:mk_new_label(),
          [hipe_icode:mk_move(Tmp, Const),
           hipe_icode:mk_if(op_exact_eqeq_2, [Arg, Tmp], Target,
                            hipe_icode:label_name(NextLab)),
           NextLab]
      end,
      Cases),
  Tests ++ [hipe_icode:mk_goto(Fail)].


%%-------------------------------------------------------------------------
%% switch_val with binary search jumptable
%%

%% Stores the case values as a sorted block in the constant table, loads
%% the address of that block into a fresh register and lets tab/5 emit the
%% search code.  Returns {Code, NewVarMap, NewConstTab}.
gen_search_switch_val(Arg, Cases, Default, VarMap, ConstTab, _Options) ->
  TableAddrR = hipe_rtl:mk_new_reg_gcsafe(),

  {Keys, _IcodeLabels} = split_cases(Cases),
  {ConstTab1, BlockId} = hipe_consttab:insert_sorted_block(ConstTab, Keys),
  {LabelNames, VarMap1} = lbls_from_cases(Cases, VarMap),

  LoadTable = hipe_rtl:mk_load_address(TableAddrR, BlockId, constant),
  SearchCode = tab(Keys, LabelNames, Arg, TableAddrR, Default),
  {[LoadTable | SearchCode], VarMap1, ConstTab1}.


%%-------------------------------------------------------------------------
%%
%% tab/5 creates RTL code for a binary search.
%% It requires two sorted tables, one with the keys to search
%% through and one with the corresponding labels to jump to.
%%
%% The implementation is derived from Jon Bentley's
%% Programming Pearls.
%%
%% Input:
%%  KeyList - A list of keys to search through.
%%            (Used to calculate the number of elements; it is also
%%            handed on to the final sorted switch.)
%%  LabelList - A list of labels to jump to.
%%  KeyReg - A register containing the key to search for.
%%  TablePntrReg - A register containing a pointer to the
%%                 tables with keys
%%  Default - A label to jump to if the key is not found.
+%% +%% Example: +%% KeyTbl: < a, b, d, f, h, i, z > +%% Lbls: < 5, 3, 2, 4, 1, 7, 6 > +%% Default: 8 +%% KeyReg: v37 +%% TablePntrReg: r41 +%% +%% should give code like: +%% r41 <- KeyTbl +%% r42 <- 0 +%% r43 <- [r41+16] +%% if (r43 gt v37) then L17 (0.50) else L16 +%% L16: +%% r42 <- 16 +%% goto L17 +%% L17: +%% r46 <- r42 add 16 +%% r45 <- [r41+r46] +%% if (r45 gt v37) then L21 (0.50) else L20 +%% L20: +%% r42 <- r46 +%% goto L21 +%% L21: +%% r48 <- r42 add 8 +%% r47 <- [r41+r48] +%% if (r47 gt v37) then L23 (0.50) else L22 +%% L22: +%% r42 <- r48 +%% goto L23 +%% L23: +%% r50 <- r42 add 4 +%% r49 <- [r41+r50] +%% if (r49 gt v37) then L25 (0.50) else L24 +%% L24: +%% r42 <- r42 add 4 +%% goto L25 +%% L25: +%% if (r42 gt 28) then L6 (0.50) else L18 +%% L18: +%% r44 <- [r41+r42] +%% if (r44 eq v37) then L19 (0.90) else L8 +%% L19: +%% r42 <- r42 sra 2 +%% switch (r42) <L5, L3, L2, L4, L1, +%% L7, L6> + +%% +%% The search is done like a rolled out binary search, +%% but instead of starting in the middle we start at +%% the power of two closest above the middle. +%% +%% We let IndexReg point to the lower bound of our +%% search, and then we speculatively look at a +%% position at IndexReg + I where I is a power of 2. +%% +%% Example: Looking for 'h' in +%% KeyTbl: < a, b, d, f, h, i, z > +%% +%% We start with IndexReg=0 and I=4 +%% < a, b, d, f, h, i, z > +%% ^ ^ +%% IndexReg + I +%% +%% 'f' < 'h' so we add I to IndexReg and divide I with 2 +%% IndexReg=4 and I=2 +%% < a, b, d, f, h, i, z > +%% ^ ^ +%% IndexReg + I +%% +%% 'i' > 'h' so we keep IndexReg and divide I with 2 +%% IndexReg=4 and I=1 +%% < a, b, d, f, h, i, z > +%% ^ ^ +%% IndexReg+ I +%% Now we have found 'h' so we add I to IndexReg -> 5 +%% And we can load switch to the label at position 5 in +%% the label table. +%% +%% Now since the wordsize is 4 all numbers above are +%% Multiples of 4. 

%% Emits the unrolled binary search described in the comment above.
%%
%%  KeyList      - the keys; its length fixes the table size and it is
%%                 passed on to the final sorted switch
%%  LabelList    - the labels, passed on to the final sorted switch
%%  KeyReg       - register holding the key to look for
%%  TablePntrReg - register holding the address of the key table
%%  Default      - label taken when the key is not in the table
%%
%% Returns a list of RTL instructions.
%%
%% NOTE(review): step/4 only terminates when it is started with a power of
%% two that is at least ?WORDSIZE, so this presumably expects a table with
%% at least three keys -- confirm against the callers.
tab(KeyList, LabelList, KeyReg, TablePntrReg, Default) ->
  %% Byte offset of the last key in the table:
  %%  (the number of keys - 1) * wordsize
  LastOffset = (length(KeyList)-1)*?WORDSIZE,

  %% The largest power of two that does not exceed LastOffset.  This is
  %% computed with integer arithmetic; deriving it from a quotient of
  %% floating-point logarithms depends on that quotient being rounded to
  %% exactly the right integer.
  Pow2 = tab_floor_pow2(LastOffset),

  %% Create some registers and labels that we need
  IndexReg = hipe_rtl:mk_new_reg_gcsafe(),
  Temp = hipe_rtl:mk_new_reg_gcsafe(),
  Temp2 = hipe_rtl:mk_new_reg_gcsafe(),
  Lab1 = hipe_rtl:mk_new_label(),
  Lab2 = hipe_rtl:mk_new_label(),
  Lab3 = hipe_rtl:mk_new_label(),
  Lab4 = hipe_rtl:mk_new_label(),

  %% Calculate the position to start looking at
  Init = (LastOffset)-Pow2,

  %% Create the code
  [
   %% First probe: if the key at Init is below the searched key, move the
   %% lower bound just past Init.
   hipe_rtl:mk_move(IndexReg,hipe_rtl:mk_imm(0)),
   hipe_rtl:mk_load(Temp,TablePntrReg,hipe_rtl:mk_imm(Init)),
   hipe_rtl:mk_branch(Temp, geu, KeyReg,
                      hipe_rtl:label_name(Lab2),
                      hipe_rtl:label_name(Lab1), 0.5),
   Lab1,
   hipe_rtl:mk_alu(IndexReg, IndexReg, add, hipe_rtl:mk_imm(Init+?WORDSIZE)),
   hipe_rtl:mk_goto(hipe_rtl:label_name(Lab2)),
   Lab2] ++

    %% The remaining probes, with halving step sizes.
    step(Pow2 div 2, TablePntrReg, IndexReg, KeyReg) ++

    %% Bounds check, final equality test, and the switch on the word index.
    [hipe_rtl:mk_branch(IndexReg, gt, hipe_rtl:mk_imm(LastOffset),
                        hipe_rtl:label_name(Default),
                        hipe_rtl:label_name(Lab3), 0.5),
     Lab3,
     hipe_rtl:mk_load(Temp2,TablePntrReg,IndexReg),
     hipe_rtl:mk_branch(Temp2, eq, KeyReg,
                        hipe_rtl:label_name(Lab4),
                        hipe_rtl:label_name(Default), 0.9),
     Lab4,
     %% Convert the byte offset into a word index.
     hipe_rtl:mk_alu(IndexReg, IndexReg, sra,
                     hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size())),
     hipe_rtl:mk_sorted_switch(IndexReg, LabelList, KeyList)
    ].

%% Largest power of two that is less than or equal to N.
%% Only defined for integers N >= 1; any other argument is a caller error
%% and fails with function_clause.
tab_floor_pow2(N) when is_integer(N), N >= 1 ->
  tab_floor_pow2(N, 1).

tab_floor_pow2(N, Pow) when Pow * 2 =< N ->
  tab_floor_pow2(N, Pow * 2);
tab_floor_pow2(_N, Pow) ->
  Pow.

%% Emits one probe of the unrolled search: load the key at IndexReg+I and,
%% unless it is (unsigned) greater than KeyReg, advance IndexReg by I.
%% When I equals the word size this is the last probe; otherwise the
%% probe is followed by the code for step size I div 2.
step(I,TablePntrReg,IndexReg,KeyReg) ->
  Loaded = hipe_rtl:mk_new_reg_gcsafe(),
  ProbeIndex = hipe_rtl:mk_new_reg_gcsafe(),
  AdvanceLab = hipe_rtl:mk_new_label(),
  JoinLab = hipe_rtl:mk_new_label(),
  Probe =
    [hipe_rtl:mk_alu(ProbeIndex, IndexReg, add, hipe_rtl:mk_imm(I)),
     hipe_rtl:mk_load(Loaded, TablePntrReg, ProbeIndex),
     hipe_rtl:mk_branch(Loaded, gtu, KeyReg,
                        hipe_rtl:label_name(JoinLab),
                        hipe_rtl:label_name(AdvanceLab), 0.5),
     AdvanceLab],
  Advance =
    case I =:= ?WORDSIZE of
      true ->
        %% Smallest step: bump the index and stop recursing.
        [hipe_rtl:mk_alu(IndexReg, IndexReg, add, hipe_rtl:mk_imm(I)),
         hipe_rtl:mk_goto(hipe_rtl:label_name(JoinLab)),
         JoinLab];
      false ->
        %% Take over the probed index and continue with half the step.
        [hipe_rtl:mk_move(IndexReg, ProbeIndex),
         hipe_rtl:mk_goto(hipe_rtl:label_name(JoinLab)),
         JoinLab
         | step(I div 2, TablePntrReg, IndexReg, KeyReg)]
    end,
  Probe ++ Advance.

%%-------------------------------------------------------------------------

%% Translates the Icode label of every {Key, Label} case into the name of
%% the corresponding RTL label.  The cases are processed from the last to
%% the first, threading the variable map through.
%% Returns {LabelNames, NewVarMap} with the names in case order.
lbls_from_cases(Cases, VarMap) ->
  lists:foldr(
    fun({_Key, IcodeLab}, {Names, Map0}) ->
        {RtlLab, Map1} = hipe_rtl_varmap:icode_label2rtl_label(IcodeLab, Map0),
        {[hipe_rtl:label_name(RtlLab) | Names], Map1}
    end,
    {[], VarMap},
    Cases).

%%-------------------------------------------------------------------------

%% Splits a list of {Const, Label} cases into the list of constant values
%% and the list of labels, both in the original order.
split_cases(L) ->
  split_cases(L, [], []).

split_cases([{Const, Lab} | More], ValuesAcc, LabelsAcc) ->
  Value = hipe_icode:const_value(Const),
  split_cases(More, [Value | ValuesAcc], [Lab | LabelsAcc]);
split_cases([], ValuesAcc, LabelsAcc) ->
  {lists:reverse(ValuesAcc), lists:reverse(LabelsAcc)}.
+ +%%------------------------------------------------------------------------- +%% +%% {switch_tuple_arity,X,Fail,N,[{A1,L1},...,{AN,LN}]} +%% +%% if not boxed(X) goto Fail +%% Hdr := *boxed_val(X) +%% switch_int(Hdr,Fail,[{H(A1),L1},...,{H(AN),LN}]) +%% where H(Ai) = make_arityval(Ai) +%% +%%------------------------------------------------------------------------- + +gen_switch_tuple(I, Map, ConstTab, _Options) -> + Var = hipe_icode:switch_tuple_arity_term(I), + {X, Map1} = hipe_rtl_varmap:icode_var2rtl_var(Var, Map), + Fail0 = hipe_icode:switch_tuple_arity_fail_label(I), + {Fail1, Map2} = hipe_rtl_varmap:icode_label2rtl_label(Fail0, Map1), + FailLab = hipe_rtl:label_name(Fail1), + {Cases, Map3} = + lists:foldr(fun({A,L}, {Rest,M}) -> + {L1,M1} = hipe_rtl_varmap:icode_label2rtl_label(L, M), + L2 = hipe_rtl:label_name(L1), + A1 = hipe_icode:const_value(A), + H1 = hipe_tagscheme:mk_arityval(A1), + {[{H1,L2}|Rest], M1} end, + {[], Map2}, + hipe_icode:switch_tuple_arity_cases(I)), + Hdr = hipe_rtl:mk_new_reg_gcsafe(), + IsBoxedLab = hipe_rtl:mk_new_label(), + {[hipe_tagscheme:test_is_boxed(X, hipe_rtl:label_name(IsBoxedLab), + FailLab, 0.9), + IsBoxedLab, + hipe_tagscheme:get_header(Hdr, X) | + gen_switch_int(Hdr, FailLab, Cases)], + Map3, ConstTab}. + +%% +%% RTL-level switch-on-int +%% + +gen_switch_int(X, FailLab, [{C,L}|Rest]) -> + NextLab = hipe_rtl:mk_new_label(), + [hipe_rtl:mk_branch(X, eq, hipe_rtl:mk_imm(C), L, + hipe_rtl:label_name(NextLab), 0.5), + NextLab | + gen_switch_int(X, FailLab, Rest)]; +gen_switch_int(_, FailLab, []) -> + [hipe_rtl:mk_goto(FailLab)]. + diff --git a/lib/hipe/rtl/hipe_rtl_primops.erl b/lib/hipe/rtl/hipe_rtl_primops.erl new file mode 100644 index 0000000000..560e0259f8 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_primops.erl @@ -0,0 +1,1259 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. 
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Copyright (c) 2001 by Erik Johansson.  All Rights Reserved
%% ====================================================================
%%  Filename : hipe_rtl_primops.erl
%%  Purpose  :
%%  Notes    :
%%  History  : * 2001-03-15 Erik Johansson (happi@it.uu.se):
%%               Created.
%%
%% $Id$
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-module(hipe_rtl_primops).

-export([gen_primop/3, gen_enter_primop/3, gen_call_builtin/6,
         gen_enter_builtin/2]).

%% --------------------------------------------------------------------

-include("../main/hipe.hrl").
-include("../icode/hipe_icode_primops.hrl").
-include("hipe_rtl.hrl").
-include("hipe_literals.hrl").

%% --------------------------------------------------------------------
%% Handling of known MFA builtins that are inline expanded

%% Returns the inline expansion for a call to the MFA given as first
%% argument, or [] when that MFA is not expanded here.
gen_call_builtin({erlang, apply, 3}, Dst, Args, _IsGuard, Cont, Fail) ->
  gen_apply(Dst, Args, Cont, Fail);
gen_call_builtin({erlang, element, 2}, Dst, Args, IsGuard, Cont, Fail) ->
  gen_element(Dst, Args, IsGuard, Cont, Fail);
gen_call_builtin({erlang, self, 0}, Dst, _Args, _IsGuard, Cont, _Fail) ->
  gen_self(Dst, Cont);
gen_call_builtin({erlang, is_tuple, 1}, Dst, Args, _IsGuard, Cont, _Fail) ->
  gen_is_tuple(Dst, Args, Cont);
gen_call_builtin({hipe_bifs, in_native, 0}, Dst, _Args, _IsGuard, Cont, _Fail) ->
  %% In native code the answer is always 'true'.
  Target =
    case Dst of
      [] -> %% The result is not used.
        hipe_rtl:mk_new_var();
      [Dst0] -> Dst0
    end,
  [hipe_rtl:mk_load_atom(Target, true), hipe_rtl:mk_goto(Cont)];
gen_call_builtin(_Fun, _Dst, _Args, _IsGuard, _Cont, _Fail) ->
  [].  % not a builtin

%% (Recall that enters cannot occur within a catch-region in the same
%% function, so we do not need to consider fail-continuations here.)
%% TODO: should we inline expand more functions here? Cf. above.
%% TODO: {erlang, element, 2} and {erlang, self, 0} are not expanded for
%% tail calls yet.
%%
%% Returns the inline expansion for a tail call to the given MFA, or []
%% when that MFA is not expanded here.
gen_enter_builtin({erlang, apply, 3}, Args) ->
  gen_enter_apply(Args);
gen_enter_builtin({hipe_bifs, in_native, 0}, _Args) ->
  Result = hipe_rtl:mk_new_var(),
  [hipe_rtl:mk_load_atom(Result, true), hipe_rtl:mk_return([Result])];
gen_enter_builtin(_Fun, _Args) ->
  [].  % not a builtin

%% --------------------------------------------------------------------
%% Generate code to jump to in case the inlined function fails.

%% Same as gen_fail_code/3 for code that is not in a guard.
gen_fail_code(Fail, Type) ->
  gen_fail_code(Fail, Type, false).

%% Returns {LabelName, Code}: the label name to branch to on failure and
%% the code that has to be emitted for it.  In a guard with a fail label
%% the label is used directly and no code is needed; outside guards a new
%% label followed by code raising the error Type is produced.
gen_fail_code(Fail, Type, IsGuard) ->
  case IsGuard of
    false ->
      RaiseLab = hipe_rtl:mk_new_label(),
      {hipe_rtl:label_name(RaiseLab), [RaiseLab | fail_code(Fail, Type)]};
    true when Fail =/= [] ->
      {Fail, []}  % go directly to target
  end.

%% Code that raises an 'error' exception.  The reason is either the atom
%% Type itself or the 2-tuple {Type, Value}.
fail_code(Fail, Type) when is_atom(Type) ->
  ReasonVar = hipe_rtl:mk_new_var(),
  [hipe_rtl:mk_load_atom(ReasonVar, Type),
   hipe_rtl_exceptions:gen_fail(error, [ReasonVar], Fail)];
fail_code(Fail, {Type, Value}) when is_atom(Type) ->
  ReasonVar = hipe_rtl:mk_new_var(),
  [hipe_rtl:mk_load_atom(ReasonVar, Type),
   hipe_rtl:mk_gctest(3),  % room for a 2-tuple
   gen_mk_tuple(ReasonVar, [ReasonVar, Value]),
   hipe_rtl_exceptions:gen_fail(error, [ReasonVar], Fail)].

%% Failure path for floating point code: the label, the architecture's
%% handling of the FP exception, then code raising badarith.
fp_fail_code(TmpFailLbl, FailLbl) ->
  HandleFpException = hipe_rtl_arch:handle_fp_exception(),
  RaiseBadarith = fail_code(FailLbl, badarith),
  [TmpFailLbl | HandleFpException ++ [RaiseBadarith]].

%% --------------------------------------------------------------------
%% CALL PRIMOP
%%
%% @doc
%%   Generates RTL code for primops. This is mostly a dispatch function.
%%   Tail calls to primops (enter_fun, apply, etc.) are not handled here!
%%
%%   The first argument is the tuple {Op, Dst, Args, Cont, Fail}: the
%%   primop, its destination list ([] when the result is unused), its
%%   arguments, and the continuation and fail labels.
%%   Returns {Code, ConstTab}.  Only the binary syntax primops can return
%%   an updated constant table; all others return ConstTab unchanged.
%%   An Op that is not listed below raises {bad_primop, Op}.
%% @end

gen_primop({Op,Dst,Args,Cont,Fail}, IsGuard, ConstTab) ->
  GotoCont = hipe_rtl:mk_goto(Cont),
  case Op of
    %%
    %% Binary Syntax
    %%
    {hipe_bs_primop, BsOP} ->
      %% Two failure paths are prepared (badarg and system_limit) and both
      %% are appended after the code generated by hipe_rtl_binary.
      {FailLabelName, FailCode1} = gen_fail_code(Fail, badarg, IsGuard),
      {SysLimLblName, FailCode2} = gen_fail_code(Fail, system_limit, IsGuard),
      {Code1,NewConstTab} =
        hipe_rtl_binary:gen_rtl(BsOP, Dst, Args, Cont, FailLabelName,
                                SysLimLblName, ConstTab),
      {[Code1,FailCode1,FailCode2], NewConstTab};
    %%
    %% Other primops
    %%
    _ ->
      Code =
        case Op of
          %% Arithmetic
          '+' ->
            %gen_extra_unsafe_add_2(Dst, Args, Cont);
            gen_add_sub_2(Dst, Args, Cont, Fail, Op, add);
          '-' ->
            gen_add_sub_2(Dst, Args, Cont, Fail, Op, sub);
          '*' ->
            gen_mul_2(Dst, Args, Cont, Fail);
          '/' ->
            %% BIF call: am_Div -> nbif_div_2 -> erts_mixed_div
            [hipe_rtl:mk_call(Dst, '/', Args, Cont, Fail, not_remote)];
          'gen_add' ->
            gen_general_add_sub(Dst, Args, Cont, Fail, '+');
          'gen_sub' ->
            gen_general_add_sub(Dst, Args, Cont, Fail, '-');
          'unsafe_add' ->
            %gen_extra_unsafe_add_2(Dst, Args, Cont);
            gen_unsafe_add_sub_2(Dst, Args, Cont, Fail, '+', add);
          'extra_unsafe_add' ->
            gen_extra_unsafe_add_2(Dst, Args, Cont);
          'unsafe_sub' ->
            gen_unsafe_add_sub_2(Dst, Args, Cont, Fail, '-', sub);
          'extra_unsafe_sub' ->
            gen_extra_unsafe_sub_2(Dst, Args, Cont);
          %'unsafe_mul' ->
          %  gen_unsafe_mul_2(Dst, Args, Cont, Fail, '*');
          'div' ->
            %% BIF call: am_div -> nbif_intdiv_2 -> intdiv_2
            [hipe_rtl:mk_call(Dst, 'div', Args, Cont, Fail, not_remote)];
          'rem' ->
            %% BIF call: am_rem -> nbif_rem_2 -> rem_2
            [hipe_rtl:mk_call(Dst, 'rem', Args, Cont, Fail, not_remote)];
          'band' ->
            gen_bitop_2(Dst, Args, Cont, Fail, Op, 'and');
          'bor' ->
            gen_bitop_2(Dst, Args, Cont, Fail, Op, 'or');
          'bxor' ->
            gen_bitop_2(Dst, Args, Cont, Fail, Op, 'xor');
          'bnot' ->
            gen_bnot_2(Dst, Args, Cont, Fail, Op);
          'bsr'->
            %% BIF call: am_bsr -> nbif_bsr_2 -> bsr_2
            gen_bsr_2(Dst, Args, Cont, Fail, Op);
            %[hipe_rtl:mk_call(Dst, 'bsr', Args, Cont, Fail, not_remote)];
          'bsl' ->
            %% BIF call: am_bsl -> nbif_bsl_2 -> bsl_2
            [hipe_rtl:mk_call(Dst, 'bsl', Args, Cont, Fail, not_remote)];
          unsafe_band ->
            gen_unsafe_bitop_2(Dst, Args, Cont, 'and');
          unsafe_bor ->
            gen_unsafe_bitop_2(Dst, Args, Cont, 'or');
          unsafe_bxor ->
            gen_unsafe_bitop_2(Dst, Args, Cont, 'xor');
          unsafe_bnot ->
            gen_unsafe_bnot_2(Dst, Args, Cont);
          unsafe_bsr ->
            gen_unsafe_bsr_2(Dst, Args, Cont);
          unsafe_bsl ->
            gen_unsafe_bsl_2(Dst, Args, Cont);
          %%---------------------------------------------
          %% List handling
          %%---------------------------------------------
          %% For the constructors and accessors below nothing but the
          %% jump to Cont is emitted when the result is unused.
          cons ->
            case Dst of
              [] -> %% The result is not used.
                [GotoCont];
              [Dst1] ->
                [gen_cons(Dst1, Args), GotoCont]
            end;
          unsafe_hd ->
            case Dst of
              [] -> %% The result is not used.
                [GotoCont];
              [Dst1] ->
                [gen_unsafe_hd(Dst1, Args), GotoCont]
            end;
          unsafe_tl ->
            case Dst of
              [] -> %% The result is not used.
                [GotoCont];
              [Dst1] ->
                [gen_unsafe_tl(Dst1, Args),GotoCont]
            end;
          %%---------------------------------------------
          %% Tuple handling
          %%---------------------------------------------
          mktuple ->
            case Dst of
              [] -> %% The result is not used.
                [GotoCont];
              [Dst1] ->
                [gen_mk_tuple(Dst1, Args),GotoCont]
            end;
          #unsafe_element{index=N} ->
            case Dst of
              [] -> %% The result is not used.
                [GotoCont];
              [Dst1] ->
                [Tuple] = Args,
                [gen_unsafe_element(Dst1, hipe_rtl:mk_imm(N), Tuple),GotoCont]
            end;
          #unsafe_update_element{index=N} ->
            %% The tuple is updated in place and then moved to the
            %% destination; a destination is required here.
            [Dst1] = Dst,
            [Tuple, Value] = Args,
            [gen_unsafe_update_element(Tuple, hipe_rtl:mk_imm(N), Value),
             hipe_rtl:mk_move(Dst1, Tuple),
             GotoCont];
          {element, [TupleInfo, IndexInfo]} ->
            %% The element code is generated even when the result is
            %% unused; a fresh variable then receives the value.
            Dst1 =
              case Dst of
                [] -> %% The result is not used.
                  hipe_rtl:mk_new_var();
                [Dst0] -> Dst0
              end,
            [Index, Tuple] = Args,
            [gen_element_1(Dst1, Index, Tuple, IsGuard, Cont, Fail,
                           TupleInfo, IndexInfo)];

          %%---------------------------------------------
          %% Apply-fixarity
          %%---------------------------------------------
          #apply_N{arity = Arity} ->
            gen_apply_N(Dst, Arity, Args, Cont, Fail);

          %%---------------------------------------------
          %% GC test
          %%---------------------------------------------
          #gc_test{need = Need} ->
            [hipe_rtl:mk_gctest(Need), GotoCont];

          %%---------------------------------------------
          %% Process handling
          %%---------------------------------------------
          redtest ->
            [gen_redtest(1), GotoCont];
          %%---------------------------------------------
          %% Receives
          %%---------------------------------------------
          check_get_msg ->
            gen_check_get_msg(Dst, GotoCont, Fail);
          next_msg ->
            gen_next_msg(Dst, GotoCont);
          select_msg ->
            gen_select_msg(Dst, Cont);
          clear_timeout ->
            gen_clear_timeout(Dst, GotoCont);
          set_timeout ->
            %% BIF call: am_set_timeout -> nbif_set_timeout -> hipe_set_timeout
            [hipe_rtl:mk_call(Dst, set_timeout, Args, Cont, Fail, not_remote)];
          suspend_msg ->
            gen_suspend_msg(Dst, Cont);
          %%---------------------------------------------
          %% Closures
          %%---------------------------------------------
          call_fun ->
            gen_call_fun(Dst, Args, Cont, Fail);
          #mkfun{mfa=MFA, magic_num=MagicNum, index=Index} ->
            case Dst of
              [] -> %% The result is not used.
                [GotoCont];
              _ ->
                [gen_mkfun(Dst, MFA, MagicNum, Index, Args), GotoCont]
            end;
          #closure_element{n=N} ->
            case Dst of
              [] -> %% The result is not used.
                [GotoCont];
              [Dst1] ->
                [Closure] = Args,
                [gen_closure_element(Dst1, hipe_rtl:mk_imm(N), Closure),
                 GotoCont]
            end;
          %%---------------------------------------------
          %% Floating point instructions.
          %%---------------------------------------------
          %% The floating point cases below return the generated
          %% instruction(s) directly; no jump to Cont is appended here.
          %% When the result is unused the operation is still emitted,
          %% with a fresh register or variable as destination.
          fp_add ->
            [Arg1, Arg2] = Args,
            case Dst of
              [] ->
                hipe_rtl:mk_fp(hipe_rtl:mk_new_fpreg(), Arg1, 'fadd', Arg2);
              [Dst1] ->
                hipe_rtl:mk_fp(Dst1, Arg1, 'fadd', Arg2)
            end;
          fp_sub ->
            [Arg1, Arg2] = Args,
            case Dst of
              [] ->
                hipe_rtl:mk_fp(hipe_rtl:mk_new_fpreg(), Arg1, 'fsub', Arg2);
              [Dst1] ->
                hipe_rtl:mk_fp(Dst1, Arg1, 'fsub', Arg2)
            end;
          fp_mul ->
            [Arg1, Arg2] = Args,
            case Dst of
              [] ->
                hipe_rtl:mk_fp(hipe_rtl:mk_new_fpreg(), Arg1, 'fmul', Arg2);
              [Dst1] ->
                hipe_rtl:mk_fp(Dst1, Arg1, 'fmul', Arg2)
            end;
          fp_div ->
            [Arg1, Arg2] = Args,
            case Dst of
              [] ->
                hipe_rtl:mk_fp(hipe_rtl:mk_new_fpreg(), Arg1, 'fdiv', Arg2);
              [Dst1] ->
                hipe_rtl:mk_fp(Dst1, Arg1, 'fdiv', Arg2)
            end;
          fnegate ->
            [Arg] = Args,
            case Dst of
              [] ->
                hipe_rtl:mk_fp_unop(hipe_rtl:mk_new_fpreg(), Arg, 'fchs');
              [Dst1] ->
                hipe_rtl:mk_fp_unop(Dst1, Arg, 'fchs')
            end;
          fclearerror ->
            gen_fclearerror();
          fcheckerror ->
            gen_fcheckerror(Cont, Fail);
          conv_to_float ->
            case Dst of
              [] ->
                gen_conv_to_float(hipe_rtl:mk_new_fpreg(), Args, Cont, Fail);
              [Dst1] ->
                gen_conv_to_float(Dst1, Args, Cont, Fail)
            end;
          unsafe_untag_float ->
            [Arg] = Args,
            case Dst of
              [] ->
                hipe_tagscheme:unsafe_untag_float(hipe_rtl:mk_new_fpreg(),
                                                  Arg);
              [Dst1]->
                hipe_tagscheme:unsafe_untag_float(Dst1, Arg)
            end;
          unsafe_tag_float ->
            [Arg] = Args,
            case Dst of
              [] ->
                hipe_tagscheme:unsafe_tag_float(hipe_rtl:mk_new_var(), Arg);
              [Dst1]->
                hipe_tagscheme:unsafe_tag_float(Dst1, Arg)
            end;

          %% Only names listed above are accepted! MFA:s are not primops!
          _ ->
            erlang:error({bad_primop, Op})
        end,
      {Code, ConstTab}
  end.

%% Generates RTL code for a primop in tail position.  The argument tuple
%% is {Op, Args}.  Returns {Code, ConstTab}.
gen_enter_primop({Op, Args}, IsGuard, ConstTab) ->
  case Op of
    enter_fun ->
      %% Tail-call to a closure must preserve tail-callness!
      %% (Passing Continuation = [] to gen_call_fun/5 does this.)
      Code = gen_call_fun([], Args, [], []),
      {Code, ConstTab};

    #apply_N{arity=Arity} ->
      %% Tail-call to a closure must preserve tail-callness!
      %% (Passing Continuation = [] to gen_apply_N/5 does this.)
      Code = gen_apply_N([], Arity, Args, [], []),
      {Code, ConstTab};

    _ ->
      %% All other primop tail calls are converted to call + return.
      %% The primop is generated with a fresh result variable, a fresh
      %% continuation label and no fail label; the continuation returns
      %% the result.
      Dst = [hipe_rtl:mk_new_var()],
      OkLab = hipe_rtl:mk_new_label(),
      {Code,ConstTab1} =
        gen_primop({Op,Dst,Args,hipe_rtl:label_name(OkLab),[]},
                   IsGuard, ConstTab),
      {Code ++ [OkLab, hipe_rtl:mk_return(Dst)], ConstTab1}
  end.


%% --------------------------------------------------------------------
%% ARITHMETIC
%% --------------------------------------------------------------------

%%
%% Inline addition & subtraction
%%

%% Emits a plain (not inlined) call to Op.  When the result is unused a
%% fresh variable is supplied as destination.
gen_general_add_sub(Dst, Args, Cont, Fail, Op) ->
  case Dst of
    [] ->
      [hipe_rtl:mk_call([hipe_rtl:mk_new_var()],
                        Op, Args, Cont, Fail, not_remote)];
    [Res] ->
      [hipe_rtl:mk_call([Res], Op, Args, Cont, Fail, not_remote)]
  end.

%% Emits a test that both arguments are fixnums, then the inline fixnum
%% operation AluOp, and the general case (a call to Op) under
%% GenCaseLabel.  When the result is unused the inline operation is
%% left out and only the test and the general case are emitted.
gen_add_sub_2(Dst, Args, Cont, Fail, Op, AluOp) ->
  [Arg1, Arg2] = Args,
  GenCaseLabel = hipe_rtl:mk_new_label(),
  case Dst of
    [] ->
      [hipe_tagscheme:test_two_fixnums(Arg1, Arg2,
                                       hipe_rtl:label_name(GenCaseLabel))|
       gen_op_general_case(hipe_rtl:mk_new_var(),
                           Op, Args, Cont, Fail, GenCaseLabel)];
    [Res] ->
      [hipe_tagscheme:test_two_fixnums(Arg1, Arg2,
                                       hipe_rtl:label_name(GenCaseLabel)),
       hipe_tagscheme:fixnum_addsub(AluOp, Arg1, Arg2, Res, GenCaseLabel)|
       gen_op_general_case(Res,Op, Args, Cont, Fail, GenCaseLabel)]
  end.

%% Addition/subtraction without the fixnum test on the arguments.
%% Nothing but a jump to Cont is emitted when the result is unused.
%% Without a fail label the inline operation is followed by the general
%% case (a call to Op); with a fail label only the inline operation is
%% emitted, with a label built from Fail passed to it.
gen_unsafe_add_sub_2(Dst, Args, Cont, Fail, Op, AluOp) ->
  [Left, Right] = Args,
  case Dst of
    [] ->
      [hipe_rtl:mk_goto(Cont)];
    [Result] ->
      case Fail of
        [] ->
          SlowLab = hipe_rtl:mk_new_label(),
          Inline =
            hipe_tagscheme:fixnum_addsub(AluOp, Left, Right, Result, SlowLab),
          Slow = gen_op_general_case(Result, Op, Args, Cont, Fail, SlowLab),
          [Inline | Slow];
        _ ->
          FailLab = hipe_rtl:mk_label(Fail),
          [hipe_tagscheme:fixnum_addsub(AluOp, Left, Right, Result, FailLab)]
      end
  end.

%% Fixnum addition with no checks at all.
%% NOTE(review): when the result is used, the value returned by
%% hipe_tagscheme:unsafe_fixnum_add/3 is returned as is and no jump to
%% Cont is added here -- confirm that this is intended.
gen_extra_unsafe_add_2(Dst, Args, Cont) ->
  [Left, Right] = Args,
  case Dst of
    [Result] ->
      hipe_tagscheme:unsafe_fixnum_add(Left, Right, Result);
    [] ->
      [hipe_rtl:mk_goto(Cont)]
  end.

%% Fixnum subtraction with no checks at all; see gen_extra_unsafe_add_2/3.
gen_extra_unsafe_sub_2(Dst, Args, Cont) ->
  [Left, Right] = Args,
  case Dst of
    [Result] ->
      hipe_tagscheme:unsafe_fixnum_sub(Left, Right, Result);
    [] ->
      [hipe_rtl:mk_goto(Cont)]
  end.

%% The tail shared by the inlined operations: a jump to Cont that ends
%% the inline path, then GenCaseLabel and a call to Op that computes Res
%% in the general case.
gen_op_general_case(Res, Op, Args, Cont, Fail, GenCaseLabel) ->
  LeaveFastPath = hipe_rtl:mk_goto(Cont),
  GeneralCall = hipe_rtl:mk_call([Res], Op, Args, Cont, Fail, not_remote),
  [LeaveFastPath, GenCaseLabel, GeneralCall].

%%
%% Inline multiplication
%%

%% Emits a test that both arguments are fixnums, the inline fixnum
%% multiplication (left out when the result is unused) and the general
%% case.
gen_mul_2(Dst, Args, Cont, Fail) ->
  [Left, Right] = Args,
  SlowLab = hipe_rtl:mk_new_label(),
  {Result, Inline} =
    case Dst of
      [] ->
        {hipe_rtl:mk_new_var(), []};
      [Target] ->
        {Target, hipe_tagscheme:fixnum_mul(Left, Right, Target, SlowLab)}
    end,
  FixnumTest =
    hipe_tagscheme:test_two_fixnums(Left, Right, hipe_rtl:label_name(SlowLab)),
  %% BIF call: am_Times -> nbif_mul_2 -> erts_mixed_times
  Slow = gen_op_general_case(Result, '*', Args, Cont, Fail, SlowLab),
  [FixnumTest, Inline, Slow].

%% gen_unsafe_mul_2([Res], Args, Cont, Fail, Op) ->
%%    [Arg1, Arg2] = Args,
%%    GenCaseLabel = hipe_rtl:mk_new_label(),
%%    [hipe_tagscheme:test_two_fixnums(Arg1, Arg2,
%%                                     hipe_rtl:label_name(GenCaseLabel)),
%%     hipe_tagscheme:fixnum_mul(Arg1, Arg2, Res, GenCaseLabel)|
%%     gen_op_general_case(Res, Op, Args, Cont, Fail, GenCaseLabel)].

%%
%% Inline bitoperations.
%% Only works for band, bor and bxor.
%% The shift operations are too expensive to inline.
%%

%% Emits a test that both arguments are fixnums, the inline bit operation
%% BitOp (left out when the result is unused) and the general case, a call
%% to Op.
gen_bitop_2(Res, Args, Cont, Fail, Op, BitOp) ->
  [Left, Right] = Args,
  SlowLab = hipe_rtl:mk_new_label(),
  case Res of
    [] -> %% The result is not used.
      FixnumTest =
        hipe_tagscheme:test_two_fixnums(Left, Right,
                                        hipe_rtl:label_name(SlowLab)),
      Unused = hipe_rtl:mk_new_var(),
      [FixnumTest | gen_op_general_case(Unused, Op, Args, Cont, Fail, SlowLab)];
    [Target] ->
      FixnumTest =
        hipe_tagscheme:test_two_fixnums(Left, Right,
                                        hipe_rtl:label_name(SlowLab)),
      Inline = hipe_tagscheme:fixnum_andorxor(BitOp, Left, Right, Target),
      [FixnumTest, Inline
       | gen_op_general_case(Target, Op, Args, Cont, Fail, SlowLab)]
  end.

%% Bit operation without any test on the arguments.
gen_unsafe_bitop_2(Res, Args, Cont, BitOp) ->
  case Res of
    [Target] ->
      [Left, Right] = Args,
      [hipe_tagscheme:fixnum_andorxor(BitOp, Left, Right, Target),
       hipe_rtl:mk_goto(Cont)];
    [] -> %% The result is not used.
      [hipe_rtl:mk_goto(Cont)]
  end.

%% bsr is inlined only when the shift count is an immediate whose fixnum
%% value is non-negative and smaller than the number of bits in a word.
%% The inline code tests that the first argument is a fixnum and falls
%% back on a call in the general case.  In every other situation a plain
%% call to 'bsr' is emitted.
gen_bsr_2(Res, Args, Cont, Fail, Op) ->
  [Value, Shift] = Args,
  SlowLab = hipe_rtl:mk_new_label(),
  InlineOk =
    case hipe_rtl:is_imm(Shift) of
      true ->
        Count = hipe_tagscheme:fixnum_val(hipe_rtl:imm_value(Shift)),
        WordBits = ?bytes_to_bits(hipe_rtl_arch:word_size()),
        (Count < WordBits) andalso (Count >= 0);
      false ->
        false
    end,
  case InlineOk of
    false ->
      [hipe_rtl:mk_call(Res, 'bsr', Args, Cont, Fail, not_remote)];
    true ->
      case Res of
        [] ->
          FixLab = hipe_rtl:mk_new_label(),
          FixnumTest =
            hipe_tagscheme:test_fixnum(Value,
                                       hipe_rtl:label_name(FixLab),
                                       hipe_rtl:label_name(SlowLab),
                                       0.99),
          Unused = hipe_rtl:mk_new_var(),
          [FixnumTest,
           FixLab,
           gen_op_general_case(Unused, Op, Args, Cont, Fail, SlowLab)];
        [Target] ->
          FixLab = hipe_rtl:mk_new_label(),
          FixnumTest =
            hipe_tagscheme:test_fixnum(Value,
                                       hipe_rtl:label_name(FixLab),
                                       hipe_rtl:label_name(SlowLab),
                                       0.99),
          Inline = hipe_tagscheme:fixnum_bsr(Value, Shift, Target),
          [FixnumTest,
           FixLab,
           Inline,
           gen_op_general_case(Target, Op, Args, Cont, Fail, SlowLab)]
      end
  end.

%% Right shift without any test on the arguments.
gen_unsafe_bsr_2(Res, Args, Cont) ->
  case Res of
    [Target] ->
      [Value, Shift] = Args,
      [hipe_tagscheme:fixnum_bsr(Value, Shift, Target),
       hipe_rtl:mk_goto(Cont)];
    [] -> %% The result is not used.
      [hipe_rtl:mk_goto(Cont)]
  end.

%% Left shift without any test on the arguments.
gen_unsafe_bsl_2(Res, Args, Cont) ->
  case Res of
    [Target] ->
      [Value, Shift] = Args,
      [hipe_tagscheme:fixnum_bsl(Value, Shift, Target),
       hipe_rtl:mk_goto(Cont)];
    [] -> %% The result is not used.
      [hipe_rtl:mk_goto(Cont)]
  end.

%%
%% Inline not.
%%

%% Emits a test that the argument is a fixnum, the inline bnot (left out
%% when the result is unused) and the general case, a call to Op.
gen_bnot_2(Res, Args, Cont, Fail, Op) ->
  [Value] = Args,
  SlowLab = hipe_rtl:mk_new_label(),
  case Res of
    [] -> %% The result is not used.
      FixLab = hipe_rtl:mk_new_label(),
      FixnumTest =
        hipe_tagscheme:test_fixnum(Value, hipe_rtl:label_name(FixLab),
                                   hipe_rtl:label_name(SlowLab), 0.99),
      Unused = hipe_rtl:mk_new_var(),
      [FixnumTest,
       FixLab,
       gen_op_general_case(Unused, Op, Args, Cont, Fail, SlowLab)];
    [Target] ->
      FixLab = hipe_rtl:mk_new_label(),
      FixnumTest =
        hipe_tagscheme:test_fixnum(Value, hipe_rtl:label_name(FixLab),
                                   hipe_rtl:label_name(SlowLab), 0.99),
      Inline = hipe_tagscheme:fixnum_not(Value, Target),
      [FixnumTest,
       FixLab,
       Inline,
       gen_op_general_case(Target, Op, Args, Cont, Fail, SlowLab)]
  end.

%% bnot without any test on the argument.
gen_unsafe_bnot_2(Res, Args, Cont) ->
  case Res of
    [Target] ->
      [Value] = Args,
      [hipe_tagscheme:fixnum_not(Value, Target),
       hipe_rtl:mk_goto(Cont)];
    [] -> %% The result is not used.
      [hipe_rtl:mk_goto(Cont)]
  end.


%% --------------------------------------------------------------------
%%

%%
%% Inline cons
%%

%% Stores the two arguments in the two words at the heap pointer, tags
%% that address as a cons cell into Dst and advances the heap pointer by
%% two words.
gen_cons(Dst, [Head, Tail]) ->
  CellPtr = hipe_rtl:mk_new_reg(),
  {GetHPInsn, HP, PutHPInsn} = hipe_rtl_arch:heap_pointer(),
  WordSize = hipe_rtl_arch:word_size(),
  StoreHead = hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(0), Head),
  StoreTail = hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(WordSize), Tail),
  TakeAddress = hipe_rtl:mk_move(CellPtr, HP),
  Tag = hipe_tagscheme:tag_cons(Dst, CellPtr),
  BumpHP = hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(2 * WordSize)),
  [GetHPInsn, StoreHead, StoreTail, TakeAddress, Tag, BumpHP, PutHPInsn].

%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% --------------------------------------------------------------------
%% Handling of closures...
%% --------------------------------------------------------------------

%% --------------------------------------------------------------------
%% gen_mkfun
%%
%%    The gc_test should have expanded to
%%    unsigned needed = ERL_FUN_SIZE + num_free;
%%    ErlFunThing* funp = (ErlFunThing *) HAlloc(p, needed);
%%
%% The code generated should do the equivalent of:
%%  Copy arguments to the fun thing
%%    Eterm* hp = funp->env;
%%    for (i = 0; i < num_free; i++) {
%%      *hp++ = reg[i];
%%    }
%%
%%  Fill in fields
%%    funp->thing_word = HEADER_FUN;
%%    funp->fe = fe;
%%    funp->num_free = num_free;
%%    funp->creator = p->id;
%%    funp->native_code = fe->native_code;
%%  Increase refcount
%%    fe->refc++;
%%
%%  Link to the process off_heap.funs list
%%    funp->next = p->off_heap.funs;
%%    p->off_heap.funs = funp;
%%
%%  Tag the thing
%%    return make_fun(funp);
%%
%% Arguments: the destination (a one-element list), the MFA of the fun,
%% its magic number and index, and the free variables to capture.
%% Returns a (nested) list of RTL instructions.
gen_mkfun([Dst], {_Mod, _FunId, _Arity} = MFidA, MagicNr, Index, FreeVars) ->
  {GetHPInsn, HP, PutHPInsn} = hipe_rtl_arch:heap_pointer(),
  NumFree = length(FreeVars),

  %%  Copy arguments to the fun thing
  %%    Eterm* hp = funp->env;
  %%    for (i = 0; i < num_free; i++) {
  %%      *hp++ = reg[i];
  %%    }
  CopyFreeVarsCode = gen_free_vars(FreeVars, HP),

  %%  Fill in fields
  %%    funp->thing_word = HEADER_FUN;
  %%    funp->fe = fe;
  %%    funp->num_free = num_free;
  %%    funp->creator = p->id;
  %%    funp->native_code = fe->native_code;
  %%  Increase refcount
  %%    fe->refc++;
  SkeletonCode = gen_fun_thing_skeleton(HP, MFidA, NumFree, MagicNr, Index),

  %%  Link to the process off_heap.funs list
  %%    funp->next = p->off_heap.funs;
  %%    p->off_heap.funs = funp;
  LinkCode = gen_link_closure(HP),

  %%  Tag the thing and increase the heap_pointer.
  %%    make_fun(funp);
  WordSize = hipe_rtl_arch:word_size(),
  HeapNeed = (?ERL_FUN_SIZE + NumFree) * WordSize,
  TagCode = [hipe_tagscheme:tag_fun(Dst, HP),
             %%  AdjustHPCode
             hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(HeapNeed)),
             PutHPInsn],
  [[GetHPInsn | CopyFreeVarsCode], SkeletonCode, LinkCode, TagCode].


%% Emits the stores that fill in the fields of the fun thing at FunP.
%% The arity stored in the fun thing is the arity of the MFA minus the
%% number of free variables.
gen_fun_thing_skeleton(FunP, FunName={_Mod,_FunId,Arity}, NumFree,
                       MagicNr, Index) ->
  %% Assumes that funp == heap_pointer
  %%  Fill in fields
  %%    funp->thing_word = HEADER_FUN;
  %%    funp->fe = fe;
  %%    funp->num_free = num_free;
  %%    funp->creator = p->id;
  %%    funp->native_code = fe->native_code;
  %%  And creates a fe (at load time).
  FeVar = hipe_rtl:mk_new_reg(),
  PidVar = hipe_rtl:mk_new_reg_gcsafe(),
  NativeVar = hipe_rtl:mk_new_reg(),

  [hipe_rtl:mk_load_address(FeVar, {FunName, MagicNr, Index}, closure),
   store_struct_field(FunP, ?EFT_FE, FeVar),
   load_struct_field(NativeVar, FeVar, ?EFE_NATIVE_ADDRESS),
   store_struct_field(FunP, ?EFT_NATIVE_ADDRESS, NativeVar),

   store_struct_field(FunP, ?EFT_ARITY, hipe_rtl:mk_imm(Arity-NumFree)),

   gen_inc_refc(FeVar, ?EFE_REFC),

   store_struct_field(FunP, ?EFT_NUM_FREE, hipe_rtl:mk_imm(NumFree)),
   load_p_field(PidVar, ?P_ID),
   store_struct_field(FunP, ?EFT_CREATOR, PidVar),
   store_struct_field(FunP, ?EFT_THING, hipe_tagscheme:mk_fun_header())].

%% Emits code that increments the reference counter found at Ptr+Offset.
%% Which variant is used depends on ?ERTS_IS_SMP.
gen_inc_refc(Ptr, Offset) ->
  case ?ERTS_IS_SMP of
    0 -> gen_inc_refc_notsmp(Ptr, Offset);
    1 -> gen_inc_refc_smp(Ptr, Offset)
  end.

%% Non-SMP variant: load the counter as an int32, add one, store it back.
gen_inc_refc_notsmp(Ptr, Offset) ->
  Refc = hipe_rtl:mk_new_reg(),
  [load_struct_field(Refc, Ptr, Offset, int32),
   hipe_rtl:mk_alu(Refc, Refc, add, hipe_rtl:mk_imm(1)),
   store_struct_field(Ptr, Offset, Refc, int32)].

%% SMP variant: compute the address of the counter and pass it to the
%% 'atomic_inc' primitive.
gen_inc_refc_smp(Ptr, Offset) ->
  Refc = hipe_rtl:mk_new_reg(),
  [hipe_rtl:mk_alu(Refc, Ptr, 'add', hipe_rtl:mk_imm(Offset)),
   hipe_rtl:mk_call([], 'atomic_inc', [Refc], [], [], not_remote)].
%% Generates the code that links the fun thing FUNP into the process'
%% off-heap funs list.  When ?P_OFF_HEAP_FUNS is [] no code is generated.
gen_link_closure(FUNP) ->
  case ?P_OFF_HEAP_FUNS of
    [] -> gen_link_closure_non_private(FUNP);
    _ -> gen_link_closure_private(FUNP)
  end.

gen_link_closure_private(FUNP) ->
  %% Link to the process off_heap.funs list
  %%   funp->next = p->off_heap.funs;
  %%   p->off_heap.funs = funp;
  FunsVar = hipe_rtl:mk_new_reg(),

  [load_p_field(FunsVar,?P_OFF_HEAP_FUNS),
   hipe_rtl:mk_store(FUNP, hipe_rtl:mk_imm(?EFT_NEXT), FunsVar),
   store_p_field(FUNP,?P_OFF_HEAP_FUNS)].

gen_link_closure_non_private(_FUNP) -> [].

%% Load/store a field of the process control block at the given offset.
load_p_field(Dst,Offset) ->
  hipe_rtl_arch:pcb_load(Dst, Offset).
store_p_field(Src, Offset) ->
  hipe_rtl_arch:pcb_store(Offset, Src).

%% Word-sized store/load of the field at byte offset Offset from StructP.
store_struct_field(StructP, Offset, Src) ->
  hipe_rtl:mk_store(StructP, hipe_rtl:mk_imm(Offset), Src).

load_struct_field(Dest, StructP, Offset) ->
  hipe_rtl:mk_load(Dest, StructP, hipe_rtl:mk_imm(Offset)).

%% 32-bit variants; the load is a signed load.
store_struct_field(StructP, Offset, Src, int32) ->
  hipe_rtl:mk_store(StructP, hipe_rtl:mk_imm(Offset), Src, int32).

load_struct_field(Dest, StructP, Offset, int32) ->
  hipe_rtl:mk_load(Dest, StructP, hipe_rtl:mk_imm(Offset), int32, signed).

%% Generates the code that copies the free variables Vars into the
%% environment of the fun thing at HPReg.  The first instruction computes
%% the environment pointer (HPReg + ?EFT_ENV); it is followed by one store
%% per variable at consecutive word offsets.
gen_free_vars(Vars, HPReg) ->
  HPVar = hipe_rtl:mk_new_var(),
  WordSize = hipe_rtl_arch:word_size(),
  [hipe_rtl:mk_alu(HPVar, HPReg, add, hipe_rtl:mk_imm(?EFT_ENV)) |
   gen_free_vars(Vars, HPVar, 0, WordSize, [])].

%% Accumulates one store per variable.  The accumulator is returned as
%% built, i.e. the stores appear in the reverse order of Vars; each store
%% targets its own offset.
gen_free_vars([Var|Vars], EnvPVar, Offset, WordSize, AccCode) ->
  Code = hipe_rtl:mk_store(EnvPVar, hipe_rtl:mk_imm(Offset), Var),
  gen_free_vars(Vars, EnvPVar, Offset + WordSize, WordSize,
                [Code|AccCode]);
gen_free_vars([], _, _, _, AccCode) -> AccCode.
%% ------------------------------------------------------------------
%%
%% call_fun (also handles enter_fun when Continuation = [])
%%
%% ArgsAndFun holds the call arguments followed by the fun as its last
%% element.  The generated code checks that the last element is a fun and
%% fetches its arity and native address, checks the arity against the
%% number of arguments, and then calls (or tail-calls) the address with
%% ArgsAndFun.  The non-closure path asks 'nonclosure_address' for an
%% address and calls it with the plain arguments only.

gen_call_fun(Dst, ArgsAndFun, Continuation, Fail) ->
  TargetReg = hipe_rtl:mk_new_reg(),
  FunArityReg = hipe_rtl:mk_new_reg_gcsafe(),
  [Fun | ReversedArgs] = lists:reverse(ArgsAndFun),
  PlainArgs = lists:reverse(ReversedArgs),
  NumArgs = length(PlainArgs),

  %% Code for the case where Fun is not a closure.
  NotClosureLbl = hipe_rtl:mk_new_label(),
  DoCallLbl = hipe_rtl:mk_new_label(),
  NotClosureLblName = hipe_rtl:label_name(NotClosureLbl),
  NotClosureCode =
    [NotClosureLbl,
     hipe_rtl:mk_call([TargetReg],
                      'nonclosure_address',
                      [Fun, hipe_rtl:mk_imm(NumArgs)],
                      hipe_rtl:label_name(DoCallLbl),
                      Fail,
                      not_remote),
     DoCallLbl,
     mk_call_or_enter(Dst, TargetReg, PlainArgs, Continuation, Fail)],

  {BadArityLblName, BadArityCode} = gen_fail_code(Fail, {badarity, Fun}),

  ClosureCheckCode =
    hipe_tagscheme:if_fun_get_arity_and_address(FunArityReg, TargetReg,
                                                Fun, NotClosureLblName,
                                                0.9),
  ArityCheckCode = check_arity(FunArityReg, NumArgs, BadArityLblName),
  ClosureCallCode =
    [mk_call_or_enter(Dst, TargetReg, ArgsAndFun, Continuation, Fail)],
  [ClosureCheckCode, ArityCheckCode, ClosureCallCode,
   NotClosureCode, BadArityCode].

%% An empty continuation means tail call (enter); anything else is an
%% ordinary call that continues at Continuation.
mk_call_or_enter(Dst, Target, Args, Continuation, Fail) ->
  case Continuation of
    [] ->
      hipe_rtl:mk_enter(Target, Args, not_remote);
    _ ->
      hipe_rtl:mk_call(Dst, Target, Args, Continuation, Fail, not_remote)
  end.

%% Branch to BadArityLab unless ArityReg equals the immediate Arity; the
%% fall-through label follows the branch.
check_arity(ArityReg, Arity, BadArityLab) ->
  ArityOkLbl = hipe_rtl:mk_new_label(),
  ArityTest = hipe_rtl:mk_branch(ArityReg, eq, hipe_rtl:mk_imm(Arity),
                                 hipe_rtl:label_name(ArityOkLbl),
                                 BadArityLab, 0.9),
  [ArityTest, ArityOkLbl].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% apply
%%
%% The tail call case is not handled here.

gen_apply(Dst, Args = [_M,_F,_AppArgs], Cont, Fail) ->
  %% Dst can be [Res] or [].
  ApplyCall = hipe_rtl:mk_call(Dst, hipe_apply, Args, Cont, Fail, not_remote),
  [ApplyCall].

gen_enter_apply(Args=[_M,_F,_AppArgs]) ->
  %% 'apply' in tail-call context
  ApplyEnter = hipe_rtl:mk_enter(hipe_apply, Args, not_remote),
  [ApplyEnter].

%%
%% apply_N
%% also handles tailcall case (Cont=[])
%%
%% The code first tries to read element 1 of M as a tuple.  If that
%% succeeds the call is made on that element with M appended to the
%% arguments and Arity+1; otherwise control reaches the label and the
%% call is made on M itself.

gen_apply_N(Dst, Arity, [M,F|CallArgs], Cont, Fail) ->
  ModFromTuple = hipe_rtl:mk_new_var(),
  PlainModuleLbl = hipe_rtl:mk_new_label(),
  PlainModuleLblName = hipe_rtl:label_name(PlainModuleLbl),
  FirstIndex = hipe_rtl:mk_imm(1),
  GetModuleCode =
    hipe_tagscheme:element(ModFromTuple, FirstIndex, M, PlainModuleLblName,
                           unknown, 1),
  TupleModuleCall =
    gen_apply_N_common(Dst, Arity+1, ModFromTuple, F, CallArgs ++ [M],
                       Cont, Fail),
  PlainModuleCall =
    gen_apply_N_common(Dst, Arity, M, F, CallArgs, Cont, Fail),
  [GetModuleCode, TupleModuleCall, PlainModuleLbl, PlainModuleCall].

%% Look up (or create a stub for) the code address of M:F/Arity and then
%% call or tail-call it with CallArgs.
gen_apply_N_common(Dst, Arity, M, F, CallArgs, Cont, Fail) ->
  AddressKnownLbl = hipe_rtl:mk_new_label(),
  AddressReg = hipe_rtl:mk_new_reg(),
  TaggedArity = hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(Arity)),
  LookupCall =
    hipe_rtl:mk_call([AddressReg], find_na_or_make_stub,
                     [M,F,TaggedArity],
                     hipe_rtl:label_name(AddressKnownLbl),
                     Fail, not_remote),
  [LookupCall,
   AddressKnownLbl,
   mk_call_or_enter(Dst, AddressReg, CallArgs, Cont, Fail)].


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% mkTuple
%%
%% Writes the header and the elements at the heap pointer, tags the
%% pointer into Dst and advances the heap pointer by Arity+1 words.

gen_mk_tuple(Dst, Elements) ->
  {GetHPInsn, HP, PutHPInsn} = hipe_rtl_arch:heap_pointer(),
  Arity = length(Elements),
  WordSize = hipe_rtl_arch:word_size(),
  HeapNeed = (Arity+1)*WordSize,
  HeaderStore = gen_tuple_header(HP, Arity),
  ElementStores = set_tuple_elements(HP, WordSize, WordSize, Elements, []),
  TagInsn = hipe_tagscheme:tag_tuple(Dst, HP),
  BumpHPInsn = hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(HeapNeed)),
  [GetHPInsn, HeaderStore, ElementStores, TagInsn, BumpHPInsn, PutHPInsn].
%% One store per element, starting at byte offset Offset from HP and
%% advancing WordSize bytes per element.  Stores holds previously
%% generated stores in reverse order; the result is in element order.
set_tuple_elements(HP, Offset, WordSize, Elements, Stores) ->
  StoreAt =
    fun(Element, Off) ->
        {hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(Off), Element),
         Off + WordSize}
    end,
  {NewStores, _EndOffset} = lists:mapfoldl(StoreAt, Offset, Elements),
  lists:reverse(Stores, NewStores).

%%
%% @doc Generate RTL code for the reduction test.
%%
%% Subtracts Amount from FCalls; if the result is negative the process
%% is suspended via suspend_0, otherwise FCalls is written back.
%%
gen_redtest(Amount) ->
  {GetFCallsInsn, FCalls, PutFCallsInsn} = hipe_rtl_arch:fcalls(),
  SuspendLabel = hipe_rtl:mk_new_label(),
  StayLabel = hipe_rtl:mk_new_label(),
  ContinueLabel = hipe_rtl:mk_new_label(),
  CountDown =
    hipe_rtl:mk_alub(FCalls, FCalls, 'sub', hipe_rtl:mk_imm(Amount), 'lt',
                     hipe_rtl:label_name(SuspendLabel),
                     hipe_rtl:label_name(StayLabel), 0.01),
  %% The suspend path should not execute PutFCallsInsn.
  Suspend =
    hipe_rtl:mk_call([], suspend_0, [],
                     hipe_rtl:label_name(ContinueLabel), [], not_remote),
  [GetFCallsInsn, CountDown,
   SuspendLabel, Suspend,
   StayLabel, PutFCallsInsn,
   ContinueLabel].

%% self(): load the P_ID field of the process into the destination (if
%% there is one) and continue at Cont.
gen_self(Dst, Cont) ->
  GotoCont = hipe_rtl:mk_goto(Cont),
  case Dst of
    [] ->      %% The result is not used.
      [GotoCont];
    [Dst1] ->
      [load_p_field(Dst1, ?P_ID), GotoCont]
  end.

%%
%% @doc Generate is_tuple/1 test
%%
gen_is_tuple(Dst, [Arg], Cont) ->
  GotoCont = hipe_rtl:mk_goto(Cont),
  case Dst of
    [] ->      %% The result is not used.
      [GotoCont];
    [Dst1] ->
      IsTupleLbl = hipe_rtl:mk_new_label(),
      NotTupleLbl = hipe_rtl:mk_new_label(),
      Test = hipe_tagscheme:test_tuple(Arg,
                                       hipe_rtl:label_name(IsTupleLbl),
                                       hipe_rtl:label_name(NotTupleLbl),
                                       0.5),
      TrueArm = [IsTupleLbl, hipe_rtl:mk_load_atom(Dst1, true), GotoCont],
      FalseArm = [NotTupleLbl, hipe_rtl:mk_load_atom(Dst1, false), GotoCont],
      [Test] ++ TrueArm ++ FalseArm
  end.

%%
%% @doc Generate unsafe head
%%
gen_unsafe_hd(Dst, [Arg]) ->
  hipe_tagscheme:unsafe_car(Dst, Arg).

%%
%% @doc Generate unsafe tail
%%
gen_unsafe_tl(Dst, [Arg]) ->
  hipe_tagscheme:unsafe_cdr(Dst, Arg).

%%
%% element
%%
%% Args is [Index, Tuple].  When the result is unused a fresh variable
%% takes the place of the destination.
%%
gen_element(Dst, Args, IsGuard, Cont, Fail) ->
  Target =
    case Dst of
      [] ->    %% The result is not used.
        hipe_rtl:mk_new_var();
      [Dst0] ->
        Dst0
    end,
  [Index, Tuple] = Args,
  gen_element_1(Target, Index, Tuple, IsGuard, Cont, Fail, unknown, unknown).

gen_element_1(Dst, Index, Tuple, IsGuard, Cont, Fail, TupleInfo, IndexInfo) ->
  {FailLblName, FailCode} = gen_fail_code(Fail, badarg, IsGuard),
  ElementCode = hipe_tagscheme:element(Dst, Index, Tuple, FailLblName,
                                       TupleInfo, IndexInfo),
  [ElementCode, hipe_rtl:mk_goto(Cont), FailCode].

%%
%% unsafe element
%%
%% The index must be an immediate; anything else is a compiler error.
%%
gen_unsafe_element(Dst, Index, Tuple) ->
  case hipe_rtl:is_imm(Index) of
    false -> ?EXIT({illegal_index_to_unsafe_element,Index});
    true -> hipe_tagscheme:unsafe_constant_element(Dst, Index, Tuple)
  end.

gen_unsafe_update_element(Tuple, Index, Value) ->
  case hipe_rtl:is_imm(Index) of
    false ->
      ?EXIT({illegal_index_to_unsafe_update_element,Index});
    true ->
      hipe_tagscheme:unsafe_update_element(Tuple, Index, Value)
  end.


gen_closure_element(Dst, Index, Closure) ->
  hipe_tagscheme:unsafe_closure_element(Dst, Index, Closure).

%%
%% @doc Generate RTL code that writes a tuple header.
%%
gen_tuple_header(Ptr, Arity) ->
  ArityVal = hipe_rtl:mk_imm(hipe_tagscheme:mk_arityval(Arity)),
  hipe_rtl:mk_store(Ptr, hipe_rtl:mk_imm(0), ArityVal).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%
%%% Receives

%% check_get_msg is always generated as an out-of-line call.
gen_check_get_msg(Dsts, GotoCont, Fail) ->
  gen_check_get_msg_outofline(Dsts, GotoCont, Fail).

%% clear_timeout: ?ERTS_IS_SMP selects the inline or the SMP variant.
gen_clear_timeout([], GotoCont) ->
  case ?ERTS_IS_SMP of
    0 -> gen_clear_timeout_notsmp(GotoCont);
    1 -> gen_clear_timeout_smp(GotoCont)
  end.

-ifdef(notdef).	% for reference, currently unused
%%% check_get_msg is:
%%%	if (!PEEK_MESSAGE(p)) goto Fail;
%%%	Dst = ERL_MESSAGE_TERM(PEEK_MESSAGE(p));
%%% i.e.,
%%%	ErlMessage **save = p->msg.save;
%%%	ErlMessage *msg = *save;
%%%	if (!msg) goto Fail;
%%%	Dst = msg->m[0];
gen_check_get_msg_inline(Dsts, GotoCont, Fail) ->
  Save = hipe_rtl:mk_new_reg(),
  Msg = hipe_rtl:mk_new_reg(),
  HaveMsgLbl = hipe_rtl:mk_new_label(),
  Prefix =
    [load_p_field(Save, ?P_MSG_SAVE),
     load_struct_field(Msg, Save, 0),
     hipe_rtl:mk_branch(Msg, eq, hipe_rtl:mk_imm(0), Fail,
                        hipe_rtl:label_name(HaveMsgLbl), 0.1),
     HaveMsgLbl],
  Prefix ++
    case Dsts of
      [Dst] ->
        [load_struct_field(Dst, Msg, ?MSG_MESSAGE),
         GotoCont];
      [] -> % receive which throws away the message
        [GotoCont]
    end.
-endif.

%%% next_msg is:
%%%	SAVE_MESSAGE(p);
%%% i.e.,
%%%	ErlMessage **save = p->msg.save;
%%%	ErlMessage *msg = *save;
%%%	ErlMessage **next = &msg->next;
%%%	p->msg.save = next;
gen_next_msg([], GotoCont) ->
  SavePtr = hipe_rtl:mk_new_reg(),
  MsgPtr = hipe_rtl:mk_new_reg(),
  NextPtr = hipe_rtl:mk_new_reg(),
  LoadSave = load_p_field(SavePtr, ?P_MSG_SAVE),
  LoadMsg = load_struct_field(MsgPtr, SavePtr, 0),
  AddrOfNext = hipe_rtl:mk_alu(NextPtr, MsgPtr, 'add',
                               hipe_rtl:mk_imm(?MSG_NEXT)),
  StoreSave = store_p_field(NextPtr, ?P_MSG_SAVE),
  [LoadSave, LoadMsg, AddrOfNext, StoreSave, GotoCont].

%%% clear_timeout is:
%%%	p->flags &= ~F_TIMO; JOIN_MESSAGE(p);
%%% i.e.,
%%%	p->flags &= ~F_TIMO;
%%%	p->msg.save = &p->msg.first;
gen_clear_timeout_notsmp(GotoCont) ->
  OldFlags = hipe_rtl:mk_new_reg(),
  NewFlags = hipe_rtl:mk_new_reg_gcsafe(),
  FirstAddr = hipe_rtl:mk_new_reg_gcsafe(),
  ClearTimoBit = hipe_rtl:mk_imm(bnot(?F_TIMO)),
  [load_p_field(OldFlags, ?P_FLAGS),
   hipe_rtl:mk_alu(NewFlags, OldFlags, 'and', ClearTimoBit),
   store_p_field(NewFlags, ?P_FLAGS),
   hipe_rtl_arch:pcb_address(FirstAddr, ?P_MSG_FIRST),
   store_p_field(FirstAddr, ?P_MSG_SAVE),
   GotoCont].
%% Calls the check_get_msg BIF and compares its result with the non-value:
%% on equality control goes to Fail, otherwise the result is moved to the
%% destination (if any) and GotoCont is executed.
gen_check_get_msg_outofline(Dsts, GotoCont, Fail) ->
  AfterCallLbl = hipe_rtl:mk_new_label(),
  GotMsgLbl = hipe_rtl:mk_new_label(),
  Result = hipe_rtl:mk_new_reg(),
  NonValue = hipe_rtl:mk_imm(hipe_tagscheme:mk_non_value()),
  Prefix =
    [hipe_rtl_arch:call_bif([Result], check_get_msg, [],
                            hipe_rtl:label_name(AfterCallLbl), []),
     AfterCallLbl,
     hipe_rtl:mk_branch(Result, eq, NonValue, Fail,
                        hipe_rtl:label_name(GotMsgLbl), 0.1),
     GotMsgLbl],
  Prefix ++
    case Dsts of
      [Dst] ->
        [hipe_rtl:mk_move(Dst, Result),
         GotoCont];
      [] -> % receive which throws away the message
        [GotoCont]
    end.

%% SMP variant of clear_timeout: an out-of-line BIF call followed by
%% GotoCont.
gen_clear_timeout_smp(GotoCont) ->
  AfterCallLbl = hipe_rtl:mk_new_label(),
  ClearCall = hipe_rtl_arch:call_bif([], clear_timeout, [],
                                     hipe_rtl:label_name(AfterCallLbl), []),
  [ClearCall, AfterCallLbl, GotoCont].

gen_select_msg([], Cont) ->
  SelectCall = hipe_rtl_arch:call_bif([], select_msg, [], Cont, []),
  [SelectCall].

gen_suspend_msg([], Cont) ->
  SuspendCall = hipe_rtl:mk_call([], suspend_msg, [], Cont, [], not_remote),
  [SuspendCall].

%% --------------------------------------------------------------------
%%
%% Floating point handling
%%

%% Generates code that reads the process' FP exception field and calls
%% 'fclearerror_error' when it is non-zero.  No code is generated when
%% ?P_FP_EXCEPTION is [].
gen_fclearerror() ->
  case ?P_FP_EXCEPTION of
    [] ->
      [];
    Offset ->
      FlagReg = hipe_rtl:mk_new_reg(),
      ErrorLbl = hipe_rtl:mk_new_label(),
      DoneLbl = hipe_rtl:mk_new_label(),
      DoneLblName = hipe_rtl:label_name(DoneLbl),
      LoadFlag = hipe_rtl_arch:pcb_load(FlagReg, Offset),
      TestFlag = hipe_rtl:mk_branch(FlagReg, eq, hipe_rtl:mk_imm(0),
                                    DoneLblName,
                                    hipe_rtl:label_name(ErrorLbl), 0.9),
      ReportError =
        hipe_rtl:mk_call([], 'fclearerror_error', [], [], [], not_remote),
      [LoadFlag, TestFlag,
       ErrorLbl, ReportError, hipe_rtl:mk_goto(DoneLblName),
       DoneLbl]
  end.
%% Generates code that checks the process' FP exception field after a
%% floating point operation.  If the field is zero control goes to
%% ContLbl; otherwise the field is reset to zero and the failure code
%% produced by fp_fail_code/2 is executed.  No code is generated when
%% ?P_FP_EXCEPTION is [].
gen_fcheckerror(ContLbl, FailLbl) ->
  case ?P_FP_EXCEPTION of
    [] ->
      [];
    Offset ->
      Tmp = hipe_rtl:mk_new_reg(),
      TmpFailLbl0 = hipe_rtl:mk_new_label(),
      FailCode = fp_fail_code(TmpFailLbl0, FailLbl),
      PreFailLbl = hipe_rtl:mk_new_label(),
      hipe_rtl_arch:fwait() ++
        [hipe_rtl_arch:pcb_load(Tmp, Offset),
         hipe_rtl:mk_branch(Tmp, eq, hipe_rtl:mk_imm(0), ContLbl,
                            hipe_rtl:label_name(PreFailLbl), 0.9),
         PreFailLbl,
         hipe_rtl_arch:pcb_store(Offset, hipe_rtl:mk_imm(0)),
         hipe_rtl:mk_goto(hipe_rtl:label_name(TmpFailLbl0)) |
         FailCode]
  end.

%% Generates code that converts Src to an untagged float in Dst.
%%
%% For a variable source three cases are tested in turn:
%%   - fixnum: untag and convert with fconv, then go to ContLbl;
%%   - flonum: untag the float, then go to ContLbl;
%%   - otherwise: call conv_big_to_float, which continues at the final
%%     label (where its result is untagged into Dst) or at the failure
%%     code produced by fp_fail_code/2.
%% Any other kind of source cannot be a number, so only the badarith
%% failure code is generated.
gen_conv_to_float(Dst, [Src], ContLbl, FailLbl) ->
  case hipe_rtl:is_var(Src) of
    true ->
      Tmp = hipe_rtl:mk_new_var(),
      TmpReg = hipe_rtl:mk_new_reg_gcsafe(),
      TrueFixNum = hipe_rtl:mk_new_label(),
      ContFixNum = hipe_rtl:mk_new_label(),
      TrueFp = hipe_rtl:mk_new_label(),
      ContFp = hipe_rtl:mk_new_label(),
      ContBigNum = hipe_rtl:mk_new_label(),
      TestFixNum = hipe_tagscheme:test_fixnum(Src,
                                              hipe_rtl:label_name(TrueFixNum),
                                              hipe_rtl:label_name(ContFixNum),
                                              0.5),
      TestFp = hipe_tagscheme:test_flonum(Src, hipe_rtl:label_name(TrueFp),
                                          hipe_rtl:label_name(ContFp), 0.5),
      GotoCont = hipe_rtl:mk_goto(ContLbl),
      TmpFailLbl0 = hipe_rtl:mk_new_label(),
      FailCode = fp_fail_code(TmpFailLbl0, FailLbl),

      TestFixNum ++
        [TrueFixNum,
         hipe_tagscheme:untag_fixnum(TmpReg, Src),
         hipe_rtl:mk_fconv(Dst, TmpReg),
         GotoCont,
         ContFixNum] ++
        TestFp ++
        [TrueFp,
         hipe_tagscheme:unsafe_untag_float(Dst, Src),
         GotoCont,
         ContFp] ++
        [hipe_rtl:mk_call([Tmp], conv_big_to_float, [Src],
                          hipe_rtl:label_name(ContBigNum),
                          hipe_rtl:label_name(TmpFailLbl0), not_remote)]++
        FailCode ++
        [ContBigNum,
         hipe_tagscheme:unsafe_untag_float(Dst, Tmp)];
    _ ->
      %% This must be an attempt to convert an illegal term.
      %% gen_fail_code/2 returns {LabelName, Code}; only the code belongs
      %% in the instruction list, and an explicit jump makes the transfer
      %% to the failure label independent of what Code starts with.
      {BadArithLblName, BadArithCode} = gen_fail_code(FailLbl, badarith),
      [hipe_rtl:mk_goto(BadArithLblName) | BadArithCode]
  end.
%% diff --git a/lib/hipe/rtl/hipe_rtl_ssa.erl b/lib/hipe/rtl/hipe_rtl_ssa.erl
%% new file mode 100644
%% index 0000000000..f55cc0dd5c
%% --- /dev/null
%% +++ b/lib/hipe/rtl/hipe_rtl_ssa.erl
%% @@ -0,0 +1,93 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%%----------------------------------------------------------------------
%% File    : hipe_rtl_ssa.erl
%% Author  : Kostis Sagonas <kostis@it.uu.se>
%% Created : 30 Jan 2004
%% Purpose : Provides interface functions for converting RTL code into
%%	     SSA form and back using the generic SSA converter.
%%----------------------------------------------------------------------

-module(hipe_rtl_ssa).

-export([uses_to_rename/1]).	%% needed by hipe_rtl_ssa_const_prop

%% The following defines are needed by the included file below
-define(CODE, hipe_rtl).
-define(CFG, hipe_rtl_cfg).
-define(LIVENESS, hipe_rtl_liveness).

-include("hipe_rtl.hrl").
-include("../ssa/hipe_ssa.inc").

%%----------------------------------------------------------------------
%% Auxiliary operations which seriously differ between Icode and RTL.
%%----------------------------------------------------------------------

%% The temporaries defined by Statement, excluding precoloured ones.
defs_to_rename(Statement) ->
  NotPrecoloured = fun(Def) -> not hipe_rtl_arch:is_precoloured(Def) end,
  lists:filter(NotPrecoloured, hipe_rtl:defines(Statement)).
%% The temporaries used by Statement, excluding precoloured ones.
uses_to_rename(Statement) ->
  NotPrecoloured = fun(Use) -> not hipe_rtl_arch:is_precoloured(Use) end,
  lists:filter(NotPrecoloured, hipe_rtl:uses(Statement)).

%% Live-out set for a block without successors.
liveout_no_succ() ->
  hipe_rtl_arch:live_at_return().

%-----------------------------------------------------------------------

%% Restart RTL variable numbering at the first virtual register.
reset_var_indx() ->
  FirstVirtual = hipe_rtl_arch:first_virtual_reg(),
  hipe_gensym:set_var(rtl, FirstVirtual).

%%----------------------------------------------------------------------

is_fp_temp(Temp) ->
  hipe_rtl:is_fpreg(Temp).

mk_new_fp_temp() ->
  hipe_rtl:mk_new_fpreg().

%-----------------------------------------------------------------------
%% Procedure : makePhiMove
%% Purpose   : Create an RTL-specific version of a move instruction
%%             depending on the type of the arguments.
%% Arguments : Dst, Src - the arguments of a Phi instruction that is
%%                        to be moved up the predecessor block as part
%%                        of the SSA un-convert phase.
%% Returns   : Code
%% Note      : ?CODE here is hipe_rtl
%%----------------------------------------------------------------------

makePhiMove(Dst, Src) ->
  DstIsFp = hipe_rtl:is_fpreg(Dst),
  SrcIsFp = hipe_rtl:is_fpreg(Src),
  %% The inner matches are sanity checks: source and destination must
  %% agree on being floating point registers or not.
  case DstIsFp of
    false ->
      case SrcIsFp of
        false -> hipe_rtl:mk_move(Dst, Src)
      end;
    true ->
      case SrcIsFp of
        true -> hipe_rtl:mk_fmove(Dst, Src)
      end
  end.

%-----------------------------------------------------------------------
%% diff --git a/lib/hipe/rtl/hipe_rtl_ssa_avail_expr.erl b/lib/hipe/rtl/hipe_rtl_ssa_avail_expr.erl
%% new file mode 100644
%% index 0000000000..cae6da542f
%% --- /dev/null
%% +++ b/lib/hipe/rtl/hipe_rtl_ssa_avail_expr.erl
%% @@ -0,0 +1,357 @@
%%%
%%% %CopyrightBegin%
%%%
%%% Copyright Ericsson AB 2007-2009. All Rights Reserved.
%%%
%%% The contents of this file are subject to the Erlang Public License,
%%% Version 1.1, (the "License"); you may not use this file except in
%%% compliance with the License. You should have received a copy of the
%%% Erlang Public License along with this software. If not, it can be
%%% retrieved online at http://www.erlang.org/.
%%%
%%% Software distributed under the License is distributed on an "AS IS"
%%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%%% the License for the specific language governing rights and limitations
%%% under the License.
%%%
%%% %CopyrightEnd%
%%%
%%%-------------------------------------------------------------------
%%% File    : hipe_rtl_ssa_avail_expr.erl
%%% Author  : Per Gustafsson <pergu@it.uu.se>
%%% Description : A couple of optimizations on rtl_ssa
%%%               1. Remove unnecessary loads (Global)
%%%               2. Remove unnecessary stores (Local)
%%%               3. Remove unnecessary tag/untag operations
%%%
%%% Changed : 7 Feb 2007 by Per Gustafsson <pergu@it.uu.se>
%%%-------------------------------------------------------------------
-module(hipe_rtl_ssa_avail_expr).

-export([cfg/1]).

-include("../main/hipe.hrl").
-include("hipe_rtl.hrl").

%% Runs the three optimizations in order and then removes dead code.
cfg(CFG) ->
  Passes = [fun remove_loads/1, fun remove_stores/1, fun optimize_fixnums/1],
  Optimized = lists:foldl(fun(Pass, Graph) -> Pass(Graph) end, CFG, Passes),
  hipe_rtl_ssa:remove_dead_code(Optimized).

%%%=============================================================================
%%%
%%% Remove unnecessary loads
%%%
%%%=============================================================================

%% First compute per-label information with the fixpoint iteration, then
%% rewrite every block using that information.
remove_loads(CFG) ->
  Transfer = fun spread_info/2,
  LabelInfo = fix_point(CFG, Transfer),
  pass_through(CFG, Transfer, LabelInfo).

%% Transfer function for one block: returns {NewCode, InfoOut}.
spread_info(Code, Info) ->
  Start = {[], Info},
  lists:foldl(fun do_instr/2, Start, Code).
%% Processes one instruction for load removal.  The accumulator is
%% {CodeSoFar, Env}.  Calls, stores and gc tests reset the environment.
%% A load whose {Src, Offset, Size, Sign} is already bound to a variable
%% is replaced by a move from that variable; in either case the load's
%% destination is then bound to that load description.
do_instr(#call{} = Instr, {Acc, _Info}) ->
  {Acc ++ [Instr], new_env()};
do_instr(#store{} = Instr, {Acc, _Info}) ->
  {Acc ++ [Instr], new_env()};
do_instr(#gctest{} = Instr, {Acc, _Info}) ->
  {Acc ++ [Instr], new_env()};
do_instr(#load{} = Instr, {Acc, Info}) ->
  Dst = hipe_rtl:load_dst(Instr),
  LoadKey = {hipe_rtl:load_src(Instr), hipe_rtl:load_offset(Instr),
             hipe_rtl:load_size(Instr), hipe_rtl:load_sign(Instr)},
  Replacement =
    case lookup_y(LoadKey, Info) of
      none -> Instr;
      KnownVar -> hipe_rtl:mk_move(Dst, KnownVar)
    end,
  Cleaned = remove_defines(Instr, Info, fun load_filter_fun/2),
  {Acc ++ [Replacement], insert(Dst, LoadKey, Cleaned)};
do_instr(Instr, {Acc, Info}) ->
  {Acc ++ [Instr], remove_defines(Instr, Info, fun load_filter_fun/2)}.

%% Keep a binding only if neither the bound variable nor the load's
%% source or offset is among the redefined precoloured registers.
load_filter_fun({X1,{X2,X3,_,_}},PreColDefs) ->
  Untouched = fun(Operand) -> not lists:member(Operand, PreColDefs) end,
  lists:all(Untouched, [X1, X2, X3]).

%%%=============================================================================
%%%
%%% Remove unnecessary stores (local optimization)
%%%
%%%=============================================================================

remove_stores(CFG) ->
  pass_through(CFG, fun remove_store/2, new_info()).

%% Block transfer function; the incoming information is ignored since
%% the optimization is local to a basic block.
remove_store(Code,_) ->
  remove_store_from_bb(Code).

remove_store_from_bb(Code) ->
  remove_store_from_bb(lists:reverse(Code), new_env(), []).

%% Walks the (reversed) block from its last instruction to its first and
%% returns {Code, Env} with Code back in original order.
remove_store_from_bb(RevCode, Env, Acc) ->
  lists:foldl(fun backward_store_step/2, {Acc, Env}, RevCode).

%% One backward step.  Calls, gc tests and loads reset the environment.
%% NOTE(review): insert/3 files StoreKey as the X key, whereas the test
%% below uses lookup_y/2, which searches by Y key; as written the 'true'
%% branch looks unreachable, so no store is ever removed.  Confirm
%% whether lookup_x/2 was intended before changing it.
backward_store_step(#call{} = Instr, {Acc, _Env}) ->
  {[Instr|Acc], new_env()};
backward_store_step(#gctest{} = Instr, {Acc, _Env}) ->
  {[Instr|Acc], new_env()};
backward_store_step(#store{} = Instr, {Acc, Env}) ->
  StoreKey = {hipe_rtl:store_base(Instr),
              hipe_rtl:store_offset(Instr),
              hipe_rtl:store_size(Instr)},
  case lookup_y(StoreKey, Env) of
    none ->
      {[Instr|Acc], insert(StoreKey, true, Env)};
    true ->
      {Acc, Env}
  end;
backward_store_step(#load{} = Instr, {Acc, _Env}) ->
  {[Instr|Acc], new_env()};
backward_store_step(Instr, {Acc, Env}) ->
  {[Instr|Acc], remove_defines(Instr, Env, fun store_filter_fun/2)}.
%% Keep a store binding only if neither its base nor its offset is among
%% the redefined precoloured registers.
store_filter_fun({{X1,X2,_},_},PreColDefs) ->
  Untouched = fun(Operand) -> not lists:member(Operand, PreColDefs) end,
  lists:all(Untouched, [X1, X2]).

%%%=============================================================================
%%%
%%% Optimize Fixnum Operations
%%%
%%%=============================================================================

optimize_fixnums(CFG) ->
  Transfer = fun fixnum_opt/2,
  LabelInfo = fix_point(CFG, Transfer),
  pass_through(CFG, Transfer, LabelInfo).

%% Transfer function for one block: returns {NewCode, EnvOut}.
fixnum_opt(Code,Info) ->
  Start = {[], Info},
  lists:foldl(fun do_fixnums/2, Start, Code).

%% Processes one instruction.  A fixnumop whose source is already related
%% to another temporary in the environment (in either direction) becomes
%% a move from that temporary.  Otherwise the instruction is kept and the
%% pair Dst/Src is recorded, unless one of them is precoloured.  All
%% other instructions are passed through with the environment unchanged.
do_fixnums(#fixnumop{dst=Dst,src=Src} = Instr, {Acc,Env}) ->
  case find_fixnum_alias(Src, Env) of
    none ->
      case hipe_rtl_arch:is_precoloured(Src) or
        hipe_rtl_arch:is_precoloured(Dst) of
        true ->
          {Acc++[Instr],Env}; %% To Avoid non ssa problems
        false ->
          {Acc++[Instr],insert(Dst,Src,Env)}
      end;
    Alias ->
      {Acc++[hipe_rtl:mk_move(Dst,Alias)],Env}
  end;
do_fixnums(Instr, {Acc,Env}) ->
  {Acc++[Instr],Env}.

%% Looks Src up as a Y key first and as an X key second.
find_fixnum_alias(Src, Env) ->
  case lookup_y(Src, Env) of
    none -> lookup_x(Src, Env);
    Found -> Found
  end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Code handling functions
%%

get_code_from_label(Label,CFG) ->
  hipe_bb:code(hipe_rtl_cfg:bb(CFG, Label)).

put_code_at_label(Label,Code,CFG) ->
  hipe_rtl_cfg:bb_add(CFG, Label, hipe_bb:mk_bb(Code)).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% The info environment.
%% An info environment is a mapping from labels to info_out
%%

new_info() ->
  gb_trees:empty().

%% The information stored for Label, or 'none' when there is none.
get_info(Label,Info) ->
  case gb_trees:lookup(Label, Info) of
    none -> none;
    {value, Stored} -> Stored
  end.

add_info(Label, NewInfo, OldInfo) ->
  gb_trees:enter(Label, NewInfo, OldInfo).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Simple worklist utility
%%

%% Appends to the worklist OldList those labels of NewList that are not
%% already on it.
%%
%% The membership test used to be un-negated, which kept only labels that
%% were already queued: starting from [Start] no successor was ever
%% added, so the fixpoint iteration never got past the start block.
add_succ_to_list(NewList, OldList) ->
  RealNew = [New || New <- NewList, not lists:member(New,OldList)],
  OldList ++ RealNew.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Generic Fixpoint Code
%%

%% Computes, for each label reached from the start label, the information
%% produced by the block transfer function Fun.  Returns the label->info
%% mapping.
fix_point(CFG, Fun) ->
  Start = hipe_rtl_cfg:start_label(CFG),
  Info = new_info(),
  fix_point([Start], CFG, Fun, Info).

%% Worklist loop: a block whose out-information is unchanged is dropped;
%% otherwise the new information is recorded and the block's successors
%% are queued.
fix_point([Label|Labels], CFG, Fun, Info) ->
  case initial_stage(Label,CFG,Fun,Info) of
    {true, _, _} ->
      fix_point(Labels, CFG, Fun, Info);
    {false, _, NewInfoOut} ->
      Succ = hipe_rtl_cfg:succ(CFG, Label),
      NewList = add_succ_to_list(Succ, Labels),
      NewInfo = add_info(Label, NewInfoOut, Info),
      fix_point(NewList, CFG, Fun, NewInfo)
  end;
fix_point([], _CFG, _Fun, Info) ->
  Info.

%% Rewrites every block, in reverse postorder, with the code that Fun
%% produces for it given the (fixed) information Info.
pass_through(CFG, Fun, Info) ->
  pass_through(hipe_rtl_cfg:reverse_postorder(CFG),
	       CFG, Fun, Info).

pass_through([Label|Labels], CFG, Fun, Info) ->
  {_, NewCode, _} = initial_stage(Label,CFG,Fun,Info),
  NewCFG = put_code_at_label(Label,NewCode,CFG),
  pass_through(Labels, NewCFG, Fun, Info);
pass_through([], CFG, _Fun, _Info) ->
  CFG.

%% Applies Fun to the non-phi code of the block at Label.  The incoming
%% environment is the join of the predecessors' information, extended
%% with what the block's phi instructions establish.
%% Returns {Unchanged, NewCode, NewInfoOut}, where Unchanged tells
%% whether NewInfoOut equals the information previously stored for Label.
initial_stage(Label,CFG,Fun,Info) ->
  OldInfoOut = get_info(Label,Info),
  Pred = hipe_rtl_cfg:pred(CFG,Label),
  InfoEnv = join([get_info(L,Info) || L <- Pred]),
  OldCode = get_code_from_label(Label,CFG),
  {PhiCode,Code} = split_code(OldCode),
  InfoIn = join_phi(PhiCode,Info,InfoEnv),
  {NewCode, NewInfoOut} = Fun(Code, InfoIn),
  {OldInfoOut=:=NewInfoOut,PhiCode++NewCode, NewInfoOut}.

%% For each phi instruction: if check_label/3 yields one common value for
%% all arguments whose predecessor has recorded information, bind the
%% phi's destination to that value in Env.
join_phi([#phi{dst=Dst,arglist=AList}|Rest], Info, Env) ->
  case lists:foldl(fun(Val,Acc) ->
		       check_label(Val,Info,Acc)
		   end, none, AList) of
    no_val ->
      join_phi(Rest,Info,Env);
    none ->
      join_phi(Rest,Info,Env);
    Expr ->
      join_phi(Rest,Info,insert(Dst,Expr,Env))
  end;
join_phi([], _Info, Env) ->
  Env.
%% Folding step over the arguments of a phi instruction.  Acc is 'none'
%% (nothing seen yet), 'no_val' (the arguments disagree or one has no
%% binding) or the common value seen so far.  Arguments whose label has
%% no recorded information leave Acc untouched.
check_label({Lbl,Var}, Info, Acc) ->
  case gb_trees:lookup(Lbl,Info) of
    none ->
      Acc;
    {value,Env} ->
      merge_phi_value(lookup_x(Var,Env), Acc)
  end.

merge_phi_value(none, _Acc) -> no_val;
merge_phi_value(Val, Acc) when Val =:= Acc -> Acc;
merge_phi_value(Val, none) -> Val;
merge_phi_value(_Val, _Acc) -> no_val.

%% Splits a block's code into its phi instructions and the rest, each
%% part keeping its relative order.
split_code(Code) ->
  IsPhi = fun(#phi{}) -> true;
             (_) -> false
          end,
  lists:partition(IsPhi, Code).

extract_phis(Code) ->
  lists:filter(fun(#phi{}) -> true;
                  (_) -> false
               end, Code).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% One2One Environment
%%
%% A pair of gb_trees {XtoY, YtoX} that is kept one-to-one: binding X to
%% Y first drops any previous binding of X and any previous binding of Y.
%%

new_env() ->
  Empty = gb_trees:empty(),
  {Empty, Empty}.

insert(X,Y,{XtoY,YtoX}) ->
  CleanYtoX = remove_old_binding(X,XtoY,YtoX),
  CleanXtoY = remove_old_binding(Y,YtoX,XtoY),
  {gb_trees:enter(X,Y,CleanXtoY),
   gb_trees:enter(Y,X,CleanYtoX)}.

%% If Key is bound in LookupTree, delete the bound value's entry from
%% ChangeTree (and rebalance); otherwise return ChangeTree as is.
remove_old_binding(Key,LookupTree,ChangeTree) ->
  case gb_trees:lookup(Key,LookupTree) of
    {value,Partner} ->
      gb_trees:balance(gb_trees:delete(Partner,ChangeTree));
    none ->
      ChangeTree
  end.

%% The Y bound to X, or 'none'.
lookup_x(X,{XtoY,_YtoX}) ->
  case gb_trees:lookup(X,XtoY) of
    {value,Y} -> Y;
    none -> none
  end.

%% The X bound to Y, or 'none'.
lookup_y(Y,{_XtoY,YtoX}) ->
  case gb_trees:lookup(Y,YtoX) of
    {value,X} -> X;
    none -> none
  end.

%% Joins a list of environments from left to right.  'none' entries
%% (labels without recorded information) do not constrain the result; if
%% nothing but 'none' remains, or the list is empty, the result is an
%% empty environment.
join([]) ->
  new_env();
join([First|Rest]) ->
  case lists:foldl(fun(Env, Acc) -> join(Acc, Env) end, First, Rest) of
    none -> new_env();
    Joined -> Joined
  end.

%% Join of two environments: the bindings present in both.
join({MapXY1,MapYX1},{MapXY2,MapYX2}) ->
  {join_maps(MapXY1,MapXY2),
   join_maps(MapYX1,MapYX2)};
join(none,E) -> E;
join(E,none) -> E.

%% Keeps the {Key, Value} pairs that occur in both trees.
join_maps(Map1,Map2) ->
  Pairs1 = gb_trees:to_list(Map1),
  Pairs2 = gb_trees:to_list(Map2),
  gb_trees:from_orddict(ordsets:intersection(Pairs1, Pairs2)).

%% If Instr defines precoloured registers, drop the bindings rejected by
%% Fun(Binding, PreColDefs); otherwise return Info unchanged.
remove_defines(Instr,Info,Fun) ->
  IsPrecoloured = fun(Def) -> hipe_rtl_arch:is_precoloured(Def) end,
  case lists:filter(IsPrecoloured, hipe_rtl:defines(Instr)) of
    [] ->
      Info;
    PreColDefs ->
      filter_environments(PreColDefs,Info,Fun)
  end.

%% Rebuilds both trees from the X->Y bindings accepted by Fun.
filter_environments(PreColDefs,{M1,_M2},Fun) ->
  Keep = fun(Binding) -> Fun(Binding, PreColDefs) end,
  KeptXY = lists:filter(Keep, gb_trees:to_list(M1)),
  KeptYX = lists:map(fun({X,Y}) -> {Y,X} end, KeptXY),
  {gb_trees:from_orddict(KeptXY),
   gb_trees:from_orddict(orddict:from_list(KeptYX))}.
diff --git a/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl new file mode 100644 index 0000000000..76c0a88933 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl @@ -0,0 +1,1082 @@ +%% -*- erlang-indent-level: 2 -*- +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% ============================================================================ +%% Filename : hipe_rtl_ssa_const_prop.erl +%% Authors : Bjorn Bergman, Bjarni Juliusson +%% Purpose : Perform sparse conditional constant propagation on RTL. +%% Notes : Works on an SSA-converted control-flow graph. +%% +%% History : * 2004-03-14: Blatantly stolen from Icode (code by +%% Daniel Luna and Erik Andersson) and query-replaced for RTL. +%% * 2004-04-30: Added in the repository. +%% ============================================================================ +%% +%% Exports: propagate/1. +%% +%% ============================================================================ +%% +%% Some things to note: +%% +%% 1. All precoloured registers are assumed to contain bottom. We can not +%% do anything with them since they are not in SSA-form. This might be +%% possible to resolve in some way, but we decided to not go there. +%% +%% 2. 
const_labels are assumed to be bottom, we can not find the address +%% in any nice way (that I know of, maybe someone can help ?). I +%% suppose they don't get a value until linking (or some step that +%% resembles it). They are only affecting bignums and floats (at least +%% as far as I can tell), which are both stored in memory and hence +%% not handled very well by us anyway. +%% +%% 3. can v <- Constant be removed ? I think so. all uses of v will be +%% replaced with an immediate. So why not ? +%% +%% ============================================================================ +%% +%% TODO: +%% +%% Take care of failures in call and replace operation with appropriate +%% failure. +%% +%% Handle ifs with non-binary operators +%% +%% We want multisets for easier (and faster) creation of env->ssa_edges +%% +%% Propagation of constant arguments when some of the arguments are bottom +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +-module(hipe_rtl_ssa_const_prop). +-export([propagate/1]). + +-include("../main/hipe.hrl"). +-include("hipe_rtl.hrl"). +-include("../flow/cfg.hrl"). + +%-define(DEBUG, true). + +-ifdef(DEBUG). +-define(SCCPDBG(W), W). +-define(DEBUG_TST, true). % make sure that we can use ?DEBUG in if-cases... +-else. +-define(DEBUG_TST, false). % make sure that we can use ?DEBUG in if-cases... +-define(SCCPDBG(W), ok). +-endif. + +%%----------------------------------------------------------------------------- +%% Include stuff shared between SCCP on Icode and RTL. +%% NOTE: Needs to appear after DEBUG is possibly defined. +%%----------------------------------------------------------------------------- + +-define(CODE, hipe_rtl). +-define(CFG, hipe_rtl_cfg). +-include("../ssa/hipe_ssa_const_prop.inc"). + +-type bool_lattice() :: 'true' | 'false' | 'top' | 'bottom'. +-type conditional() :: 'eq' | 'ne' | 'ge' | 'geu' | 'gt' | 'gtu' | 'le' + | 'leu' | 'lt' | 'ltu' | 'overflow' | 'not_overflow'. 
%%-----------------------------------------------------------------------------
%% Procedure : visit_expression/2
%% Purpose   : Do a symbolic execution of the given instruction.  This is
%%             just a dispatcher that picks the function handling the
%%             particular kind of instruction.
%% Arguments : Instruction - the instruction
%%             Environment - the current analysis environment
%% Returns   : {FlowWorkList, SSAWorkList, Environment}
%% Note      : comment, enter, fstore, gctest, label, return and store
%%             instructions have no handler of their own and take the
%%             default clause, as do end_try, fail et al.  Phi-nodes are
%%             handled in scc.
%%-----------------------------------------------------------------------------
visit_expression(#alu{} = Instruction, Environment) ->
  visit_alu(Instruction, Environment);
visit_expression(#alub{} = Instruction, Environment) ->
  visit_alub(Instruction, Environment);
visit_expression(#branch{} = Instruction, Environment) ->
  visit_branch(Instruction, Environment);
visit_expression(#call{} = Instruction, Environment) ->
  visit_call(Instruction, Environment);
visit_expression(#fconv{} = Instruction, Environment) ->
  visit_fconv(Instruction, Environment);
visit_expression(#fixnumop{} = Instruction, Environment) ->
  visit_fixnumop(Instruction, Environment);
visit_expression(#fload{} = Instruction, Environment) ->
  visit_fload(Instruction, Environment);
visit_expression(#fmove{} = Instruction, Environment) ->
  visit_fmove(Instruction, Environment);
visit_expression(#fp{} = Instruction, Environment) ->
  visit_fp(Instruction, Environment);
visit_expression(#fp_unop{} = Instruction, Environment) ->
  visit_fp_unop(Instruction, Environment);
visit_expression(#goto{} = Instruction, Environment) ->
  visit_goto(Instruction, Environment);
visit_expression(#goto_index{} = Instruction, Environment) ->
  visit_goto_index(Instruction, Environment);
visit_expression(#load{} = Instruction, Environment) ->
  visit_load(Instruction, Environment);
visit_expression(#load_address{} = Instruction, Environment) ->
  visit_load_address(Instruction, Environment);
visit_expression(#load_atom{} = Instruction, Environment) ->
  visit_load_atom(Instruction, Environment);
visit_expression(#load_word_index{} = Instruction, Environment) ->
  visit_load_word_index(Instruction, Environment);
visit_expression(#move{} = Instruction, Environment) ->
  visit_move(Instruction, Environment);
visit_expression(#multimove{} = Instruction, Environment) ->
  visit_multimove(Instruction, Environment);
visit_expression(#switch{} = Instruction, Environment) ->
  visit_switch(Instruction, Environment);
visit_expression(_Instruction, Environment) ->
  %% label, end_try, comment, return, fail, et al
  {[], [], Environment}.


%%-----------------------------------------------------------------------------
%% Procedure : set_to/3
%% Purpose   : Many of the visit_<inst> functions end in an update of the
%%             environment (and the resulting SSA-edges).  This function
%%             does the update and formats the result so that it can be
%%             immediately returned to visit_expression.
%% Arguments : Dst - the destination, may be a list of destinations.
%%             Val - the new value (bottom, or some constant).
%%             Env - the environment in which the update should be done.
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

set_to(Dst, Val, Env) ->
  {UpdatedEnv, SSAWorkList} = update_lattice_value({Dst, Val}, Env),
  {[], SSAWorkList, UpdatedEnv}.

%%-----------------------------------------------------------------------------
%% Procedure : visit_branch/2
%% Purpose   : Do symbolic execution of branch instructions.
%% Arguments : Inst - The instruction
%%             Env  - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

visit_branch(Inst, Env) ->   %% Also look at the executable flag
  Left = lookup_lattice_value(hipe_rtl:branch_src1(Inst), Env),
  Right = lookup_lattice_value(hipe_rtl:branch_src2(Inst), Env),
  Outcome = evaluate_relop(Left, hipe_rtl:branch_cond(Inst), Right),
  FlowWorkList =
    case Outcome of
      true   -> [hipe_rtl:branch_true_label(Inst)];
      false  -> [hipe_rtl:branch_false_label(Inst)];
      bottom -> [hipe_rtl:branch_true_label(Inst),
                 hipe_rtl:branch_false_label(Inst)];
      top    -> []
    end,
  {FlowWorkList, [], Env}.
%%-----------------------------------------------------------------------------
%% Procedure : evaluate_relop/3
%% Purpose   : Evaluate the given relop, handling top and bottom in a sane
%%             way: bottom on either side wins over top, and only two
%%             constants are handed to hipe_rtl_arch:eval_cond/3.
%% Arguments : Val1, Val2 - the operands: integers, top or bottom
%%             RelOp - some relop atom from rtl.
%% Returns   : bottom, top, true or false
%%-----------------------------------------------------------------------------

evaluate_relop(bottom, _RelOp, _Val2) -> bottom;
evaluate_relop(_Val1, _RelOp, bottom) -> bottom;
evaluate_relop(top, _RelOp, _Val2) -> top;
evaluate_relop(_Val1, _RelOp, top) -> top;
evaluate_relop(Val1, RelOp, Val2) ->
  hipe_rtl_arch:eval_cond(RelOp, Val1, Val2).

%%-----------------------------------------------------------------------------
%% Procedure : evaluate_fixnumop/2
%% Purpose   : try to evaluate a fixnumop.
%% Arguments : Val1 - operand (an integer, 'top' or 'bottom')
%%             Op - the operation ('tag' or 'untag').
%% Returns   : Result
%%              where result is an integer, 'top' or 'bottom'
%%-----------------------------------------------------------------------------

evaluate_fixnumop(top, _Op) ->
  top;
evaluate_fixnumop(bottom, _Op) ->
  bottom;
evaluate_fixnumop(Val1, tag) when is_integer(Val1) ->
  hipe_tagscheme:mk_fixnum(Val1);
evaluate_fixnumop(Val1, untag) when is_integer(Val1) ->
  hipe_tagscheme:fixnum_val(Val1).

%%-----------------------------------------------------------------------------
%% Procedure : visit_alu/2
%% Purpose   : do symbolic execution of an alu instruction
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

visit_alu(Inst, Env) ->
  Lhs = lookup_lattice_value(hipe_rtl:alu_src1(Inst), Env),
  Rhs = lookup_lattice_value(hipe_rtl:alu_src2(Inst), Env),
  %% only the result matters for a plain alu; the four flags are dropped
  {Result, _, _, _, _} = evaluate_alu(Lhs, hipe_rtl:alu_op(Inst), Rhs),
  set_to(hipe_rtl:alu_dst(Inst), Result, Env).

%% Here follows the alu-evaluation stuff. This is the most involved part I
%% guess.
%% The function that you may want to use is evaluate_alu/3. The
%% evaluation functions return
%%   {Result, SignFlag, ZeroFlag, OverflowFlag, CarryBit}
%% They use some helpers which are explained briefly:
%%   lattice_meet/2 - handles the general case of most alu-operations, called
%%                    when at least one of the operands is nonconstant, and
%%                    the operation-specifics have been taken care of.
%%   all_ones/0 - returns the value of an rtl-word set to all 1 bits.
%%   partial_eval_alu - tries to catch some operation specific special cases
%%                      when one (or both) of the operands is nonconstant.

%% Returns the same lattice value for the result and for all four flags.
%% 'top' on either side takes precedence over 'bottom'. There is
%% deliberately no fall-through branch: calling this with two constants is
%% a caller error and crashes (the check is really just sanity).
lattice_meet(Val1, Val2) ->
  Meet =
    if
      Val1 =:= top; Val2 =:= top -> top;
      Val1 =:= bottom; Val2 =:= bottom -> bottom
    end,
  {Meet, Meet, Meet, Meet, Meet}.

%% An integer with every bit of an rtl-word set.
all_ones() ->
  WordBits = ?bytes_to_bits(hipe_rtl_arch:word_size()),
  (1 bsl WordBits) - 1.

%% When calling partial_eval*() we know that at least one of the values
%% is bottom or top. They return {Value, Sign, Zero, Overflow, Carry}
%% (just like hipe_rtl_arch:eval_alu).

%% Logic shifts are very similar to each other. Limit is the number of
%% bits in the word.
partial_eval_shift(_Limit, Val1, 0) ->
  %% shifting by zero: everything follows the (nonconstant) shiftee
  {Val1, Val1, Val1, Val1, Val1};
partial_eval_shift(_Limit, 0, _Val2) ->
  %% shifting zero always yields zero
  {0, false, true, false, false};
partial_eval_shift(Limit, Val1, Val2) when is_integer(Val2), Val2 >= Limit ->
  %% every bit is shifted out; overflow & carry we don't know about.
  {0, false, true, Val1, Val1};
partial_eval_shift(_Limit, Val1, Val2) ->
  lattice_meet(Val1, Val2).

%%-----------------------------------------------------------------------------
%% Procedure : partial_eval_alu/3
%% Purpose   : try to evaluate as much as possible an alu operation where at
%%             least one of the operands is not constant.
%% Arguments : Val1, Val2 - operands (integer, top or bottom)
%%             Op - the operation.
%% Returns   : {Result, Sign, Zero, Overflow, Carry}
%%              where Result is an integer, 'top' or 'bottom'
%%              and the others are bool, 'top' or 'bottom'.
%%-----------------------------------------------------------------------------

%% Each clause first looks for an operand that decides the result on its
%% own (a neutral or an absorbing element) and otherwise falls back to
%% lattice_meet/2.
partial_eval_alu(Val1, Op, Val2) when Op =:= add; Op =:= 'xor' ->
  %% 0 is neutral on either side
  if
    Val1 == 0 -> {Val2, Val2, Val2, false, false};
    Val2 == 0 -> {Val1, Val1, Val1, false, false};
    true -> lattice_meet(Val1, Val2)
  end;
partial_eval_alu(Val1, sub, Val2) ->
  if
    Val2 == 0 -> {Val1, Val1, Val1, false, false};
    true -> lattice_meet(Val1, Val2)
  end;
partial_eval_alu(Val1, 'or', Val2) ->
  AllOnes = all_ones(),
  if
    Val1 == 0 -> {Val2, Val2, Val2, false, false};
    Val2 == 0 -> {Val1, Val1, Val1, false, false};
    Val1 == AllOnes; Val2 == AllOnes ->
      {AllOnes, true, false, false, false};
    true -> lattice_meet(Val1, Val2)
  end;
partial_eval_alu(Val1, 'and', Val2) ->
  AllOnes = all_ones(),
  if
    Val1 == AllOnes -> {Val2, Val2, Val2, false, false};
    Val2 == AllOnes -> {Val1, Val1, Val1, false, false};
    Val1 == 0; Val2 == 0 -> {0, false, true, false, false};
    true -> lattice_meet(Val1, Val2)
  end;
partial_eval_alu(Val1, 'xornot', Val2) ->
  AllOnes = all_ones(),
  if
    Val1 == AllOnes -> {Val2, Val2, Val2, false, false};
    Val2 == AllOnes -> {Val1, Val1, Val1, false, false};
    true -> lattice_meet(Val1, Val2)
  end;
partial_eval_alu(Val1, andnot, Val2) ->
  AllOnes = all_ones(),
  if
    Val2 == 0 -> {Val1, Val1, Val1, false, false};
    Val1 == 0; Val2 == AllOnes -> {0, false, true, false, false};
    true -> lattice_meet(Val1, Val2)
  end;
partial_eval_alu(Val1, Op, Val2) when Op =:= 'sll'; Op =:= 'srl' ->
  BitSize = ?bytes_to_bits(hipe_rtl_arch:word_size()),
  partial_eval_shift(BitSize, Val1, Val2);
partial_eval_alu(Val1, Op, Val2) when Op =:= 'sllx'; Op =:= 'srlx' ->
  partial_eval_shift(64, Val1, Val2);
partial_eval_alu(Val1, mul, Val2) ->
  lattice_meet(Val1, Val2); % XXX: suboptimal

%% Arithmetic shifts are more tricky: shifting something unknown can
%% generate all_ones() and 0 depending on the sign of Val1.
partial_eval_alu(Val1, Op, Val2) when Op =:= 'sra'; Op =:= 'srax' ->
  if
    Val2 == 0 -> {Val1, Val1, Val1, false, false};
    Val1 == 0 -> {0, false, true, false, false};
    true -> lattice_meet(Val1, Val2)
  end.

%%-----------------------------------------------------------------------------
%% Procedure : evaluate_alu/3
%% Purpose   : try to evaluate as much as possible of an alu operation.
%% Arguments : Val1, Val2 - operands (an integer, 'top' or 'bottom')
%%             Op - the operation.
%% Returns   : {Result, Sign, Zero, Overflow, Carry}
%%              where result is an integer, 'top' or 'bottom'
%%              and the others are Bool, 'top' or 'bottom'.
%%-----------------------------------------------------------------------------

evaluate_alu(Val1, Op, Val2) ->
  NonConstant =
    (Val1 =:= top) orelse (Val2 =:= top) orelse
    (Val1 =:= bottom) orelse (Val2 =:= bottom),
  case {NonConstant, Op} of
    {true, _} -> partial_eval_alu(Val1, Op, Val2);
    %% the ...x shifts are always evaluated by the 64-bit arithmetic module
    {false, sllx} -> hipe_rtl_arith_64:eval_alu('sll', Val1, Val2);
    {false, srlx} -> hipe_rtl_arith_64:eval_alu('srl', Val1, Val2);
    {false, srax} -> hipe_rtl_arith_64:eval_alu('sra', Val1, Val2);
    {false, _} -> hipe_rtl_arch:eval_alu(Op, Val1, Val2)
  end.

%% Summarises a list of flag values: 'bottom' if any element is bottom,
%% otherwise 'top' if any element is top, otherwise 'false'.
maybe_top_or_bottom(List) ->
  maybe_top_or_bottom(List, false).

maybe_top_or_bottom([], TB) -> TB;
maybe_top_or_bottom([top | Rest], _) -> maybe_top_or_bottom(Rest, top);
maybe_top_or_bottom([bottom | _], _) -> bottom;
maybe_top_or_bottom([_ | Rest], TB) -> maybe_top_or_bottom(Rest, TB).

%% Tries to decide a condition from the (possibly unknown) flags of the
%% alu operation that produced them. Flags the condition does not depend
%% on are replaced by the placeholder 'true', so that an unknown but
%% irrelevant flag does not make the whole outcome unknown.
%% 'leu' is handled together with 'gtu': it is the negation of 'gtu' and
%% therefore depends on the same two flags (zero and carry). It used to
%% have no branch at all, which crashed with if_clause.
-spec partial_eval_branch(conditional(), bool_lattice(), bool_lattice(),
			  bool_lattice() | 0, bool_lattice() | 0) ->
	bool_lattice().
partial_eval_branch(Cond, N0, Z0, V0, C0) ->
  {N, Z, V, C} =
    if Cond =:= 'eq';
       Cond =:= 'ne' -> {true, Z0, true, true};
       Cond =:= 'gt';
       Cond =:= 'le' -> {N0, Z0, V0, true};
       Cond =:= 'gtu';
       Cond =:= 'leu' -> {true, Z0, true, C0};
       Cond =:= 'lt';
       Cond =:= 'ge' -> {N0, true, V0, true};
       Cond =:= 'geu';
       Cond =:= 'ltu' -> {true, true, true, C0};
       Cond =:= 'overflow';
       Cond =:= 'not_overflow' -> {true, true, V0, true}
    end,
  case maybe_top_or_bottom([N, Z, V, C]) of
    false -> hipe_rtl_arch:eval_cond_bits(Cond, N, Z, V, C);
    top -> top;
    bottom -> bottom
  end.

%%-----------------------------------------------------------------------------
%% Procedure : visit_alub/2
%% Purpose   : do symbolic execution of an alub instruction
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

visit_alub(Inst, Env) ->
  Val1 = lookup_lattice_value(hipe_rtl:alub_src1(Inst), Env),
  Val2 = lookup_lattice_value(hipe_rtl:alub_src2(Inst), Env),
  %% evaluate_alu/3 returns {Result, Sign, Zero, Overflow, Carry}; the
  %% variables are named in that order (V = overflow, C = carry), which is
  %% also the order partial_eval_branch/5 expects.
  {NewVal, N, Z, V, C} = evaluate_alu(Val1, hipe_rtl:alub_op(Inst), Val2),
  Labels =
    case NewVal of
      bottom -> [hipe_rtl:alub_true_label(Inst),
		 hipe_rtl:alub_false_label(Inst)];
      top -> [];
      _ ->
	%% if the partial branch cannot be evaluated we must execute the
	%% instruction at runtime.
	case partial_eval_branch(hipe_rtl:alub_cond(Inst), N, Z, V, C) of
	  bottom -> [hipe_rtl:alub_true_label(Inst),
		     hipe_rtl:alub_false_label(Inst)];
	  top -> [];
	  true -> [hipe_rtl:alub_true_label(Inst)];
	  false -> [hipe_rtl:alub_false_label(Inst)]
	end
    end,
  {[], NewSSA, NewEnv} = set_to(hipe_rtl:alub_dst(Inst), NewVal, Env),
  {Labels, NewSSA, NewEnv}.

%%-----------------------------------------------------------------------------
%% Procedure : visit_fixnumop/2
%% Purpose   : do symbolic execution of a fixnumop instruction.
%%             fixnumop is like a specialized alu.
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

visit_fixnumop(Inst, Env) ->
  Operand = lookup_lattice_value(hipe_rtl:fixnumop_src(Inst), Env),
  Result = evaluate_fixnumop(Operand, hipe_rtl:fixnumop_type(Inst)),
  set_to(hipe_rtl:fixnumop_dst(Inst), Result, Env).

%%-----------------------------------------------------------------------------
%% Procedure : visit_f*
%% Purpose   : Do symbolic execution of floating point instructions.
%%             All floating-point things are mapped to bottom. In order to
%%             implement them we would have to add hipe_rtl_arch:eval_f*
%%             instructions since floating point is no exact science.
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

visit_fconv(Inst, Env) ->
  Dst = hipe_rtl:fconv_dst(Inst),
  set_to(Dst, bottom, Env).

visit_fp(Inst, Env) ->
  Dst = hipe_rtl:fp_dst(Inst),
  set_to(Dst, bottom, Env).

visit_fp_unop(Inst, Env) ->
  Dst = hipe_rtl:fp_unop_dst(Inst),
  set_to(Dst, bottom, Env).

visit_fload(Inst, Env) ->
  Dst = hipe_rtl:fload_dst(Inst),
  set_to(Dst, bottom, Env).

visit_fmove(Inst, Env) ->
  Dst = hipe_rtl:fmove_dst(Inst),
  set_to(Dst, bottom, Env).

%%-----------------------------------------------------------------------------
%% Procedure : visit_move/2
%% Purpose   : execute a register-copy: the destination takes over the
%%             lattice value of the source.
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

visit_move(Inst, Env) ->
  SrcValue = lookup_lattice_value(hipe_rtl:move_src(Inst), Env),
  set_to(hipe_rtl:move_dst(Inst), SrcValue, Env).
%%-----------------------------------------------------------------------------
%% Procedure : visit_goto/2
%% Purpose   : execute a goto: its single target becomes reachable.
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

visit_goto(Instruction, Environment) ->
  GotoLabel = hipe_rtl:goto_label(Instruction),
  {[GotoLabel], [], Environment}.

%%-----------------------------------------------------------------------------
%% Procedure : visit_goto_index/2
%% Purpose   : execute a goto_index
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

visit_goto_index(Inst, Env) ->
  Index = hipe_rtl:goto_index_index(Inst),
  case lookup_lattice_value(Index, Env) of
    top -> { [], [], Env };
    bottom -> %% everything is reachable
      { hipe_rtl:goto_index_labels(Inst), [], Env };
    I -> %% only the ith label will be taken.
      io:format("hipe_rtl_ssa_const_prop foud goto-index with constant index ~w in ~w\n",
		[I, Inst]),
      %% lists:nth/2 takes the position first and the list second; the
      %% arguments used to be swapped, which crashed on every constant
      %% index.
      %% NOTE(review): the index is used as 1-based here, as before;
      %% find_switch_label/2 treats switch indices as 0-based - confirm
      %% which convention goto_index follows.
      { [ lists:nth(I, hipe_rtl:goto_index_labels(Inst)) ], [], Env }
  end.

%%-----------------------------------------------------------------------------
%% Procedure : visit_load/2
%% Purpose   : do a visit_load. It is hard to track what is in memory, and
%%             memory is not in ssa form, so let's assume bottom-values!
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

visit_load(Inst, Env) ->
  set_to(hipe_rtl:load_dst(Inst), bottom, Env).
%%-----------------------------------------------------------------------------
%% Procedure : visit_load_address/2
%% Purpose   : execute a load_address instruction. While there might be
%%             things here that are runtime-constant, they are not
%%             compile-time constant since code loading interferes with
%%             addresses.
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

visit_load_address(Inst, Env) ->
  Dst = hipe_rtl:load_address_dst(Inst),
  Val = bottom, %% all these are probably run-time, but not
                %% compile-time constants
  set_to(Dst, Val, Env).

%%-----------------------------------------------------------------------------
%% Procedure : visit_load_atom/2
%% Purpose   : Like load_address this one gets something that is not
%%             compile-time constant
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

visit_load_atom(Inst, Env) ->
  set_to(hipe_rtl:load_atom_dst(Inst), bottom, Env).

%%-----------------------------------------------------------------------------
%% Procedure : visit_load_word_index/2
%% Purpose   : execute a load_word_index. Here is probably room for
%%             improvement, we should be able to find some constants here,
%%             since we can get the labeled values from the environment, and
%%             then find the value with the given index.
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

visit_load_word_index(Inst, Env) ->
  %% Trace output only when DEBUG is defined, in the same way as
  %% find_switch_label/2 does; this used to be printed unconditionally for
  %% every load_word_index that was visited.
  ?SCCPDBG(io:format(" this is load word index: ~w\n", [Inst])),
  set_to(hipe_rtl:load_word_index_dst(Inst), bottom, Env).
%%-----------------------------------------------------------------------------
%% Procedure : visit_multimove/2 & visit_multimove/4
%% Purpose   : execute a multimove instruction.
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

%% Helper: walks the destinations and their (already looked-up) source
%% values in lock step, threading the environment through and appending the
%% SSA work of each update. Returns {Environment, SSAWorkList}. The two
%% lists must have the same length.
visit_multimove([], [], Env, SSAWork) ->
  {Env, SSAWork};
visit_multimove([Dst | MoreDsts], [Val | MoreVals], Env, SSAWork) ->
  {Env1, MoreWork} = update_lattice_value({Dst, Val}, Env),
  visit_multimove(MoreDsts, MoreVals, Env1, SSAWork ++ MoreWork).

visit_multimove(Inst, Env) ->
  %% all sources are read in the incoming environment, before any
  %% destination is written
  SrcValues = [lookup_lattice_value(Src, Env)
	       || Src <- hipe_rtl:multimove_srclist(Inst)],
  Dsts = hipe_rtl:multimove_dstlist(Inst),
  {FinalEnv, SSAWork} = visit_multimove(Dsts, SrcValues, Env, []),
  {[], SSAWork, FinalEnv}.

%%-----------------------------------------------------------------------------
%% Procedure : visit_call/2
%% Purpose   : execute a call-instruction. All calls return bottom. We make
%%             this assumption since the icode-level has taken care of BIFs
%%             and we believe that we are left with the things that can not
%%             be done at compile time.
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

visit_call(Inst, Env) ->
  {[], SSAWork, NewEnv} = set_to(hipe_rtl:call_dstlist(Inst), bottom, Env),
  %% remember to add both continuation & fail-to things to the cfg work
  %% list; an accessor returning [] means "no such label"
  Continuation = hipe_rtl:call_continuation(Inst),
  FailTo = hipe_rtl:call_fail(Inst),
  Successors = [Label || Label <- [FailTo, Continuation], Label =/= []],
  {Successors, SSAWork, NewEnv}.

%%-----------------------------------------------------------------------------
%% Procedure : visit_switch/2
%% Purpose   : execute a switch-statement.
%% Arguments : Inst - The instruction
%%             Env - The environment
%% Returns   : {FlowWorkList, SSAWorkList, NewEnvironment}
%%-----------------------------------------------------------------------------

%% First a helper that is used to handle the mapping from value to label.
%% Why isn't there a function that does this?

find_switch_label(Inst, Val) ->
  ?SCCPDBG(io:format("finding switch_label, ~w in ~w\n", [Val,Inst])),
  %% it seems like the index is zero based. nth uses 1-based indexing.
  Position = Val + 1,
  lists:nth(Position, hipe_rtl:switch_labels(Inst)).

%% Switches seem tricky. The sort-order is a list of key-values to be
%% tested in order (if elem i matches then we should jump to elem i of
%% the labels-list).
visit_switch(Inst, Env) ->
  SrcValue = lookup_lattice_value(hipe_rtl:switch_src(Inst), Env),
  FlowWorkList =
    case SrcValue of
      top -> [];
      bottom -> hipe_rtl:switch_labels(Inst);
      Constant -> [find_switch_label(Inst, Constant)]
    end,
  {FlowWorkList, [], Env}.

%%-----------------------------------------------------------------------------
%% Procedure : update_instruction/2
%% Purpose   : update the given instruction using any information found in
%%             the environment.
%% Arguments : Inst - the instruction
%%             Environment - in which everything happens.
%% Returns   : list of new instructions.
%%-----------------------------------------------------------------------------

%% idea: what to do with vi <- Constant. Wouldn't it be possible to
%% remove those? (and similarly for alu-instructions, and alub
%% instructions also!) Of course this will be done in some later step (dead
%% code elimination?) but it's a simple check.
%% Instructions with a dedicated rewrite:
update_instruction(#alu{} = Inst, Env) -> update_alu(Inst, Env);
update_instruction(#alub{} = Inst, Env) -> update_alub(Inst, Env);
update_instruction(#branch{} = Inst, Env) -> update_branch(Inst, Env);
update_instruction(#goto_index{} = Inst, Env) -> update_goto_index(Inst, Env);
update_instruction(#switch{} = Inst, Env) -> update_switch(Inst, Env);
update_instruction(#phi{} = Inst, Env) -> update_phi(Inst, Env);
%% Instructions where it is sufficient to replace the constant uses:
update_instruction(#call{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#enter{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#fconv{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#fload{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#fmove{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#fp{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#fp_unop{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#fstore{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#gctest{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#load{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#load_address{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#load_atom{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#load_word_index{} = Inst, Env) ->
  subst_all_uses(Inst, Env);
update_instruction(#move{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#multimove{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#return{} = Inst, Env) -> subst_all_uses(Inst, Env);
update_instruction(#store{} = Inst, Env) -> subst_all_uses(Inst, Env);
%% Everything else (comment, goto, label, ...) is kept exactly as it is.
update_instruction(Inst, _Env) ->
  [Inst].

%%-----------------------------------------------------------------------------
%% Procedure : subst_all_uses/2
%% Purpose   : looks up all things that an instruction uses and replaces
%%             anything that is determined to be constant.
%% Arguments : Inst - the instruction
%%             Env - in which everything happens.
%% Returns   : list of instructions to replace Inst with.
%%-----------------------------------------------------------------------------

subst_all_uses(Inst, Env) ->
  Uses = hipe_rtl_ssa:uses_to_rename(Inst),
  Substitution = update_srcs(Uses, Env),
  [hipe_rtl:subst_uses(Substitution, Inst)].
%%-----------------------------------------------------------------------------
%% Procedure : update_srcs/2
%% Purpose   : given the things that an instruction uses, return a list of
%%             {Src, NewValue} pairs that can be sent to subst_uses.
%% Arguments : Srcs - list of uses
%%             Env - in which everything happens.
%% Returns   : list of {Src, NewValue} pairs, one for every use with a
%%             constant lattice value, in reverse order of Srcs.
%%-----------------------------------------------------------------------------

update_srcs(Srcs, Env) ->
  PairsFor =
    fun(Src) ->
	case lookup_lattice_value(Src, Env) of
	  bottom ->
	    %% not constant: nothing to substitute
	    [];
	  top -> % this would be really strange.
	    ?EXIT({"update_src, top", Src });
	  Constant ->
	    [{Src, hipe_rtl:mk_imm(Constant)}]
	end
    end,
  lists:reverse(lists:flatmap(PairsFor, Srcs)).

%%-----------------------------------------------------------------------------
%% Functions for performing partial evaluation of alu-operations. They can
%% return either an integer (the actual result), move_src1 or move_src2 in
%% which case the alu-operation can be replaced with a move, or keep_it in
%% which case the instruction must be kept.

%%-----------------------------------------------------------------------------
%% Procedure : partial_update_shift/3
%% Purpose   : perform a shift
%% Arguments : Limit - the number of bits in the word to shift.
%%             Val1 - the shiftee
%%             Val2 - number of bits to shift
%% Returns   : Integer, move_src1, keep_it
%%-----------------------------------------------------------------------------

partial_update_shift(_Limit, bottom, 0) ->
  %% unknown value shifted by nothing: just copy it
  move_src1;
partial_update_shift(_Limit, 0, _Val2) ->
  %% zero stays zero however far it is shifted
  0;
partial_update_shift(Limit, _Val1, Val2) when Val2 =/= bottom, Val2 >= Limit ->
  %% every bit is shifted out of the word
  0;
partial_update_shift(_Limit, _Val1, _Val2) ->
  keep_it.

%%-----------------------------------------------------------------------------
%% Procedure : partial_update_alu/3
%% Purpose   : perform as much as possible of alu-operations where exactly
%%             one of the operands is bottom.
%% Arguments : Val1, Val2 - operands
%%             Op - the operation.
%% Returns   : Integer, move_src1, move_src2, keep_it
%%-----------------------------------------------------------------------------

%% We know that exactly one of the operands is bottom. This one returns
%% what to do with the instruction: replace it with a move of src1
%% (move_src1), replace it with a move of src2 (move_src2), replace it with
%% a constant (an integer) or keep it (keep_it).

partial_update_alu(Val1, 'add', Val2) ->
  if
    Val1 == 0 -> move_src2;
    Val2 == 0 -> move_src1;
    true -> keep_it
  end;
partial_update_alu(_Val1, 'sub', Val2) ->
  if
    Val2 == 0 -> move_src1;
    true -> keep_it
  end;
partial_update_alu(Val1, 'or', Val2) ->
  All_ones = all_ones(),
  if
    Val1 == 0 -> move_src2;
    Val2 == 0 -> move_src1;
    Val1 == All_ones; Val2 == All_ones -> All_ones;
    true -> keep_it
  end;
partial_update_alu(Val1, 'and', Val2) ->
  All_ones = all_ones(),
  if
    Val1 == All_ones -> move_src2;
    Val2 == All_ones -> move_src1;
    Val1 == 0; Val2 == 0 -> 0;
    true -> keep_it
  end;
partial_update_alu(Val1, 'xor', Val2) ->
  if
    Val1 == 0 -> move_src2;
    Val2 == 0 -> move_src1;
    true -> keep_it
  end;
partial_update_alu(Val1, 'xornot', Val2) ->
  All_ones = all_ones(),
  if
    Val1 == All_ones -> move_src2;
    Val2 == All_ones -> move_src1;
    true -> keep_it
  end;
partial_update_alu(Val1, andnot, Val2) ->
  All_ones = all_ones(),
  if
    Val2 == 0 -> move_src1;
    Val1 == 0; Val2 == All_ones -> 0;
    true -> keep_it
  end;
partial_update_alu(Val1, Op, Val2) when Op =:= 'sll'; Op =:= 'srl' ->
  BitSize = ?bytes_to_bits(hipe_rtl_arch:word_size()),
  partial_update_shift(BitSize, Val1, Val2);
partial_update_alu(Val1, Op, Val2) when Op =:= 'sllx'; Op =:= 'srlx' ->
  partial_update_shift(64, Val1, Val2);
partial_update_alu(_Val1, 'mul', _Val2) ->
  %% partial_eval_alu/3 accepts 'mul' and treats it as not evaluable when an
  %% operand is unknown; do the same here and keep the instruction. Without
  %% this clause a 'mul' with exactly one constant operand crashed with
  %% function_clause.
  keep_it;
partial_update_alu(Val1, Op, Val2) when Op =:= 'sra'; Op =:= 'srax' ->
  if
    Val2 == 0 -> move_src1;
    Val1 == 0 -> 0;
    true -> keep_it
  end.
%%-----------------------------------------------------------------------------
%% Procedure : update_alu/2
%% Purpose   : update an alu-instruction.
%%             - both operands unknown: the instruction is kept;
%%             - exactly one unknown: partial_update_alu/3 decides whether
%%               it becomes a move, a constant move, or is kept with its
%%               constant operand replaced by an immediate;
%%             - both constant: it becomes a move of the computed result.
%% Arguments : Inst - the instruction.
%%             Env - in which everything happens.
%% Returns   : list of new instructions
%%-----------------------------------------------------------------------------

update_alu(Inst, Env) ->
  Src1 = hipe_rtl:alu_src1(Inst),
  Src2 = hipe_rtl:alu_src2(Inst),
  Val1 = lookup_lattice_value(Src1, Env),
  Val2 = lookup_lattice_value(Src2, Env),
  case {Val1, Val2} of
    {bottom, bottom} ->
      [Inst];
    {A, B} when A =:= bottom; B =:= bottom ->
      Dst = hipe_rtl:alu_dst(Inst),
      case partial_update_alu(Val1, hipe_rtl:alu_op(Inst), Val2) of
	move_src1 ->
	  [hipe_rtl:mk_move(Dst, Src1)];
	move_src2 ->
	  [hipe_rtl:mk_move(Dst, Src2)];
	keep_it ->
	  Subst0 = make_alub_subst_list(Val1, Src1, []),
	  Subst = make_alub_subst_list(Val2, Src2, Subst0),
	  [hipe_rtl:subst_uses(Subst, Inst)];
	Constant ->
	  [hipe_rtl:mk_move(Dst, hipe_rtl:mk_imm(Constant))]
      end;
    _ ->
      {Result, _, _, _, _} = evaluate_alu(Val1, hipe_rtl:alu_op(Inst), Val2),
      [hipe_rtl:mk_move(hipe_rtl:alu_dst(Inst), hipe_rtl:mk_imm(Result))]
  end.

%%-----------------------------------------------------------------------------
%% Procedure : update_branch/2
%% Purpose   : update a branch-instruction
%% Arguments : Inst - the instruction.
%%             Env - in which everything happens.
%% Returns   : list of new instructions
%%-----------------------------------------------------------------------------

update_branch(Inst, Env) ->
  Src1 = hipe_rtl:branch_src1(Inst),
  Src2 = hipe_rtl:branch_src2(Inst),
  Val1 = lookup_lattice_value(Src1, Env),
  Val2 = lookup_lattice_value(Src2, Env),
  case {Val1, Val2} of
    {bottom, bottom} ->
      %% nothing is known: keep the branch untouched
      [Inst];
    {bottom, _} ->
      %% only the second operand is constant: make it an immediate
      [hipe_rtl:subst_uses([{Src2, hipe_rtl:mk_imm(Val2)}], Inst)];
    {_, bottom} ->
      %% only the first operand is constant: make it an immediate
      [hipe_rtl:subst_uses([{Src1, hipe_rtl:mk_imm(Val1)}], Inst)];
    _ ->
      %% both are constant: the branch is decided now and becomes a goto
      case hipe_rtl_arch:eval_cond(hipe_rtl:branch_cond(Inst), Val1, Val2) of
	true -> [hipe_rtl:mk_goto(hipe_rtl:branch_true_label(Inst))];
	false -> [hipe_rtl:mk_goto(hipe_rtl:branch_false_label(Inst))]
      end
  end.

%%-----------------------------------------------------------------------------
%% Procedure : update_alub/2
%% Purpose   : update an alub-instruction. Here are some finer points: we
%%             might be able to do the math (think b = a+0), but it's hard
%%             to replace the branch, since the mapping between AluOp,RelOp
%%             and BranchInstr is boring to do. (laziness is a bliss).
%% Arguments : Inst - the instruction.
%%             Env - in which everything happens.
%% Returns   : list of new instructions
%%-----------------------------------------------------------------------------

%% some small helpers.

%% Replaces an alub whose branch is decided by a move of the result
%% followed by a goto to the label that is taken.
alub_to_move(Inst, Res, Lab) ->
  Move = hipe_rtl:mk_move(hipe_rtl:alub_dst(Inst), Res),
  Goto = hipe_rtl:mk_goto(Lab),
  [Move, Goto].

%% Prepends {Src, Immediate} to Tail when Src has the constant value Val
%% and is not already an immediate; a bottom value adds nothing and a top
%% value is a fatal error.
make_alub_subst_list(bottom, _, Tail) -> Tail;
make_alub_subst_list(top, Src, _) ->
  ?EXIT({"~w is top during update",Src });
make_alub_subst_list(Val, Src, Tail) ->
  case hipe_rtl:is_imm(Src) of
    false -> [{Src, hipe_rtl:mk_imm(Val)} | Tail];
    true -> Tail
  end.
update_alub(Inst, Env) ->
  Src1 = hipe_rtl:alub_src1(Inst),
  Src2 = hipe_rtl:alub_src2(Inst),
  Val1 = lookup_lattice_value(Src1, Env),
  Val2 = lookup_lattice_value(Src2, Env),
  %% evaluate_alu/3 returns {Result, Sign, Zero, Overflow, Carry}
  {ResVal, N, Z, V, C} = evaluate_alu(Val1, hipe_rtl:alub_op(Inst), Val2),
  Outcome = partial_eval_branch(hipe_rtl:alub_cond(Inst), N, Z, V, C),
  case Outcome of
    bottom ->
      %% If we can't evaluate the branch, we have to keep it as an alub
      %% instruction since other optimizations might insert other
      %% instructions between the move and the branch. We can however
      %% replace variables with constants:
      Subst0 = make_alub_subst_list(Val1, Src1, []),
      Subst = make_alub_subst_list(Val2, Src2, Subst0),
      [hipe_rtl:subst_uses(Subst, Inst)];
    _ ->
      %% We know where we will be going, let's find out what Dst should be.
      %% Knowing where we are going means that at most one of the values is
      %% bottom, hence we can replace the alu-instr with a move.
      %% Remember, a = b + 0 can give us enough info to know what jump to
      %% do without knowing the value of a. (I wonder if this will ever
      %% actually happen ;)
      Res =
	case ResVal of
	  bottom -> % something nonconstant: it is the unknown operand
	    if
	      Val1 =:= bottom -> Src1;
	      Val2 =:= bottom -> Src2
	    end;
	  _ ->
	    hipe_rtl:mk_imm(ResVal)
	end,
      case Outcome of
	top ->
	  io:format("oops. something VERY bad: ~w ~w V1 & 2 ~w ~w\n",
		    [Inst, {ResVal, N, Z, V, C}, Val1, Val2]),
	  [Inst];
	true ->
	  alub_to_move(Inst, Res, hipe_rtl:alub_true_label(Inst));
	false ->
	  alub_to_move(Inst, Res, hipe_rtl:alub_false_label(Inst))
      end
  end.

%%-----------------------------------------------------------------------------
%% Procedure : update_goto_index/2
%% Purpose   : update a goto_index instruction.
%% Arguments : Inst - the instruction.
%%             Env - in which everything happens.
%% Returns   : list of new instructions.
%%-----------------------------------------------------------------------------

update_goto_index(Inst, Env) ->
  Index = hipe_rtl:goto_index_index(Inst),
  case lookup_lattice_value(Index, Env) of
    bottom -> %% everything is reachable
      [Inst];
    I -> %% only the ith label will be taken.
      %% lists:nth/2 takes the position first and the list second; the
      %% arguments used to be swapped, which crashed on every constant
      %% index.
      %% NOTE(review): the index is used as 1-based here, as before;
      %% find_switch_label/2 treats switch indices as 0-based - confirm
      %% which convention goto_index follows.
      [hipe_rtl:mk_goto(lists:nth(I, hipe_rtl:goto_index_labels(Inst)))]
  end.

%%-----------------------------------------------------------------------------
%% Procedure : update_switch/2
%% Purpose   : update a switch instruction: with a constant source it
%%             becomes a goto to the selected label.
%% Arguments : Inst - the instruction.
%%             Env - in which everything happens.
%% Returns   : list of new instructions.
%%-----------------------------------------------------------------------------

update_switch(Inst, Env) ->
  case lookup_lattice_value(hipe_rtl:switch_src(Inst), Env) of
    bottom ->
      [Inst];
    Const ->
      Lab = find_switch_label(Inst, Const),
      [hipe_rtl:mk_goto(Lab)]
  end.

%%-----------------------------------------------------------------------------
%% Procedure : update_phi/2
%% Purpose   : Update a phi-function w.r.t. constants: a phi whose
%%             destination is constant becomes a move of that constant.
%% Arguments : Instruction - The instruction
%%             Environment - The environment
%% Returns   : [NewInstruction]
%%-----------------------------------------------------------------------------

update_phi(Instruction, Environment) ->
  Destination = hipe_rtl:phi_dst(Instruction),
  case lookup_lattice_value(Destination, Environment) of
    bottom ->
      [Instruction];
    top ->
      ?WARNING_MSG("The dst of ~w is top after SCCP. Strange\n",[Instruction]),
      ?EXIT({"bang !", Instruction}),
      [Instruction];
    Value ->
      [hipe_rtl:mk_move(Destination, hipe_rtl:mk_imm(Value))]
  end.

%%-----------------------------------------------------------------------------

%% make sure that all precoloured registers are taken out of the equation.
%% Lattice value of operand X. Precoloured operands and constant labels
%% are never tracked and always yield bottom; everything else is resolved
%% by lookup_lattice_value2/2.
lookup_lattice_value(X, Environment) ->
  Excluded = hipe_rtl_arch:is_precoloured(X) or hipe_rtl:is_const_label(X),
  if
    Excluded -> bottom;
    true -> lookup_lattice_value2(X, Environment)
  end.

%% An immediate is its own value. Any other operand is looked up in the
%% environment's lattice-value tree; an operand missing from the tree is
%% reported and treated as bottom.
lookup_lattice_value2(X, Environment) ->
  LatticeValues = env__lattice_values(Environment),
  case hipe_rtl:is_imm(X) of
    false ->
      case gb_trees:lookup(X, LatticeValues) of
        {value, top} ->
          ?EXIT({"lookup_lattice_value, top", X}),
          top;
        {value, Known} ->
          Known;
        none ->
          io:format("~w~n",[LatticeValues]),
          ?WARNING_MSG("Earlier compiler steps generated erroneous "
                       "code for X = ~w. We are ignoring this.\n",[X]),
          bottom
      end;
    true ->
      hipe_rtl:imm_value(X)
  end.

%%----------------------------- End of file -----------------------------------
diff --git a/lib/hipe/rtl/hipe_rtl_ssapre.erl b/lib/hipe/rtl/hipe_rtl_ssapre.erl new file mode 100644 index 0000000000..a9e92e5688 --- /dev/null +++ b/lib/hipe/rtl/hipe_rtl_ssapre.erl @@ -0,0 +1,1679 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2005-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% File : hipe_rtl_ssapre.erl
%% Author : He Bingwen and Frédéric Haziza
%% Description : Performs Partial Redundancy Elimination on SSA form.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% @doc +%% +%% This module implements the <a href="http://cs.wheaton.edu/%7Etvandrun/writings/spessapre.pdf">Anticipation-SSAPRE algorithm</a>, +%% with several modifications for Partial Redundancy Elimination on SSA form. +%% We actually found problems in this algorithm, so +%% we implement another version with several advantages: +%% - No loop for Xsi insertions +%% - No fix point iteration for the downsafety part +%% - Less computations for Will Be Available part +%% - Complexity of the overall algorithm is improved +%% +%% We were supposed to publish these results anyway :D +%% +%% @end +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-module(hipe_rtl_ssapre). + +-export([rtl_ssapre/2]). + +-include("../main/hipe.hrl"). +-include("hipe_rtl.hrl"). + +%%-define(SSAPRE_DEBUG, true ). %% When uncommented, produces debug printouts +-define( SETS, ordsets ). %% Which set implementation module to use +-define( CFG, hipe_rtl_cfg ). +-define( RTL, hipe_rtl ). +-define( BB, hipe_bb ). +-define( ARCH, hipe_rtl_arch ). +-define( GRAPH, digraph ). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Debugging stuff +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-ifndef(SSAPRE_DEBUG). +-define(pp_debug(_Str, _Args), ok). +-else. +-define(pp_debug(Str, Args), io:format(standard_io, Str, Args)). +-endif. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Records / Structures +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +-record(xsi_link, {num}). %% Number is the index of the temporary (a Key into the Xsi Tree) +-record(temp, {key, var}). +-record(bottom, {key, var}). 
%% A Xsi: the hypothetical merge of one expression at a merge block.
-record(xsi, {inst,   %% Associated instruction
              def,    %% Hypothetical temporary variable
                      %% that stores the result of the computation
              label,  %% Block Label where the xsi is inserted
              opList, %% List of operands (#xsi_op{}, one per predecessor)
              cba,    %% NOTE(review): not set in this part of the file;
              later,  %% presumably these three are filled in by the
              wba     %% "Will Be Available" phase -- confirm.
             }).

%% An #alu{} instruction paired with the definition found for its value.
-record(pre_candidate, {alu, def}).
%% One Xsi operand: the predecessor label and the operand on that edge.
-record(xsi_op, {pred, op}).

%% Attributes of a merge-point vertex of the CFG graph.
-record(mp, {xsis, maps, preds, defs, uses, ndsSet}).
%% Vertex label of the CFG graph; the types used here are top, exit,
%% not_mp and mp.
-record(block, {type, attributes}).

%% Expression operand: the search for a definition was stopped by the Xsi
%% whose key is in stopped_by.
-record(eop, {expr, var, stopped_by}).
-record(insertion, {code, from}).

%% Operand whose expression has two immediate sources.
-record(const_expr, {var, value}).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Main function
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Runs the whole pass on an RTL CFG in SSA form: Xsi insertion, then (only
%% when at least one Xsi was inserted) downsafety and will-be-available,
%% and finally code motion. Both intermediate digraphs are deleted before
%% returning. Returns the CFG produced by code motion.
rtl_ssapre(RtlSSACfg, Options) ->
  %% io:format("\n################ Original CFG ################\n"),
  %% hipe_rtl_cfg:pp(RtlSSACfg),
  %% io:format("\n\n############ SSA-Form CHECK ==> ~w\n",[hipe_rtl_ssa:check(RtlSSACfg)]),

  {CFG2,XsiGraph,CFGGraph,MPs} = perform_Xsi_insertion(RtlSSACfg,Options),
  %%?pp_debug("~n~n################ Xsi CFG ################\n",[]),pp_cfg(CFG2,XsiGraph),
  XsiList = ?GRAPH:vertices(XsiGraph),
  case XsiList of
    [] ->
      %% No Xsi
      ?option_time(?pp_debug("~n~n################ No Xsi Inserted ################~n",[]),"RTL A-SSAPRE No Xsi inserted (skip Downsafety and Will Be Available)",Options),
      ok;
    _ ->
      ?pp_debug("~n############ Downsafety ##########~n",[]),
      ?option_time(perform_downsafety(MPs,CFGGraph,XsiGraph),"RTL A-SSAPRE Downsafety",Options),
      ?pp_debug("~n~n################ CFG Graph ################~n",[]),pp_cfggraph(CFGGraph),
      ?pp_debug("~n############ Will Be Available ##########~n",[]),
      ?option_time(perform_will_be_available(XsiGraph,CFGGraph,Options),"RTL A-SSAPRE WillBeAvailable",Options)
  end,

  ?pp_debug("~n############ No more need for the CFG Graph....Deleting...",[]),?GRAPH:delete(CFGGraph),
  ?pp_debug("~n~n################ Xsi Graph ################~n",[]),pp_xsigraph(XsiGraph),

  ?pp_debug("~n############ Code Motion ##########~n",[]),
  Labels = ?CFG:preorder(CFG2),

  ?pp_debug("~n~n################ Xsi CFG ################~n",[]),pp_cfg(CFG2,XsiGraph),

  init_redundancy_count(),
  ?option_time(FinalCFG=perform_code_motion(Labels,CFG2,XsiGraph),"RTL A-SSAPRE Code Motion",Options),

  ?pp_debug("\n############ No more need for the Xsi Graph....Deleting...",[]),?GRAPH:delete(XsiGraph),

  %% io:format("\n################ Final CFG ################\n"),
  %% hipe_rtl_cfg:pp(FinalCFG),
  %% io:format("\n\n############ SSA-Form CHECK ==> ~w\n",
  %%           [hipe_rtl_ssa:check(FinalCFG)]),
  ?pp_debug("\nSSAPRE : ~w redundancies were found\n",[get_redundancy_count()]),

  FinalCFG.

%% ##########################################################################
%% ######################## XSI INSERTION ###################################
%% ##########################################################################

%% Phase 1. Creates the Xsi graph, finds a definition for every #alu{}
%% (inserting Xsis at merge points), determines the Xsi operands and then
%% builds the intermediate CFG graph.
%% Returns {Cfg, XsiGraph, CFGGraph, MergePointLabels}.
perform_Xsi_insertion(Cfg, Options) ->
  init_counters(), %% Init counters for Bottoms and Temps
  DigraphOpts = [cyclic, private],
  XsiGraph = digraph:new(DigraphOpts),
  %% Be careful, the digraph component is NOT garbage collected,
  %% so don't create 20 millions of instances!
  %% finds the longest depth
  %% Depth-first, preorder traversal over Basic Blocks.
  %%Labels = ?CFG:reverse_postorder(Cfg),
  Labels = ?CFG:preorder(Cfg),

  ?pp_debug("~n~n############# Finding definitions for computation~n~n",[]),
  ?option_time({Cfg2,XsiGraph} = find_definition_for_computations(Labels,Cfg,XsiGraph),"RTL A-SSAPRE Xsi Insertion, searching from instructions",Options),

  %% Active List creation
  GeneratorXsiList = lists:sort(?GRAPH:vertices(XsiGraph)),
  ?pp_debug("~n~n############# Inserted Xsis ~w",[GeneratorXsiList]),
  ?pp_debug("~n~n############# Finding operands~n",[]),
  ?option_time({Cfg3,XsiGraph} = find_operands(Cfg2,XsiGraph,GeneratorXsiList,0),"RTL A-SSAPRE Xsi Insertion, finding operands",Options),

  %% Creating the CFGGraph
  ?pp_debug("~n~n############# Creating CFG Graph",[]),
  ?pp_debug("~n############# Labels = ~w",[Labels]),
  CFGGraph = digraph:new(DigraphOpts),
  [StartLabel|Others] = Labels, % adding the start label as a leaf
  ?pp_debug("~nAdding a vertex for the start label: ~w",[StartLabel]),
  ?GRAPH:add_vertex(CFGGraph, StartLabel, #block{type = top}),
  % Doing the others
  ?option_time(MPs=create_cfggraph(Others,Cfg3,CFGGraph,[],[],[],XsiGraph),"RTL A-SSAPRE Xsi Insertion, creating intermediate 'SSAPRE Graph'",Options),

  %% Return the bloody collected information
  {Cfg3,XsiGraph,CFGGraph,MPs}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Processes the blocks in the given label order, threading the updated
%% CFG through. The Xsi graph is mutated in place.
find_definition_for_computations([], Cfg, XsiGraph) ->
  {Cfg,XsiGraph}; %% No more block to inspect in the depth-first order
find_definition_for_computations([Label|Rest], Cfg, XsiGraph) ->
  Code = ?BB:code(?CFG:bb(Cfg,Label)),
  {NewCfg,XsiGraph} = find_definition_for_computations_in_block(Label,Code,Cfg,[],XsiGraph),
  find_definition_for_computations(Rest, NewCfg, XsiGraph).

%%===========================================================================
%% Searches from instruction for one block BlockLabel.
%% We process forward over instructions.
%% VisitedInstructions holds the instructions already processed, in
%% reverse order; every #alu{} is replaced there by a #pre_candidate{}.
%% When the instruction list is exhausted the block is rebuilt from the
%% visited instructions and stored back into the CFG.

find_definition_for_computations_in_block(BlockLabel,[],Cfg,
                                          VisitedInstructions,XsiGraph)->
  Code = lists:reverse(VisitedInstructions),
  NewBB = ?BB:mk_bb(Code),
  NewCfg = ?CFG:bb_add(Cfg,BlockLabel,NewBB),
  {NewCfg,XsiGraph}; %% No more instructions to inspect in this block
find_definition_for_computations_in_block(BlockLabel,[Inst|Rest],Cfg,
                                          VisitedInstructions,XsiGraph) ->
  %% ?pp_debug(" Inspecting instruction: ",[]),pp_instr(Inst,nil),
  case Inst of
    #alu{} ->
      %% Is Inst interesting for SSAPRE?
      %% i.e., is Inst an arithmetic operation which doesn't deal with precoloured?
      %% Note that since we parse forward, we have no 'pre_candidate'-type so far.
      case check_definition(Inst,VisitedInstructions,BlockLabel,Cfg,XsiGraph) of
        {def_found,Def} ->
          %% Replacing Inst in Cfg
          NewInst = #pre_candidate{alu=Inst,def=Def},
          NewVisited = [NewInst|VisitedInstructions],
          %% Recurse forward over instructions, same CFG, same XsiGraph
          find_definition_for_computations_in_block(BlockLabel,Rest,Cfg,
                                                    NewVisited,XsiGraph);
        {merge_point,Xsi} ->
          Def = Xsi#xsi.def,
          Key = Def#temp.key,
          NewInst = #pre_candidate{alu=Inst,def=Def},
          XsiLink = #xsi_link{num=Key},

          %% Add a vertex to the Xsi Graph
          ?GRAPH:add_vertex(XsiGraph,Key,Xsi),
          ?pp_debug(" Inserting Xsi: ",[]),pp_xsi(Xsi),

          Label = Xsi#xsi.label,
          case BlockLabel =:= Label of
            false ->
              %% Insert the Xsi in the appropriate block
              Code = hipe_bb:code(?CFG:bb(Cfg,Label)),
              {BeforeCode,AfterCode} = split_for_xsi(lists:reverse(Code),[]),
              NewCode = BeforeCode++[XsiLink|AfterCode],
              NewBB = hipe_bb:mk_bb(NewCode),
              NewCfg = ?CFG:bb_add(Cfg,Label,NewBB),
              NewVisited = [NewInst|VisitedInstructions];
            _->
              %% The Xsi belongs to the block being processed: the link
              %% goes into the visited list, after any xsi link or phi
              %% already there.
              {BeforeCode,AfterCode} = split_for_xsi(VisitedInstructions,[]),
              TempVisited = BeforeCode++[XsiLink|AfterCode],
              TempVisited2 = lists:reverse(TempVisited),
              NewVisited = [NewInst|TempVisited2],
              NewCfg = Cfg
          end,
          find_definition_for_computations_in_block(BlockLabel, Rest, NewCfg,
                                                    NewVisited, XsiGraph)
      end;
    _ ->
      %%?pp_debug("~n [L~w] Not concerned with: ~w",[BlockLabel,Inst]),
      %% If the instruction is not a SSAPRE candidate, we skip it and keep on
      %% processing instructions
      %% Prepend Inst, so that we have all in reverse order.
      %% Easy to parse backwards
      find_definition_for_computations_in_block(BlockLabel, Rest, Cfg,
                                                [Inst|VisitedInstructions], XsiGraph)
  end.

%% ############################################################################
%% We have E as an expression, I has an alu (arithmetic operation), and
%% we inspect backwards the previous instructions to find a definition for E.
%% Since we parse in forward order, we know that the previous SSAPRE
%% instruction will have a definition.
%%
%% Returns {def_found, Def}, where Def is bottom, a real variable or the
%% definition of an existing Xsi, or {merge_point, Xsi} with a fresh Xsi
%% for the merge block the search stopped at.

check_definition(E,[],BlockLabel,Cfg,XsiGraph)->
  %% No more instructions in that block
  %% No definition found in that block
  %% Search in previous blocks
  Preds = ?CFG:pred(Cfg, BlockLabel),
  %% ?pp_debug("~n CHECKING DEFINITION ####### Is L~w a merge block? It has ~w preds. So far E=",[BlockLabel,length(Preds)]),pp_expr(E),
  case Preds of
    [] ->
      %% Entry Point
      {def_found,bottom};
    [P] ->
      %% One predecessor only, we just keep looking for a definition in that block
      VisitedInstructions = lists:reverse(hipe_bb:code(?CFG:bb(Cfg,P))),
      check_definition(E,VisitedInstructions,P,Cfg,XsiGraph);
    _ ->
      Temp = new_temp(),
      %% It's a merge point
      OpList = [#xsi_op{pred=X} || X<-Preds],
      Xsi = #xsi{inst=E,def=Temp,label=BlockLabel,opList=OpList},
      {merge_point,Xsi}
  end;
check_definition(E,[CC|Rest],BlockLabel,Cfg,XsiGraph) ->
  SRC1 = ?RTL:alu_src1(E),
  SRC2 = ?RTL:alu_src2(E),
  case CC of
    #alu{} ->
      exit({?MODULE,should_not_be_an_alu,
            {"Why the hell do we still have an alu???",CC}});
    #pre_candidate{} ->
      %% C is the previous instruction
      C = CC#pre_candidate.alu,
      DST = ?RTL:alu_dst(C),
      case DST =:= SRC1 orelse DST =:= SRC2 of
        false ->
          case check_match(E,C) of
            true -> %% It's a computation of E!
              %% Get the dst of the alu
              {def_found,DST};
            _->
              check_definition(E,Rest,BlockLabel,Cfg,XsiGraph)
          end;
        true ->
          %% Get the definition of C, since C is PRE-candidate AND has been processed before
          DEF = CC#pre_candidate.def,
          case DEF of
            bottom ->
              %% Def(E)=bottom, STOP
              {def_found,bottom};
            _ ->
              %% Emend E with this def(C)
              %%?pp_debug("Parameters are E=~w, DST=~w, DEF=~w",[E,DST,DEF]),
              F = emend(E,DST,DEF),
              check_definition(F,Rest,BlockLabel,Cfg,XsiGraph) %% Continue the search
          end
      end;
    #move{} ->
      %% It's a move, we emend E, and continue the definition search
      DST = ?RTL:move_dst(CC),
      F = case SRC1 =:= DST orelse SRC2 =:= DST of
            true ->
              SRC = ?RTL:move_src(CC),
              emend(E,DST,SRC);
            _ ->
              E
          end,
      check_definition(F,Rest,BlockLabel,Cfg,XsiGraph); %% Continue the search
    #xsi_link{} ->
      {_K,Xsi} = ?GRAPH:vertex(XsiGraph,CC#xsi_link.num),
      C = Xsi#xsi.inst,
      case check_match(C,E) of
        true -> %% There is a Xsi already with a computation of E!
          %% fetch definition of C, and give it to E
          {def_found,Xsi#xsi.def};
        _->
          check_definition(E,Rest,BlockLabel,Cfg,XsiGraph)
      end;
    #phi{} ->
      %% skip them. NOTE: Important to separate this case from the next one
      check_definition(E,Rest,BlockLabel,Cfg,XsiGraph);
    _ ->
      %% Note: the function calls or some other instructions can change the pre-coloured registers
      %% which are able to be redefined. This breaks of course the SSA form.
      %% If there is a redefinition we can give bottom to the computation, and no xsi will be inserted.
      %% (In some sense, the result of the computation is new at that point.)
      PreColouredTest = ?ARCH:is_precoloured(SRC1) orelse ?ARCH:is_precoloured(SRC2),

      %%RegisterTest = ?RTL:is_reg(?RTL:alu_dst(E)) orelse ?RTL:is_reg(SRC1) orelse ?RTL:is_reg(SRC2),
      RegisterTest = ?RTL:is_reg(?RTL:alu_dst(E)), %% That means we cannot reuse the result held in this register...

      case PreColouredTest orelse RegisterTest of
        true ->
          {def_found,bottom};
        false ->
          DC = ?RTL:defines(CC),
          case lists:member(SRC1,DC) orelse lists:member(SRC2,DC) of
            true ->
              {def_found,bottom};
            false ->
              %% Orthogonal to E, we continue the search
              check_definition(E,Rest,BlockLabel,Cfg,XsiGraph)
          end
      end
  end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% True when the #alu{} instructions E and C compute the same value: same
%% operator and same source operands. Swapped operands only count as a
%% match for commutative operators; accepting the swap for every operator
%% would treat e.g. `a - b' as a computation of `b - a'.
check_match(E, C) ->
  OpE = ?RTL:alu_op(E),
  OpC = ?RTL:alu_op(C),
  case OpE =:= OpC of
    false ->
      false;
    true ->
      Src1E = ?RTL:alu_src1(E),
      Src2E = ?RTL:alu_src2(E),
      Src1C = ?RTL:alu_src1(C),
      Src2C = ?RTL:alu_src2(C),
      case Src1E =:= Src1C of
        true ->
          Src2E =:= Src2C;
        false ->
          is_commutative_alu_op(OpE)
            andalso Src1E =:= Src2C andalso Src2E =:= Src1C
      end
  end.

%% Operators for which swapping the two sources does not change the
%% result. Deliberately conservative: an operator not listed here merely
%% loses the swapped-operand match.
%% NOTE(review): confirm these atoms against the RTL alu operator set.
is_commutative_alu_op(add) -> true;
is_commutative_alu_op(mul) -> true;
is_commutative_alu_op('and') -> true;
is_commutative_alu_op('or') -> true;
is_commutative_alu_op('xor') -> true;
is_commutative_alu_op(_) -> false.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% True when both source operands of the #alu{} E are immediates.
expr_is_const(E) ->
  ?RTL:is_imm(?RTL:alu_src1(E)) andalso ?RTL:is_imm(?RTL:alu_src2(E)).
%% is_number(?RTL:alu_src1(E)) andalso is_number(?RTL:alu_src2(E)).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% emend(Expr, S, Var): Expr must be an arithmetic operation, i.e. #alu{}.
%% Every source operand of Expr that is exactly equal to S is replaced by
%% Var; the first source is handled before the second.
emend(Expr, S, Var) ->
  Expr1 = case ?RTL:alu_src1(Expr) of
            S -> ?RTL:alu_src1_update(Expr, Var);
            _ -> Expr
          end,
  case ?RTL:alu_src2(Expr1) of
    S -> ?RTL:alu_src2_update(Expr1, Var);
    _ -> Expr1
  end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% split_for_xsi(ReversedCode, Acc): walks a block's code given in backward
%% order, moving instructions onto Acc until a xsi link or a phi is met.
%% Returns {Before, After}, both in forward order: Before ends with that
%% xsi link / phi and After holds everything behind it. When neither is
%% found, Before is empty.
split_for_xsi([], Acc) ->
  {[], Acc};
split_for_xsi([#xsi_link{}|_] = Code, Acc) ->
  {lists:reverse(Code), Acc};
split_for_xsi([#phi{}|_] = Code, Acc) ->
  {lists:reverse(Code), Acc};
split_for_xsi([Inst|Earlier], Acc) ->
  split_for_xsi(Earlier, [Inst|Acc]).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Phase 1.B : Search for operands
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Determines the operands of every Xsi on the active list. Xsis inserted
%% while doing so form the active list of the next round; the search ends
%% when a round introduces none. Returns {Cfg, XsiGraph}.
find_operands(Cfg, XsiGraph, [], _Count) ->
  {Cfg, XsiGraph};
find_operands(Cfg, XsiGraph, ActiveList, Count) ->
  {Cfg1, Introduced} =
    find_operands_for_active_list(Cfg, XsiGraph, ActiveList, []),
  NextActive = lists:reverse(Introduced),
  Round = Count + 1,
  ?pp_debug("~n################ Finding operands (iteration ~w): ~w have been introduced. Now ~w in total~n",
            [Round, length(NextActive), length(?GRAPH:vertices(XsiGraph))]),
  find_operands(Cfg1, XsiGraph, NextActive, Round).

%% Determines the operands of each Xsi whose key is on the active list.
%% Keys of Xsis inserted along the way are prepended to ActiveListAcc.
%% Returns {Cfg, ActiveListAcc}; the Xsi graph is updated in place.
find_operands_for_active_list(Cfg,_XsiGraph,[],ActiveListAcc) ->
  {Cfg,ActiveListAcc};
find_operands_for_active_list(Cfg,XsiGraph,[K|Ks],ActiveListAcc) ->
  {_Key,Xsi} = ?GRAPH:vertex(XsiGraph,K),
  ?pp_debug("~n Inspecting operands of : ~n",[]),pp_xsi(Xsi),
  Preds = ?CFG:pred(Cfg, Xsi#xsi.label),
  {NewCfg,NewActiveListAcc}=determine_operands(Xsi,Preds,Cfg,K,XsiGraph,ActiveListAcc),
  %% Re-read the vertex: determine_operands stored the updated Xsi under K.
  {_Key2,Xsi2} = ?GRAPH:vertex(XsiGraph,K),
  ?pp_debug("~n ** Final Xsi: ~n",[]),pp_xsi(Xsi2),
  ?pp_debug("~n #####################################################~n",[]),
  find_operands_for_active_list(NewCfg,XsiGraph,Ks,NewActiveListAcc).

%% Determines the operand of Xsi (stored under key K) for each predecessor
%% label in turn. After every predecessor the updated Xsi is written back
%% to the Xsi graph under K. Returns {Cfg, ActiveAcc}; the CFG only changes
%% when a new Xsi has to be inserted at a merge point.
determine_operands(_Xsi,[],Cfg,_K,_XsiGraph,ActiveAcc) ->
  %% All operands have been determined.
  %% The CFG is not updated, only the XsiGraph
  {Cfg,ActiveAcc};
determine_operands(Xsi,[P|Ps],Cfg,K,XsiGraph,ActiveAcc) ->
  Label = Xsi#xsi.label,
  ReverseCode = lists:reverse(hipe_bb:code(?CFG:bb(Cfg,Label))),
  %% Only the instructions preceding this Xsi's link in its own block.
  VisitedInstructions = get_visited_instructions(Xsi,ReverseCode),
  Res = determine_e_prime(Xsi#xsi.inst,VisitedInstructions,P,XsiGraph),
  case Res of
    operand_is_bottom ->
      NewXsi = xsi_arg_update(Xsi,P,new_bottom()),
      ?GRAPH:add_vertex(XsiGraph,K,NewXsi),
      determine_operands(NewXsi,Ps,Cfg,K,XsiGraph,ActiveAcc);
    operand_is_const_expr ->
      NewXsi = xsi_arg_update(Xsi,P,new_bottom()),
      ?GRAPH:add_vertex(XsiGraph,K,NewXsi),
      determine_operands(NewXsi,Ps,Cfg,K,XsiGraph,ActiveAcc);
    {sharing_operand,Op} ->
      NewXsi = xsi_arg_update(Xsi,P,Op),
      ?GRAPH:add_vertex(XsiGraph,K,NewXsi),
      determine_operands(NewXsi,Ps,Cfg,K,XsiGraph,ActiveAcc);
    {revised_expression,E_prime} ->
      ?pp_debug(" E' is determined : ",[]),pp_expr(E_prime),
      ?pp_debug(" and going along the edge L~w~n",[P]),
      %% Go along the edge P
      RevCode = lists:reverse(hipe_bb:code(?CFG:bb(Cfg,P))),
      case check_one_operand(E_prime,RevCode,P,Cfg,K,XsiGraph) of
        {def_found,Def} ->
          NewXsi = xsi_arg_update(Xsi,P,Def),
          ?GRAPH:add_vertex(XsiGraph,K,NewXsi),
          determine_operands(NewXsi,Ps,Cfg,K,XsiGraph,ActiveAcc);

        {expr_found,ChildExpr} ->
          NewXsi = xsi_arg_update(Xsi,P,ChildExpr),
          ?GRAPH:add_vertex(XsiGraph,K,NewXsi),
          determine_operands(NewXsi,Ps,Cfg,K,XsiGraph,ActiveAcc);

        {expr_is_const, Op} ->
          %% We detected that the expression is of the form: 'N op M'
          %% where N and M are constant.
          NewXsi = xsi_arg_update(Xsi,P,Op),
          ?GRAPH:add_vertex(XsiGraph,K,NewXsi),
          determine_operands(NewXsi,Ps,Cfg,K,XsiGraph,ActiveAcc);

        {merge_point,XsiChild} ->
          %% Update that Xsi, give its definition as Operand for the
          %% search, and go on
          XsiChildDef = XsiChild#xsi.def,
          NewXsi = xsi_arg_update(Xsi,P,XsiChildDef),
          ?GRAPH:add_vertex(XsiGraph,K,NewXsi),

          KeyChild = XsiChildDef#temp.key,
          XsiChildLink = #xsi_link{num=KeyChild},
          ?GRAPH:add_vertex(XsiGraph,KeyChild,XsiChild),

          %% Should not be the same block !!!!!!!
          RCode = lists:reverse(hipe_bb:code(?CFG:bb(Cfg,XsiChild#xsi.label))),
          {BCode,ACode} = split_code_for_xsi(RCode,[]),

          %% The new link is placed right after the xsi links / phis of
          %% the child's block.
          NewCode = BCode++[XsiChildLink|ACode],
          NewBB = hipe_bb:mk_bb(NewCode),
          NewCfg = ?CFG:bb_add(Cfg, XsiChild#xsi.label, NewBB),

          ?pp_debug(" -- ",[]),pp_arg(Xsi#xsi.def),?pp_debug(" causes insertion of: ~n",[]),pp_xsi(XsiChild),
          ?pp_debug(" -- Adding an edge ",[]),pp_arg(Xsi#xsi.def),?pp_debug(" -> ",[]),pp_arg(XsiChild#xsi.def),

          %% Adding an edge...
          %%?GRAPH:add_edge(XsiGraph,K,KeyChild,"family"),
          ?GRAPH:add_edge(XsiGraph,K,KeyChild),
          %% The child joins the active list so that its own operands get
          %% determined in a later round.
          determine_operands(NewXsi,Ps,NewCfg,K,XsiGraph,[KeyChild|ActiveAcc])
      end
  end.

%% Rewrites Expr as seen from predecessor Pred: first through the phis,
%% then through the already processed Xsis, of the instructions that
%% precede the Xsi in its block. Returns whatever
%% emend_with_processed_xsis/4 returns.
determine_e_prime(Expr,VisitedInstructions,Pred,XsiGraph) ->
  %% MUST FETCH FROM THE XSI TREE, since Xsis are not updated yet in the CFG
  NewExpr = emend_with_phis(Expr,VisitedInstructions,Pred),
  emend_with_processed_xsis(NewExpr,VisitedInstructions,Pred,XsiGraph).

%% For every phi in the instruction list whose destination is used by E,
%% replaces that destination in E by the phi argument coming from
%% predecessor Pred. All other instructions are ignored.
emend_with_phis(EmendedE, [], _) ->
  EmendedE;
emend_with_phis(E, [I|Rest], Pred) ->
  case I of
    #phi{} ->
      Dst = ?RTL:phi_dst(I),
      UE = ?RTL:uses(E), %% Should we get SRC1 and SRC2 instead?
      case lists:member(Dst, UE) of
        false ->
          emend_with_phis(E, Rest, Pred);
        true ->
          NewE = emend(E, Dst, ?RTL:phi_arg(I,Pred)),
          emend_with_phis(NewE, Rest, Pred)
      end;
    _ ->
      emend_with_phis(E, Rest, Pred)
  end.

%% Same idea for the xsi links in the instruction list; each Xsi is
%% fetched from the Xsi graph. Returns one of
%%   {revised_expression, E'} - list exhausted, E' is the rewritten E
%%   {sharing_operand, Op}    - a Xsi computing E was found; its operand
%%                              for Pred is reused
%%   operand_is_bottom        - E uses a Xsi whose operand for Pred is
%%                              a #bottom{}
%%   operand_is_const_expr    - likewise, for a #const_expr{} operand
%% and exits when a needed operand is still undetermined.
emend_with_processed_xsis(EmendedE, [], _, _) ->
  {revised_expression,EmendedE};
emend_with_processed_xsis(E, [I|Rest], Pred, XsiGraph) ->
  case I of
    #xsi_link{} ->
      Key = I#xsi_link.num,
      {_KK,Xsi} = ?GRAPH:vertex(XsiGraph,Key),
      Def = Xsi#xsi.def,
      UE = ?RTL:uses(E), %% Should we get SRC1 and SRC2 instead?
      case lists:member(Def,UE) of
        false ->
          %% E does not use this Xsi's definition; the Xsi may still
          %% stand for the very same computation.
          CE = Xsi#xsi.inst,
          case check_match(E,CE) of
            true -> %% It's a computation of E!
              case xsi_arg(Xsi,Pred) of
                undetermined_operand ->
                  exit({?MODULE,check_operand_sharing,"######## �h Dear, we trusted Kostis !!!!!!!!! #############"});
                XsiOp ->
                  {sharing_operand,XsiOp} %% They share operands
              end;
            _->
              emend_with_processed_xsis(E,Rest,Pred,XsiGraph)
          end;
        true ->
          %% E uses this Xsi's definition: substitute the Xsi's operand
          %% for Pred, unless that operand ends the search.
          A = xsi_arg(Xsi,Pred),
          %% ?pp_debug(" ######### xsi_arg(I:~w,Pred:~w) = ~w~n",[I,Pred,A]),
          case A of
            #bottom{} ->
              operand_is_bottom;
            #const_expr{} ->
              operand_is_const_expr;
            #eop{} ->
              NewE = emend(E,Def,A#eop.var),
              emend_with_processed_xsis(NewE,Rest,Pred,XsiGraph);
            undetermined_operand ->
              exit({?MODULE,emend_with_processed_xsis,"######## �h Dear, we trusted Kostis, again !!!!!!!!! #############"});
            XsiOp ->
              NewE = emend(E,Def,XsiOp),
              emend_with_processed_xsis(NewE,Rest,Pred,XsiGraph)
          end
      end;
    _ ->
      emend_with_processed_xsis(E,Rest,Pred,XsiGraph)
  end.

%% Given a block's code in backward order, returns the instructions that
%% precede (in program order) the xsi link of Xsi, still in backward
%% order. There is deliberately no clause for the empty list (see the
%% disabled clause below): a Xsi missing from its block fails with
%% function_clause.
%% get_visited_instructions(Xsi,[]) ->
%%   ?pp_debug("~nWe don't find this xsi with def ",[]),pp_arg(Xsi#xsi.def),?pp_debug(" in L~w : ",[Xsi#xsi.label]),
%%   exit({?MODULE,no_such_xsi_in_block,"We didn't find that Xsi in the block"});
get_visited_instructions(Xsi, [I|Is]) ->
  case I of
    #xsi_link{} ->
      XsiDef = Xsi#xsi.def,
      Key = XsiDef#temp.key,
      case I#xsi_link.num =:= Key of
        true ->
          Is;
        false ->
          get_visited_instructions(Xsi, Is)
      end;
    _ ->
      get_visited_instructions(Xsi, Is)
  end.

%% Splits a block's code, given in backward order, at the last xsi link or
%% phi. Returns {Before, After} in forward order, Before ending with that
%% xsi link / phi. Same logic as split_for_xsi/2.
split_code_for_xsi([], Acc) ->
  {[],Acc};
split_code_for_xsi([I|Is] = Code, Acc) ->
  case I of
    #xsi_link{} ->
      {lists:reverse(Code), Acc};
    #phi{} ->
      {lists:reverse(Code), Acc};
    _ ->
      split_code_for_xsi(Is, [I|Acc])
  end.

%% Searches backwards, starting from the instructions given in backward
%% order and continuing through single predecessors, for the operand that
%% expression E contributes to the Xsi with key XsiKey. Returns one of
%%   {def_found, Def}       - Def is a fresh bottom, a real variable or
%%                            the definition of an existing Xsi
%%   {expr_found, #eop{}}   - the search was stopped by a Xsi
%%   {expr_is_const, Op}    - both sources of E are immediates
%%   {merge_point, Xsi}     - a new Xsi is needed at a merge block
%% May add edges to the Xsi graph.
check_one_operand(E, [], BlockLabel, Cfg, XsiKey, XsiGraph) ->
  %% No more instructions in that block
  %% No definition found in that block
  %% Search is previous blocks
  Preds = ?CFG:pred(Cfg, BlockLabel),
  case Preds of
    [] ->
      %% Entry Point
      {def_found,new_bottom()};
    [P] ->
      %% One predecessor only, we just keep looking for a definition in that block
      case expr_is_const(E) of
        true ->
          ?pp_debug("\n\n############## Wow expr is constant: ~w",[E]),
          Var = ?RTL:mk_new_var(),
          Value = eval_expr(E),
          Op = #const_expr{var = Var, value = Value},
          {expr_is_const, Op};
        false ->
          VisitedInstructions = lists:reverse(?BB:code(?CFG:bb(Cfg,P))),
          check_one_operand(E, VisitedInstructions, P, Cfg, XsiKey, XsiGraph)
      end;
    _ ->
      %% It's a merge point
      case expr_is_const(E) of
        true ->
          ?pp_debug("\n\n############## Wow expr is constant at merge point: ~w",[E]),
          Var = ?RTL:mk_new_var(),
          Value = eval_expr(E),
          Op = #const_expr{var = Var, value = Value},
          {expr_is_const, Op};
        false ->
          Temp = new_temp(),
          OpList = [#xsi_op{pred = X} || X <- Preds],
          Xsi = #xsi{inst = E, def = Temp, label = BlockLabel, opList = OpList},
          {merge_point, Xsi}
      end
  end;
check_one_operand(E, [CC|Rest], BlockLabel, Cfg, XsiKey, XsiGraph) ->
  SRC1 = ?RTL:alu_src1(E),
  SRC2 = ?RTL:alu_src2(E),
  %% C is the previous instruction
  case CC of
    #alu{} ->
      exit({?MODULE,should_not_be_an_alu,
            {"Why the hell do we still have an alu???",CC}});
    #xsi{} ->
      exit({?MODULE,should_not_be_a_xsi,
            {"Why the hell do we still have a xsi???",CC}});
    #pre_candidate{} ->
      C = CC#pre_candidate.alu,
      DST = ?RTL:alu_dst(C),
      case DST =:= SRC1 orelse DST =:= SRC2 of
        true ->
          %% Get the definition of C, since C is PRE-candidate AND has
          %% been processed before
          DEF = CC#pre_candidate.def,
          case DEF of
            bottom ->
              %% Def(E)=bottom, STOP
              %% No update of the XsiGraph
              {def_found,new_bottom()};
            _->
              %% Simply emend
              F = emend(E,DST,DEF),
              ?pp_debug("~nEmendation : E= ",[]),pp_expr(E),?pp_debug(" ==> E'= ",[]),pp_expr(F),?pp_debug("~n",[]),
              check_one_operand(F,Rest,BlockLabel,Cfg,XsiKey,XsiGraph)
          end;
        false ->
          case check_match(C,E) of
            true -> %% It's a computation of E!
              %% It should give DST and not Def
              %% No update of the XsiGraph, cuz we use DST and not Def
              %% The operand is therefore gonna be a real variable
              {def_found,DST};
            _->
              %% Nothing to do with E
              check_one_operand(E,Rest,BlockLabel,Cfg,XsiKey,XsiGraph)
          end
      end;
    #move{} ->
      %% It's a move, we emend E, and continue the definition search
      DST = ?RTL:move_dst(CC),
      case SRC1 =:= DST orelse SRC2 =:= DST of
        true ->
          SRC = ?RTL:move_src(CC),
          F = emend(E,DST,SRC),
          check_one_operand(F,Rest,BlockLabel,Cfg,XsiKey,XsiGraph); %% Continue the search
        _ ->
          check_one_operand(E,Rest,BlockLabel,Cfg,XsiKey,XsiGraph) %% Continue the search
      end;
    #xsi_link{} ->
      Key = CC#xsi_link.num,
      %% Is Key a family member of XsiDef ?
      {_KK,Xsi} = ?GRAPH:vertex(XsiGraph,Key),
      C = Xsi#xsi.inst,
      case check_match(E,C) of
        true -> %% There is a Xsi already with a computation of E!
          %% fetch definition of C, and give it to E
          %% Must update an edge in the XsiGraph, and here, we know it's a Temp
          %% Note: this can create a loop (= a cycle of length 1)
          ?pp_debug(" -- Found a cycle with match: Adding an edge t~w -> t~w",[XsiKey,Key]),
          ?GRAPH:add_edge(XsiGraph,XsiKey,Key),
          {def_found,Xsi#xsi.def};
        _ ->
          %% No match. If the Xsi graph already has a path from Key to
          %% XsiKey, or Key is XsiKey itself, stop here and return the
          %% expression.
          case ?GRAPH:get_path(XsiGraph,Key,XsiKey) of
            false ->
              %% Is it a loop back to itself???
              case Key =:= XsiKey of
                false ->
                  check_one_operand(E,Rest,BlockLabel,Cfg,XsiKey,XsiGraph);
                _ ->
                  {expr_found,#eop{expr=E,var=?RTL:mk_new_var(),stopped_by=Key}}
              end;
            _ ->
              %% Returning the expression instead of looping
              %% And in case of no match
              ExprOp = #eop{expr=E,var=?RTL:mk_new_var(),stopped_by=Key},
              {expr_found,ExprOp}
          end
      end;
    #phi{} -> %% skip them
      check_one_operand(E,Rest,BlockLabel,Cfg,XsiKey,XsiGraph);
    _ ->
      %% Any other instruction: give up with bottom when E involves a
      %% precoloured source or a register destination, or when the
      %% instruction defines one of E's sources.
      PreColouredTest = ?ARCH:is_precoloured(SRC1) orelse ?ARCH:is_precoloured(SRC2),

      %%RegisterTest = ?RTL:is_reg(?RTL:alu_dst(E)) orelse ?RTL:is_reg(SRC1) orelse ?RTL:is_reg(SRC2),
      RegisterTest = ?RTL:is_reg(?RTL:alu_dst(E)),
      case PreColouredTest orelse RegisterTest of
        true ->
          {def_found,new_bottom()};
        _->
          DC = ?RTL:defines(CC),
          case lists:member(SRC1,DC) orelse lists:member(SRC2,DC) of
            true ->
              {def_found,new_bottom()};
            _ ->
              %% Orthogonal to E, we continue the search
              check_one_operand(E,Rest,BlockLabel,Cfg,XsiKey,XsiGraph)
          end
      end
  end.

%% Evaluates an #alu{} whose two sources are immediates (asserted by the
%% `true =' matches) and returns the result as an RTL immediate. Only the
%% first element of the tuple returned by ?ARCH:eval_alu/3 is used.
eval_expr(E) ->
  ?pp_debug("~n Evaluating the result of ~w~n", [E]),
  Op1 = ?RTL:alu_src1(E),
  Op2 = ?RTL:alu_src2(E),
  true = ?RTL:is_imm(Op1),
  Val1 = ?RTL:imm_value(Op1),
  true = ?RTL:is_imm(Op2),
  Val2 = ?RTL:imm_value(Op2),
  {Result, _Sign, _Zero, _Overflow, _Carry} = ?ARCH:eval_alu(?RTL:alu_op(E), Val1, Val2),
  ?pp_debug("~n Result is then ~w~n", [Result]),
  ?RTL:mk_imm(Result).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%  CREATING CFGGRAPH  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Adds one vertex per label (the start label must not be in the list) to
%% the CFG graph:
%%  - a block with a single predecessor becomes an `exit' vertex (no
%%    successors) or a `not_mp' vertex, and the edge from its predecessor,
%%    labelled with the temp keys defined in the block, is added at once;
%%  - a block with several predecessors becomes an `mp' vertex; its
%%    incoming edges are collected in LateEdges and only added by
%%    post_process/2 once every label has been handled.
%% Finally factorize/2 runs over the collected not_mp labels.
%% Returns the list of merge-point labels.
create_cfggraph([],_Cfg,CFGGraph,ToBeFactorizedAcc,MPAcc,LateEdges,_XsiGraph) ->
  ?pp_debug("~n~n ############# PostProcessing ~n~w~n",[LateEdges]),
  post_process(LateEdges,CFGGraph),
  ?pp_debug("~n~n ############# Factorizing ~n~w~n",[ToBeFactorizedAcc]),
  factorize(ToBeFactorizedAcc,CFGGraph),
  MPAcc;
create_cfggraph([Label|Ls],Cfg,CFGGraph,ToBeFactorizedAcc,MPAcc,LateEdges,XsiGraph) ->
  Preds = ?CFG:pred(Cfg, Label),
  case Preds of
    [] ->
      exit({?MODULE,do_not_call_on_top,{"Why the hell do we call that function on the start label???",Label}});
    [P] ->
      Code = ?BB:code(?CFG:bb(Cfg, Label)),
      Defs = get_defs_in_non_merge_block(Code, []),
      ?pp_debug("~nAdding a vertex for ~w", [Label]),
      Succs = ?CFG:succ(Cfg, Label),
      case Succs of
        [] -> %% Exit point
          ?GRAPH:add_vertex(CFGGraph, Label, #block{type = exit}),
          NewToBeFactorizedAcc = ToBeFactorizedAcc;
        _ -> %% Split point
          ?GRAPH:add_vertex(CFGGraph,Label,#block{type=not_mp,attributes={P,Succs}}),
          NewToBeFactorizedAcc = [Label|ToBeFactorizedAcc]
      end,
      ?pp_debug("~nAdding an edge ~w -> ~w (~w)",[P,Label,Defs]),
      case ?GRAPH:add_edge(CFGGraph,P,Label,Defs) of
        {error,Reason} ->
          exit({?MODULE,forget_that_for_christs_sake_bingwen_please,{"Bad edge",Reason}});
        _ ->
          ok
      end,
      create_cfggraph(Ls,Cfg,CFGGraph,NewToBeFactorizedAcc,MPAcc,LateEdges,XsiGraph);
    _ -> %% Merge point
      Code = ?BB:code(?CFG:bb(Cfg,Label)),
      {Defs,Xsis,Maps,Uses} = get_info_in_merge_block(Code,XsiGraph,[],[],gb_trees:empty(),gb_trees:empty()),
      Attributes = #mp{preds=Preds,xsis=Xsis,defs=Defs,maps=Maps,uses=Uses},
      MergeBlock = #block{type=mp,attributes=Attributes},
      ?pp_debug("~nAdding a vertex for ~w with Defs= ~w",[Label,Defs]),
      ?GRAPH:add_vertex(CFGGraph,Label,MergeBlock),
      %% Add edges
      NewLateEdges = add_edges_for_mp(Preds,Label,LateEdges),
      create_cfggraph(Ls,Cfg,CFGGraph,ToBeFactorizedAcc,[Label|MPAcc],NewLateEdges,XsiGraph)
  end.

%% Set of the temp keys that the #pre_candidate{} instructions of a
%% non-merge block have as definition. Definitions that are real variables
%% or bottom are not collected.
get_defs_in_non_merge_block([], Acc) ->
  ?SETS:from_list(Acc);
get_defs_in_non_merge_block([Inst|Rest], Acc) ->
  case Inst of
    #pre_candidate{} ->
      Def = Inst#pre_candidate.def,
      case Def of
        #temp{} ->
          %% {temp,Key,_Var} ->
          %% get_defs_in_non_merge_block(Rest,[Key|Acc]);
          get_defs_in_non_merge_block(Rest, [Def#temp.key|Acc]);
        _-> %% Real variables or bottom
          get_defs_in_non_merge_block(Rest, Acc)
      end;
    _ ->
      get_defs_in_non_merge_block(Rest, Acc)
  end.

%% Collects, for a merge block, {Defs, Xsis, Maps, Uses}:
%%   Defs - set of temp keys defined by the block's pre-candidates
%%   Xsis - keys of the block's xsi links
%%   Maps, Uses - gb_trees filled by add_map_and_uses/4 from the operand
%%                lists of those Xsis
get_info_in_merge_block([],_XsiGraph,Defs,Xsis,Maps,Uses) ->
  {?SETS:from_list(Defs),Xsis,Maps,Uses}; %% Xsis are in backward order
get_info_in_merge_block([Inst|Rest],XsiGraph,Defs,Xsis,Maps,Uses) ->
  case Inst of
    #pre_candidate{} ->
      Def = Inst#pre_candidate.def,
      case Def of
        #temp{} ->
          get_info_in_merge_block(Rest,XsiGraph,[Def#temp.key|Defs],Xsis,Maps,Uses);
        _ ->
          get_info_in_merge_block(Rest,XsiGraph,Defs,Xsis,Maps,Uses)
      end;
    #xsi_link{} ->
      Key = Inst#xsi_link.num,
      {_Key,Xsi} = ?GRAPH:vertex(XsiGraph,Key),
      OpList = xsi_oplist(Xsi),
      {NewMaps,NewUses} = add_map_and_uses(OpList,Key,Maps,Uses),
      get_info_in_merge_block(Rest,XsiGraph,Defs,[Key|Xsis],NewMaps,NewUses);
    _ ->
      get_info_in_merge_block(Rest,XsiGraph,Defs,Xsis,Maps,Uses)
  end.

%% Prepends one {Pred, Label} pair per predecessor of merge block Label to
%% the list of edges that post_process/2 adds later.
add_edges_for_mp([], _Label, LateEdges) ->
  LateEdges;
add_edges_for_mp([P|Ps], Label, LateEdges) ->
  add_edges_for_mp(Ps,Label,[{P,Label}|LateEdges]).
%% add_map_and_uses(XsiOps, Key, Maps, Uses) -> {NewMaps, NewUses}
%%
%% Records the operands of the Xsi identified by Key:
%%  - Maps (gb_tree keyed by #xsi_op{}) maps every operand whose value is a
%%    #bottom{}, #temp{} or #eop{} to the set of Xsi keys using it;
%%  - Uses (gb_tree keyed by predecessor label) collects, per predecessor,
%%    the key of each #temp{} operand and the `stopped_by' field of each
%%    #eop{} operand.
%% Operands of any other kind (real variables, constants) leave both trees
%% untouched.
add_map_and_uses([], _Key, Maps, Uses) ->
  {Maps,Uses};
add_map_and_uses([XsiOp|Ops], Key, Maps, Uses) ->
  {NewMaps,NewUses} =
    case XsiOp#xsi_op.op of
      #bottom{} ->
        {add_to_set_at_key(XsiOp,Key,Maps), Uses};
      #temp{key = TempKey} ->
        {add_to_set_at_key(XsiOp,Key,Maps),
         add_to_set_at_key(XsiOp#xsi_op.pred,TempKey,Uses)};
      #eop{stopped_by = StoppedBy} ->
        {add_to_set_at_key(XsiOp,Key,Maps),
         add_to_set_at_key(XsiOp#xsi_op.pred,StoppedBy,Uses)};
      _ ->
        {Maps,Uses}
    end,
  add_map_and_uses(Ops, Key, NewMaps, NewUses).

%% Adds Elem to the set stored under TreeKey in the gb_tree Tree, creating
%% a singleton set when TreeKey has no entry yet.
add_to_set_at_key(TreeKey, Elem, Tree) ->
  Set = case gb_trees:lookup(TreeKey,Tree) of
          {value, Old} ->
            ?SETS:add_element(Elem,Old);
          none ->
            ?SETS:from_list([Elem])
        end,
  gb_trees:enter(TreeKey,Set,Tree).

%% post_process(LateEdges, CFGGraph) -> ok
%%
%% Adds the queued {Pred,Label} edges into merge points. Each edge is
%% labelled with the set stored for Pred in the `uses' tree of the merge
%% block at Label, or with the empty set when there is no such entry.
post_process([], _CFGGraph) -> ok;
post_process([{Pred,Label}|Es], CFGGraph) ->
  {_PP,Block} = ?GRAPH:vertex(CFGGraph,Label),
  Att = Block#block.attributes,
  SetToAdd = case gb_trees:lookup(Pred,Att#mp.uses) of
               {value, Set} ->
                 Set;
               none ->
                 ?SETS:new()
             end,
  %% ?pp_debug("~nAdding an edge ~w -> ~w (~w)",[Pred,Label,SetToAdd]),
  ?GRAPH:add_edge(CFGGraph, Pred, Label, SetToAdd),
  post_process(Es, CFGGraph).
%% factorize(SplitPoints, CFGGraph) -> ok
%%
%% For every split point P (a block with exactly one incoming edge and at
%% least one outgoing edge), the label of the incoming edge is replaced by
%% its union with the intersection of the labels of all outgoing edges.
factorize(SplitPoints, CFGGraph) ->
  lists:foreach(
    fun(P) ->
        [FirstOut|OtherOuts] = ?GRAPH:out_edges(CFGGraph,P),
        %% ?pp_debug("~nIn_degrees ~w : ~w",[P,?GRAPH:in_degree(CFGGraph,P)]),
        [InEdge] = ?GRAPH:in_edges(CFGGraph,P),
        {E,V1,V2,InLabel} = ?GRAPH:edge(CFGGraph,InEdge),
        {_OEE,_OEV1,_OEV2,FirstOutLabel} = ?GRAPH:edge(CFGGraph,FirstOut),
        Common = shoot_info_upwards(OtherOuts,FirstOutLabel,CFGGraph),
        %% Re-adding the edge under its own identifier updates its label
        ?GRAPH:add_edge(CFGGraph,E,V1,V2,?SETS:union(InLabel,Common))
    end,
    SplitPoints).

%% Intersects Acc with the label of every edge in the list, stopping as
%% soon as the intersection becomes empty.
shoot_info_upwards([], Acc, _CFGGraph) ->
  Acc;
shoot_info_upwards([E|Es], Acc, CFGGraph) ->
  {_E,_V1,_V2,EdgeSet} = ?GRAPH:edge(CFGGraph,E),
  Common = ?SETS:intersection(Acc, EdgeSet),
  case ?SETS:size(Common) =:= 0 of
    true -> Common;
    false -> shoot_info_upwards(Es,Common,CFGGraph)
  end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  DOWNSAFETY  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% perform_downsafety(MergePoints, G, XsiG) -> ok
%%
%% Computes, for every merge point, the set of operands that are not
%% downsafe (see parse_keys/7) and stores it in the `ndsSet' field of the
%% block's #mp{} attributes by rewriting the vertex in G.
perform_downsafety(MergePoints, G, XG) ->
  lists:foreach(
    fun(MP) ->
        {V,Block} = ?GRAPH:vertex(G, MP),
        Att = Block#block.attributes,
        Maps = Att#mp.maps,
        OutEdges = ?GRAPH:out_edges(G, MP),
        %% ?pp_debug("~n Inspection Maps : ~w",[Maps]),
        NewNDS = parse_keys(gb_trees:keys(Maps),Maps,OutEdges,G,
                            Att#mp.defs,?SETS:new(),XG),
        ?GRAPH:add_vertex(G, V, Block#block{attributes = Att#mp{ndsSet = NewNDS}}),
        ?pp_debug("~n Not Downsafe at L~w: ~w", [V, NewNDS])
        %%io:format(standard_io,"~n Not Downsafe at L~w: ~w",[V,NewNDS]),
    end,
    MergePoints).
%% parse_keys(Operands, Maps, OutEdges, G, Defs, NDS, XsiG) -> NewNDS
%%
%% Folds over the operands recorded in Maps. An operand whose "killer set"
%% (the Xsi keys stored for it in Maps) intersects Defs is considered
%% downsafe and skipped; every other operand is examined against the
%% outgoing edges by getNDS/6, which may add it to the not-downsafe set.
parse_keys(Operands, Maps, OutEdges, G, Defs, NDS, XsiG) ->
  lists:foldl(
    fun(M, NDSAcc) ->
        KillerSet = gb_trees:get(M,Maps),
        %% ?pp_debug("~n Inspection ~w -> ~w",[M,KillerSet]),
        case ?SETS:size(?SETS:intersection(KillerSet,Defs)) of
          0 ->
            getNDS(M,KillerSet,NDSAcc,OutEdges,G,XsiG);
          _ ->
            %% One Xsi which has M as operand has killed it
            %% M is then Downsafe
            %% and is not added to the NotDownsafeSet (NDS)
            NDSAcc
        end
    end,
    NDS, Operands).

%% Walks the outgoing edges in order. M is added to NDS at the first edge
%% whose label shares no element with KillerSet; if every edge label
%% intersects KillerSet (or there are no edges), NDS is returned unchanged.
getNDS(_M, _KillerSet, NDS, [], _G, _XsiG) ->
  NDS;
getNDS(M, KillerSet, NDS, [E|Es], G, XsiG) ->
  {_EE,_V1,_V2,Label} = ?GRAPH:edge(G, E),
  %% ?pp_debug("~n ######## Intersection between KillerSet: ~w and Label: ~w",[KillerSet,Label]),
  %% ?pp_debug("~n ######## ~w",[Set]),
  case ?SETS:size(?SETS:intersection(KillerSet, Label)) of
    0 ->
      %% M is not downsafe
      ?SETS:add_element(M, NDS);
    _ ->
      %% Try the other edges
      getNDS(M, KillerSet, NDS, Es, G, XsiG)
  end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  WILL BE AVAILABLE  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Runs the two sub-phases of the "will be available" computation over all
%% vertices of the Xsi graph: first CanBeAvailable, then Later. Each phase
%% is wrapped in ?option_time with its own description string.
perform_will_be_available(XsiGraph,CFGGraph,Options) ->
  Keys = ?GRAPH:vertices(XsiGraph),
  ?pp_debug("~n############ Can Be Available ##########~n",[]),
  ?option_time(perform_can_be_available(Keys,XsiGraph,CFGGraph),"RTL A-SSAPRE WillBeAvailable - Compute CanBeAvailable",Options),
  ?pp_debug("~n############ Later ##########~n",[]),
  ?option_time(perform_later(Keys,XsiGraph),"RTL A-SSAPRE WillBeAvailable - Compute Later",Options).
%% perform_can_be_available(XsiKeys, XsiGraph, CFGGraph) -> ok
%%
%% Decides the `cba' flag of every Xsi whose flag is still undefined.
%% The Xsi's operands are intersected with the not-downsafe set (ndsSet) of
%% the merge block the Xsi lives in:
%%  - empty intersection: cba is set to true;
%%  - otherwise cba is set to false and the value is propagated to the
%%    in-neighbours of the Xsi through propagate_cba/4.
%% Xsis whose flag is already true or false are skipped.
perform_can_be_available([],_XsiGraph,_CFGGraph) -> ok;
perform_can_be_available([Key|Keys],XsiGraph,CFGGraph) ->
  {V,Xsi} = ?GRAPH:vertex(XsiGraph,Key),
  case Xsi#xsi.cba of
    undefined ->
      {_VV,Block} = ?GRAPH:vertex(CFGGraph,Xsi#xsi.label),
      NDS = (Block#block.attributes)#mp.ndsSet,
      NotDownsafeOps = ?SETS:intersection(NDS,?SETS:from_list(xsi_oplist(Xsi))),
      case ?SETS:size(NotDownsafeOps) of
        0 ->
          ?GRAPH:add_vertex(XsiGraph, V, Xsi#xsi{cba = true});
        _ ->
          %% NOTE(review): the elements of NotDownsafeOps come from
          %% xsi_oplist/1 and so look like #xsi_op{} records, not #temp{}
          %% records; if so this filter never matches, TempKeys is always
          %% [] and the `_ -> ok' branch is dead. Confirm the intent.
          TempKeys = [X || #temp{key=X} <- ?SETS:to_list(NotDownsafeOps)],
          case TempKeys of
            [] ->
              ?GRAPH:add_vertex(XsiGraph, V, Xsi#xsi{cba = false}),
              ImmediateParents = ?GRAPH:in_neighbours(XsiGraph, Key),
              propagate_cba(ImmediateParents,XsiGraph,Xsi#xsi.def,CFGGraph);
            _ ->
              ok
          end
      end;
    _ -> %% True or False => nothing to do for this Xsi
      ok
  end,
  perform_can_be_available(Keys, XsiGraph, CFGGraph).

%% propagate_cba(ParentKeys, XsiGraph, XsiDef, CFGGraph) -> ok
%%
%% For each parent Xsi whose cba is not already false: if XsiDef appears as
%% the `op' of one of the parent's operands that are also in the ndsSet of
%% the parent's merge block, the parent's cba is set to false and the
%% propagation continues from the parent's own in-neighbours.
propagate_cba([],_XG,_Def,_CFGG) -> ok;
propagate_cba([IPX|IPXs],XsiGraph,XsiDef,CFGGraph) ->
  {V,IPXsi} = ?GRAPH:vertex(XsiGraph,IPX),
  {_VV,Block} = ?GRAPH:vertex(CFGGraph,IPXsi#xsi.label),
  NDS = (Block#block.attributes)#mp.ndsSet,
  NotDownsafeOps =
    ?SETS:to_list(?SETS:intersection(NDS,?SETS:from_list(xsi_oplist(IPXsi)))),
  case IPXsi#xsi.cba of
    false ->
      ok;
    _ ->
      case lists:keymember(XsiDef, #xsi_op.op, NotDownsafeOps) of
        true ->
          ?GRAPH:add_vertex(XsiGraph, V, IPXsi#xsi{cba = false}),
          GrandParents = ?GRAPH:in_neighbours(XsiGraph, IPX),
          propagate_cba(GrandParents,XsiGraph,IPXsi#xsi.def,CFGGraph);
        _ ->
          ok
      end
  end,
  propagate_cba(IPXs,XsiGraph,XsiDef,CFGGraph).
%% perform_later(XsiKeys, XsiGraph) -> ok
%%
%% Decides the `later' flag of every Xsi whose flag is still undefined,
%% based on the kinds of its operands (see parse_ops/2):
%%  - at least one #temp{} operand and no real variable: left undecided
%%    (it may be reset later through propagation);
%%  - a real variable among the operands: later is reset to false, wba is
%%    derived from cba, and the reset is propagated to every vertex
%%    returned by digraph_utils:reaching/2 for this Xsi;
%%  - only bottoms and/or expressions: later is set to true.
perform_later([], _XsiGraph) -> ok;
perform_later([Key|Keys], XsiGraph) ->
  {V, Xsi} = ?GRAPH:vertex(XsiGraph, Key),
  %% ?pp_debug("~n DEBUG : inspecting later of ~w (~w)~n",[Key,Xsi#xsi.later]),
  case Xsi#xsi.later of
    undefined ->
      %% `fangpi' is only a sentinel: parse_ops/2 returns it unchanged when
      %% no operand is a temp or a real variable.
      case parse_ops(xsi_oplist(Xsi),fangpi) of
        has_temp ->
          ok;
        has_real ->
          set_later_false(XsiGraph,V,Xsi),
          AllParents = digraph_utils:reaching([Key], XsiGraph),
          ?pp_debug("~nPropagating to all parents of t~w: ~w",[Key,AllParents]),
          propagate_later(AllParents,XsiGraph);
        _ -> %% Just contains bottoms and/or expressions
          ?GRAPH:add_vertex(XsiGraph,V,Xsi#xsi{later=true})
      end;
    _ -> %% True or False => nothing to do for this Xsi
      ok
  end,
  perform_later(Keys,XsiGraph).

%% Resets `later' to false on every listed Xsi that does not already have
%% later =:= false, deriving wba from cba in the same way as perform_later/2.
propagate_later([], _XG) -> ok;
propagate_later([IPX|IPXs], XsiGraph) ->
  {V,IPXsi} = ?GRAPH:vertex(XsiGraph,IPX),
  case IPXsi#xsi.later of
    false ->
      ?pp_debug("~nThrough propagation, later of t~w is already reset",[IPX]);
    _ ->
      ?pp_debug("~nThrough propagation, resetting later of t~w",[IPX]),
      set_later_false(XsiGraph,V,IPXsi)
  end,
  propagate_later(IPXs,XsiGraph).

%% Stores Xsi back at vertex V with later=false. wba becomes true when cba
%% is true or still undefined, and false for any other cba value.
set_later_false(XsiGraph, V, Xsi) ->
  WBA = case Xsi#xsi.cba of
          true -> true;
          undefined -> true;
          _ -> false
        end,
  ?GRAPH:add_vertex(XsiGraph,V,Xsi#xsi{later=false,wba=WBA}).

%% parse_ops(XsiOps, Res) -> has_real | has_temp | Res
%%
%% Classifies an operand list. Returns has_real as soon as an operand is
%% neither a #temp{}, a #bottom{} nor an #eop{}; otherwise has_temp if at
%% least one #temp{} was seen, otherwise the initial Res.
parse_ops([], Res) ->
  Res;
parse_ops([Op|Ops], Res) ->
  case Op#xsi_op.op of
    #temp{} ->
      parse_ops(Ops,has_temp);
    #bottom{} ->
      parse_ops(Ops,Res);
    #eop{} ->
      parse_ops(Ops,Res);
    _ ->
      has_real
  end.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  CODE MOTION  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% perform_code_motion(Labels, Cfg, XsiG) -> NewCfg
%%
%% Rewrites the code of every listed block in turn, threading the CFG
%% through code_motion_in_block/6.
perform_code_motion(Labels, Cfg, XsiG) ->
  lists:foldl(
    fun(L, CfgAcc) ->
        Code = ?BB:code(?CFG:bb(CfgAcc,L)),
        ?pp_debug("~n################ Code Motion in L~w~n",[L]),
        ?pp_debug("~nCode to move ~n",[]),
        pp_instrs(Code,XsiG),
        NewCfg = code_motion_in_block(L,Code,CfgAcc,XsiG,[],gb_trees:empty()),
        ?pp_debug("~n################ Code Motion successful in L~w~n",[L]),
        NewCfg
    end,
    Cfg, Labels).

%% code_motion_in_block(Label, Code, Cfg, XsiG, Visited, InsertionsAcc) -> Cfg'
%%
%% Walks the instructions of one block, collecting the rewritten code in
%% Visited (reversed) and the insertions requested per predecessor in
%% InsertionsAcc:
%%  - a #pre_candidate{} becomes a move from its def when the def is a real
%%    variable or a temp whose Xsi is `wba' (counted as a redundancy), and
%%    is otherwise replaced by its original alu;
%%  - an #xsi_link{} to a `wba' Xsi becomes a phi, possibly requesting
%%    insertions (get_insertions/6); a link to a non-wba Xsi is dropped;
%%  - anything else is kept unchanged.
%% At the end the new block is stored and the insertions are materialised.
code_motion_in_block(Label,[],Cfg,_XsiG,Visited,InsertionsAcc) ->
  NewBB = ?BB:mk_bb(lists:reverse(Visited)),
  Cfg2 = ?CFG:bb_add(Cfg,Label,NewBB),
  %% Must come after the bb_add, since redirect will update the Phis too...
  %% ?pp_debug("~nChecking the Code at L~w:~n~p",[Label,?BB:code(?CFG:bb(Cfg3,Label))]),
  make_insertions(Label,gb_trees:keys(InsertionsAcc),InsertionsAcc,Cfg2);
code_motion_in_block(L,[Inst|Insts],Cfg,XsiG,Visited,InsertionsAcc) ->
  ?pp_debug("~nInspecting Inst : ~n",[]),pp_instr(Inst,XsiG),
  case Inst of
    #pre_candidate{def = Def, alu = Alu} ->
      InstToAdd =
        case Def of
          bottom ->
            Alu;
          #temp{key = DefKey, var = DefVar} ->
            {_V,DefXsi} = ?GRAPH:vertex(XsiG,DefKey),
            case DefXsi#xsi.wba of
              true ->
                %% Turn into a move
                TempMove = ?RTL:mk_move(?RTL:alu_dst(Alu),DefVar),
                pp_instr(Alu,nil), ?pp_debug(" ==> ",[]), pp_instr(TempMove,nil),
                %% Counting redundancies
                redundancy_add(),
                TempMove;
              _ ->
                Alu
            end;
          _ -> %% Def is a real variable
            %% Turn into a move
            RealMove = ?RTL:mk_move(?RTL:alu_dst(Alu),Def),
            pp_instr(Alu,nil), ?pp_debug(" ==> ",[]), pp_instr(RealMove,nil),
            %% Counting redundancies
            redundancy_add(),
            RealMove
        end,
      code_motion_in_block(L,Insts,Cfg,XsiG,[InstToAdd|Visited],InsertionsAcc);
    #xsi_link{num = Key} ->
      {_V,Xsi} = ?GRAPH:vertex(XsiG,Key),
      case Xsi#xsi.wba of
        true ->
          %% Xsi is a WBA, it might trigger insertions
          OpList = xsi_oplist(Xsi),
          ?pp_debug(" This Xsi is a 'Will be available'",[]),
          %% Cleaning the instruction
          Expr = prepare_inst(Xsi#xsi.inst),
          {NewOpList,NewInsertionsAcc} =
            get_insertions(OpList,[],InsertionsAcc,Visited,Expr,XsiG),
          %% Making Xsi a Phi with Oplist
          PhiOpList = [{Pred,Var} || #xsi_op{pred=Pred,op=Var} <- NewOpList],
          XsiDef = Xsi#xsi.def,
          Phi = ?RTL:phi_arglist_update(?RTL:mk_phi(XsiDef#temp.var),PhiOpList),
          ?pp_debug("~n Xsi is turned into Phi : ~w",[Phi]),
          code_motion_in_block(L,Insts,Cfg,XsiG,[Phi|Visited],NewInsertionsAcc);
        _ ->
          ?pp_debug(" This Xsi is not a 'Will be available'",[]),
          code_motion_in_block(L,Insts,Cfg,XsiG,Visited,InsertionsAcc)
      end;
%%     phi ->
%%       code_motion_in_block(L,Insts,Cfg,XsiG,[Inst|Visited],InsertionsAcc);
    _ ->
      %% Other instructions.... Phis too
      code_motion_in_block(L,Insts,Cfg,XsiG,[Inst|Visited],InsertionsAcc)
  end.

%% Replaces each source operand of the alu Expr that is a #temp{} by the
%% variable stored in that temp; other operands are left as they are.
prepare_inst(Expr) ->
  WithSrc1 = case ?RTL:alu_src1(Expr) of
               #temp{var = Var1} -> ?RTL:alu_src1_update(Expr,Var1);
               _ -> Expr
             end,
  case ?RTL:alu_src2(Expr) of
    #temp{var = Var2} -> ?RTL:alu_src2_update(WithSrc1,Var2);
    _ -> WithSrc1
  end.
%% get_insertions(XsiOps, OpAcc, InsertionsAcc, Visited, Expr, XsiG)
%%   -> {NewXsiOps, NewInsertionsAcc}
%%
%% Walks the operands of a `will be available' Xsi and decides, for each
%% one, which variable the resulting phi must read on that predecessor
%% edge and whether code has to be inserted on the edge to produce it.
%%
%% InsertionsAcc is a gb_tree keyed by predecessor label; each value is an
%% #insertion{} whose `from' field is a list of {Operand,Dst} pairs already
%% handled on that edge and whose `code' field is the list of instructions
%% to insert (most recent first).
%%
%% Per operand kind:
%%  - #bottom{} / #eop{}: the expression is recomputed on the edge into the
%%    operand's own variable (manufacture_computation/3), unless the same
%%    operand was already handled on that edge;
%%  - #const_expr{}: a move of the constant value into the operand's
%%    variable is inserted, unless already handled on that edge;
%%  - #temp{}: nothing is inserted when the temp's Xsi is `wba'; otherwise
%%    the expression is recomputed into a fresh variable;
%%  - anything else is a real variable and is used as is.
%% The returned operand list is in reverse order of the input list; each
%% operand has its `op' field replaced by the chosen variable.
get_insertions([],OpAcc,InsertionsAcc,_Visited,_Expr,_XsiG) ->
  {OpAcc,InsertionsAcc};
get_insertions([XsiOp|Ops],OpAcc,InsertionsAcc,Visited,Expr,XsiG) ->
  Pred = XsiOp#xsi_op.pred,
  Op = XsiOp#xsi_op.op,
  case Op of
    #bottom{} ->
      case gb_trees:lookup(Pred,InsertionsAcc) of
        {value,Insertion} ->
          From = Insertion#insertion.from,
          case lists:keyfind(Op, 1, From) of
            false ->
              ?pp_debug("~nThere has been insertions along the edge L~w already, but not for that operand | Op=",[Pred]),pp_arg(Op),
              Dst = Op#bottom.var,
              Expr2 = ?RTL:alu_dst_update(Expr,Dst),
              Inst = manufacture_computation(Pred,Expr2,Visited),
              Code = Insertion#insertion.code,
              NewInsertion = Insertion#insertion{from=[{Op,Dst}|From],code=[Inst|Code]},
              NewInsertionsAcc = gb_trees:update(Pred,NewInsertion,InsertionsAcc);
            {_, Val} ->
              ?pp_debug("~nThere has been insertions along the edge L~w already, and for that operand too | Op=",[Pred]),pp_arg(Op),
              %% Reuse the destination chosen the first time
              Dst = Val,
              NewInsertionsAcc = InsertionsAcc
          end;
        none ->
          ?pp_debug("~nThere has been no insertion along the edge L~w, (and not for that operand, of course)| Op=",[Pred]),pp_arg(Op),
          Dst = Op#bottom.var,
          Expr2 = ?RTL:alu_dst_update(Expr,Dst),
          Inst = manufacture_computation(Pred,Expr2,Visited),
          NewInsertion = #insertion{from=[{Op,Dst}],code=[Inst]},
          NewInsertionsAcc = gb_trees:insert(Pred,NewInsertion,InsertionsAcc)
      end;
    #const_expr{} ->
      case gb_trees:lookup(Pred,InsertionsAcc) of
        {value,Insertion} ->
          From = Insertion#insertion.from,
          case lists:keyfind(Op, 1, From) of
            false ->
              ?pp_debug("~nThere have been insertions along the edge L~w already, but not for that operand | Op=",[Pred]),pp_arg(Op),
              Dst = Op#const_expr.var,
              Val = Op#const_expr.value,
              %% A constant only needs a move, not a recomputation
              Inst = ?RTL:mk_move(Dst,Val),
              Code = Insertion#insertion.code,
              NewInsertion = Insertion#insertion{from=[{Op,Dst}|From],code=[Inst|Code]},
              NewInsertionsAcc = gb_trees:update(Pred,NewInsertion,InsertionsAcc);
            {_, Val} ->
              ?pp_debug("~nThere have been insertions along the edge L~w already, and for that operand too | Op=",[Pred]),pp_arg(Op),
              Dst = Val,
              NewInsertionsAcc = InsertionsAcc
          end;
        none ->
          ?pp_debug("~nThere has been no insertion along the edge L~w, (and not for that operand, of course)| Op=",[Pred]),pp_arg(Op),
          Dst = Op#const_expr.var,
          Val = Op#const_expr.value,
          Inst = ?RTL:mk_move(Dst,Val),
          NewInsertion = #insertion{from=[{Op,Dst}],code=[Inst]},
          NewInsertionsAcc = gb_trees:insert(Pred,NewInsertion,InsertionsAcc)
      end;
    #eop{} ->
      %% We treat expressions like bottoms
      %% The value must be recomputed, and therefore not available...
      case gb_trees:lookup(Pred,InsertionsAcc) of
        {value,Insertion} ->
          From = Insertion#insertion.from,
          case lists:keyfind(Op, 1, From) of
            false ->
              ?pp_debug("~nThere has been insertions along the edge L~w already, but not for that operand | Op=",[Pred]),pp_arg(Op),
              Dst = Op#eop.var,
              Expr2 = ?RTL:alu_dst_update(Expr,Dst),
              Inst = manufacture_computation(Pred,Expr2,Visited),
              Code = Insertion#insertion.code,
              NewInsertion = Insertion#insertion{from=[{Op,Dst}|From],code=[Inst|Code]},
              NewInsertionsAcc = gb_trees:update(Pred,NewInsertion,InsertionsAcc);
            {_, Val} ->
              ?pp_debug("~nThere has been insertions along the edge L~w already, and for that operand too | Op=",[Pred]),pp_arg(Op),
              Dst = Val,
              NewInsertionsAcc = InsertionsAcc
          end;
        none ->
          ?pp_debug("~nThere has been no insertion along the edge L~w, (and not for that operand, of course)| Op=",[Pred]),pp_arg(Op),
          Dst = Op#eop.var,
          Expr2 = ?RTL:alu_dst_update(Expr,Dst),
          Inst = manufacture_computation(Pred,Expr2,Visited),
          NewInsertion = #insertion{from=[{Op,Dst}],code=[Inst]},
          NewInsertionsAcc = gb_trees:insert(Pred,NewInsertion,InsertionsAcc)
      end;
    #temp{} ->
      case gb_trees:lookup(Pred,InsertionsAcc) of
        {value,Insertion} ->
          From = Insertion#insertion.from,
          case lists:keyfind(Op, 1, From) of
            false ->
              ?pp_debug("~nThere has been insertions along the edge L~w already, but not for that operand | Op=",[Pred]),pp_arg(Op),
              Key = Op#temp.key,
              {_V,Xsi} = ?GRAPH:vertex(XsiG,Key),
              case Xsi#xsi.wba of
                true ->
                  ?pp_debug("~nBut the operand is a WBA Xsi: no need for insertion",[]),
                  Dst = Op#temp.var,
                  NewInsertionsAcc = InsertionsAcc;
                _ ->
                  ?pp_debug("~nBut the operand is a NOT WBA Xsi: we must make an insertion",[]),
                  %% The temp's own variable is not used here: the value is
                  %% recomputed into a fresh variable
                  Dst = ?RTL:mk_new_var(),
                  Expr2 = ?RTL:alu_dst_update(Expr,Dst),
                  Inst = manufacture_computation(Pred,Expr2,Visited),
                  Code = Insertion#insertion.code,
                  NewInsertion = Insertion#insertion{from=[{Op,Dst}|From],code=[Inst|Code]},
                  NewInsertionsAcc = gb_trees:update(Pred,NewInsertion,InsertionsAcc)
              end;
            {_, Val} ->
              ?pp_debug("~nThere has been insertions along the edge L~w already, and for that operand too (Op=~w)",[Pred,Op]),
              ?pp_debug("~nThis means, this temp is a WBA Xsi's definition",[]),
              Dst = Val,
              NewInsertionsAcc = InsertionsAcc
          end;
        none ->
          ?pp_debug("~nThere has been no insertion along the edge L~w, (and not for that operand, of course | Op=",[Pred]),pp_arg(Op),
          Key = Op#temp.key,
          {_V,Xsi} = ?GRAPH:vertex(XsiG,Key),
          case Xsi#xsi.wba of
            true ->
              ?pp_debug("~nBut the operand is a WBA Xsi: no need for insertion",[]),
              Dst = Op#temp.var,
              NewInsertionsAcc = InsertionsAcc;
            _ ->
              ?pp_debug("~nBut the operand is a NOT WBA Xsi: we must make an insertion",[]),
              Dst = ?RTL:mk_new_var(),
              Expr2 = ?RTL:alu_dst_update(Expr,Dst),
              Inst = manufacture_computation(Pred,Expr2,Visited),
              NewInsertion = #insertion{from=[{Op,Dst}],code=[Inst]},
              NewInsertionsAcc = gb_trees:insert(Pred,NewInsertion,InsertionsAcc)
          end
      end;
    _ ->
      ?pp_debug("~nThe operand (Op=",[]),pp_arg(Op),?pp_debug(") is a real variable, no need for insertion along L~w",[Pred]),
      Dst = Op,
      NewInsertionsAcc = InsertionsAcc
  end,
  %% Dst and NewInsertionsAcc are bound by every branch above
  NewXsiOp = XsiOp#xsi_op{op=Dst},
  get_insertions(Ops, [NewXsiOp|OpAcc], NewInsertionsAcc, Visited, Expr, XsiG).
%% manufacture_computation(Pred, Expr, Visited) -> Expr'
%%
%% Rewrites the alu Expr so that it can be evaluated on the edge coming
%% from Pred: for each phi in Visited whose destination is one of Expr's
%% sources, that source is replaced by the phi's argument for Pred.
%% Visited must only contain phis at this point; an #xsi_link{} or #xsi{}
%% makes the process exit.
manufacture_computation(_Pred, Expr, []) ->
  ?pp_debug("~n Manufactured computation : ~w", [Expr]),
  Expr;
manufacture_computation(Pred, Expr, [I|Rest]) ->
  %% ?pp_debug("~n Expr = ~w",[Expr]),
  Src1 = ?RTL:alu_src1(Expr),
  Src2 = ?RTL:alu_src2(Expr),
  case I of
    #xsi_link{} ->
      exit({?MODULE,should_not_be_a_xsi_link,{"Why the hell do we still have a xsi link???",I}});
    #xsi{} ->
      exit({?MODULE,should_not_be_a_xsi,{"Why the hell do we still have a xsi ???",I}});
    #phi{} ->
      PhiDst = ?RTL:phi_dst(I),
      PhiArg = ?RTL:phi_arg(I,Pred),
      WithSrc1 = if PhiDst =:= Src1 -> ?RTL:alu_src1_update(Expr,PhiArg);
                    true -> Expr
                 end,
      WithSrc2 = if PhiDst =:= Src2 -> ?RTL:alu_src2_update(WithSrc1,PhiArg);
                    true -> WithSrc1
                 end,
      manufacture_computation(Pred,WithSrc2,Rest)
  end.

%% make_insertions(L, Preds, ITree, Cfg) -> NewCfg
%%
%% For every predecessor in Preds, creates a new block holding the code
%% recorded for it in ITree (in insertion order) followed by a goto to L,
%% and splices that block between the predecessor and L.
make_insertions(L, Preds, ITree, Cfg) ->
  lists:foldl(
    fun(OldPred, CfgAcc) ->
        NewPred = ?RTL:label_name(?RTL:mk_new_label()),
        Insertion = gb_trees:get(OldPred, ITree),
        %% The code list is stored most-recent-first, so the goto is pushed
        %% on the front before reversing to make it end up last
        CodeToInsert = lists:reverse([?RTL:mk_goto(L)|Insertion#insertion.code]),
        ?CFG:bb_insert_between(CfgAcc, NewPred, ?BB:mk_bb(CodeToInsert), OldPred, L)
    end,
    Cfg, Preds).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  XSI INTERFACE  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Operand list of an Xsi; an unset (undefined) list reads as [].
xsi_oplist(#xsi{opList=undefined}) -> [];
xsi_oplist(#xsi{opList=OpList}) -> OpList.

%% Operand of Xsi for predecessor Pred, or `undetermined_operand' when the
%% Xsi has no operand for that predecessor.
xsi_arg(Xsi, Pred) ->
  case lists:keyfind(Pred, #xsi_op.pred, xsi_oplist(Xsi)) of
    #xsi_op{op = Op} ->
      Op;
    false ->
      undetermined_operand
  end.

%% Replaces the operand of Xsi for predecessor Pred by Op. The operand list
%% is stored back unchanged when there is no operand for Pred.
xsi_arg_update(Xsi, Pred, Op) ->
  Replacement = #xsi_op{pred=Pred,op=Op},
  Xsi#xsi{opList=lists:keyreplace(Pred, #xsi_op.pred, xsi_oplist(Xsi), Replacement)}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  PRETTY-PRINTING  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-ifndef(SSAPRE_DEBUG).

%% Without SSAPRE_DEBUG every pretty-printer is a no-op returning ok.

%%pp_cfg(Cfg,_) -> ?CFG:pp(Cfg).
pp_cfg(_,_) -> ok.
pp_instr(_,_) -> ok.
pp_instrs(_,_) -> ok.
pp_expr(_) -> ok.
pp_xsi(_) -> ok.
pp_arg(_) -> ok.
pp_xsigraph(_) -> ok.
pp_cfggraph(_) -> ok.
%% pp_xsigraph(G) ->
%%   Vertices = lists:sort(?GRAPH:vertices(G)),
%%   io:format(standard_io, "Size of the Xsi Graph: ~w", [length(Vertices)]).
%% pp_cfggraph(G) ->
%%   Vertices = lists:sort(?GRAPH:vertices(G)),
%%   io:format(standard_io, "Size of the CFG Graph: ~w", [length(Vertices)]).

-else.

%% Prints every block of Cfg, visiting the labels in preorder.
pp_cfg(Cfg, Graph) ->
  pp_blocks(?CFG:preorder(Cfg), Cfg, Graph).

%% Prints a header line and the instructions of each listed block.
pp_blocks(Labels, Cfg, Graph) ->
  lists:foreach(
    fun(L) ->
        Code = hipe_bb:code(?CFG:bb(Cfg,L)),
        io:format(standard_io,"~n########## Label L~w~n", [L]),
        pp_instrs(Code, Graph)
    end,
    Labels).

%% Prints the instructions one after the other.
pp_instrs(Instrs, Graph) ->
  lists:foreach(fun(I) -> pp_instr(I, Graph) end, Instrs).

%% Prints the Xsi stored in Graph under Key.
pp_xsi_link(Key, Graph) ->
  {_Key,Xsi} = ?GRAPH:vertex(Graph, Key),
  pp_xsi(Xsi).

%% Prints one Xsi: its label, expression, operands, definition and the
%% cba/later/wba flags.
pp_xsi(Xsi) ->
  io:format(standard_io, " [L~w] ", [Xsi#xsi.label]),
  io:format(standard_io, "[", []),
  pp_expr(Xsi#xsi.inst),
  io:format(standard_io, "] Xsi(", []),
  pp_xsi_args(xsi_oplist(Xsi)),
  io:format(standard_io, ") (", []),
  pp_xsi_def(Xsi#xsi.def),
  io:format(standard_io, ") cba=~w, later=~w | wba=~w~n",
            [Xsi#xsi.cba,Xsi#xsi.later,Xsi#xsi.wba]).
%% Prints one instruction. An alu is printed as "Dst <- Expr"; everything
%% else is first handed to the RTL pretty-printer, and if that raises, the
%% SSAPRE-specific records are printed by the local printers (anything
%% unknown is dumped between asterisks).
pp_instr(I, Graph) ->
  case I of
    #alu{} ->
      io:format(standard_io, " ", []),
      pp_arg(?RTL:alu_dst(I)),
      io:format(standard_io, " <- ", []),
      pp_expr(I),
      io:format(standard_io, "~n", []);
    _ ->
      try ?RTL:pp_instr(standard_io, I)
      catch _:_ ->
          %% Not an RTL instruction: fall back on the local printers
          case I of
            #pre_candidate{} ->
              pp_pre(I);
            #xsi{} ->
              pp_xsi(I);
            #xsi_link{num = Key} ->
              pp_xsi_link(Key, Graph);
            _ ->
              io:format(standard_io,"*** ~w ***~n", [I])
          end
      end
  end.

%% Prints a #pre_candidate{}: its alu followed by its def in brackets.
pp_pre(I) ->
  Alu = I#pre_candidate.alu,
  io:format(standard_io, " ", []),
  pp_arg(?RTL:alu_dst(Alu)),
  io:format(standard_io, " <- ", []),
  pp_expr(Alu),
  io:format(standard_io, " [ ", []),
  pp_arg(I#pre_candidate.def),
  %%io:format(standard_io, "~w", [I#pre_candidate.def]),
  io:format(standard_io, " ]~n",[]).

%% Prints an alu as "Dst <- Src1 Op Src2".
pp_expr(I) ->
  pp_arg(?RTL:alu_dst(I)),
  io:format(standard_io, " <- ", []),
  pp_arg(?RTL:alu_src1(I)),
  io:format(standard_io, " ~w ", [?RTL:alu_op(I)]),
  pp_arg(?RTL:alu_src2(I)).

%% Prints one operand, whatever its kind.
pp_arg(Arg) ->
  case Arg of
    bottom ->
      io:format(standard_io, "_|_", []);
    #bottom{} ->
      io:format(standard_io, "_|_:~w (", [Arg#bottom.key]),
      pp_arg(Arg#bottom.var),
      io:format(standard_io,")",[]);
    #temp{} ->
      pp_xsi_def(Arg);
    #eop{} ->
      io:format(standard_io,"#",[]),
      pp_expr(Arg#eop.expr),
      io:format(standard_io,"(",[]),
      pp_arg(Arg#eop.var),
      io:format(standard_io,")#",[]);
    #const_expr{} ->
      io:format(standard_io,"*",[]),
      pp_arg(Arg#const_expr.var),
      io:format(standard_io," -> ",[]),
      pp_arg(Arg#const_expr.value),
      io:format(standard_io,"*",[]);
    undefined ->
      io:format(standard_io, "...", []); %%"undefined", []);
    #alu{} ->
      pp_expr(Arg);
    _ ->
      ?RTL:pp_arg(standard_io, Arg)
  end.

%% Prints a comma-separated operand list; `undefined' prints a placeholder.
pp_args([]) ->
  ok;
pp_args(undefined) ->
  io:format(standard_io, "...,...,...", []);
pp_args([Last]) ->
  pp_arg(Last);
pp_args([First|Others]) ->
  pp_arg(First),
  io:format(standard_io, ", ", []),
  pp_args(Others).
%% Prints the operands of an Xsi as "{Pred| Op}" items separated by
%% commas. A non-list argument is handed to pp_args/1.
pp_xsi_args([]) -> ok;
pp_xsi_args([Last]) ->
  io:format(standard_io, "{~w| ", [Last#xsi_op.pred]),
  pp_arg(Last#xsi_op.op),
  io:format(standard_io, "}", []);
pp_xsi_args([First|Others]) ->
  io:format(standard_io, "{~w| ", [First#xsi_op.pred]),
  pp_arg(First#xsi_op.op),
  io:format(standard_io, "}, ", []),
  pp_xsi_args(Others);
pp_xsi_args(Args) ->
  pp_args(Args).

%% Prints a #temp{} as "tKey (Var)".
pp_xsi_def(#temp{key = Key, var = Var}) ->
  io:format(standard_io, "t~w (", [Key]),
  pp_arg(Var),
  io:format(standard_io,")",[]).

%% Prints the size of the CFG graph, then its merge-point vertices.
pp_cfggraph(G) ->
  Vertices = lists:sort(?GRAPH:vertices(G)),
  io:format(standard_io, "Size of the CFG Graph: ~w ~n", [length(Vertices)]),
  pp_cfgvertex(Vertices, G).

%% Prints the size of the Xsi graph, then each of its vertices.
pp_xsigraph(G) ->
  Vertices = lists:sort(?GRAPH:vertices(G)),
  io:format(standard_io, "Size of the Xsi Graph: ~w ~n", [length(Vertices)]),
  pp_xsivertex(Vertices,G).

%% Prints each Xsi vertex with its out-neighbours.
pp_xsivertex(Keys, G) ->
  lists:foreach(
    fun(Key) ->
        {V,Xsi} = ?GRAPH:vertex(G, Key),
        OutNeighbours = ?GRAPH:out_neighbours(G, V),
        ?pp_debug(" ~w -> ~w", [V,OutNeighbours]),
        pp_xsi(Xsi)
    end,
    Keys).

%% Prints the attributes and edges of every merge-point vertex; vertices
%% of any other type are skipped.
pp_cfgvertex(Keys, G) ->
  lists:foreach(
    fun(Key) ->
        {V,Block} = ?GRAPH:vertex(G,Key),
        case Block#block.type of
          mp ->
            ?pp_debug("~n Block ~w's attributes: ~n", [V]),
            pp_attributes(Block),
            ?pp_debug("~n Block ~w's edges: ~n", [V]),
            pp_edges(G, ?GRAPH:in_edges(G,Key), ?GRAPH:out_edges(G,Key));
          _ ->
            ok
        end
    end,
    Keys).

%% Prints the #mp{} attributes of a block, if it has any.
pp_attributes(Block) ->
  case Block#block.attributes of
    undefined ->
      ok;
    Att ->
      ?pp_debug(" Maps: ~n",[]),pp_maps(gb_trees:keys(Att#mp.maps),Att#mp.maps),
      ?pp_debug(" Uses: ~n",[]),pp_uses(gb_trees:keys(Att#mp.uses),Att#mp.uses),
      ?pp_debug(" Defs: ~w~n",[Att#mp.defs]),
      ?pp_debug(" Xsis: ~w~n",[Att#mp.xsis]),
      ?pp_debug(" NDS : ",[]),pp_nds(?SETS:to_list(Att#mp.ndsSet))
  end.
%% Prints, for each operand key of the `maps' tree, the operand and the
%% list of Xsi keys stored for it.
pp_maps(Keys, Maps) ->
  lists:foreach(
    fun(K) ->
        ?pp_debug(" ",[]),pp_arg(K#xsi_op.op),?pp_debug("-> ~w~n",[?SETS:to_list(gb_trees:get(K,Maps))])
    end,
    Keys).

%% Prints, for each key of the `uses' tree, the key and its set as a list.
pp_uses(Keys, Uses) ->
  lists:foreach(
    fun(K) ->
        ?pp_debug(" ~w -> ~w~n",[K,?SETS:to_list(gb_trees:get(K,Uses))])
    end,
    Keys).

%% Prints the not-downsafe operands separated by commas.
pp_nds([]) -> ?pp_debug("~n",[]);
pp_nds(undefined) -> ?pp_debug("None",[]);
pp_nds([Last]) ->
  pp_arg(Last#xsi_op.op), ?pp_debug("~n",[]);
pp_nds([First|Others]) ->
  pp_arg(First#xsi_op.op), ?pp_debug(", ",[]),
  pp_nds(Others).

%% Prints all incoming edges, then all outgoing edges, with their labels.
pp_edges(G, Ins, Outs) ->
  lists:foreach(
    fun(IN) ->
        {_E,V1,V2,Label} = ?GRAPH:edge(G,IN),
        ?pp_debug(" In edge ~w -> ~w (~w)~n", [V1,V2,?SETS:to_list(Label)])
    end,
    Ins),
  lists:foreach(
    fun(OUT) ->
        {_E,V1,V2,Label} = ?GRAPH:edge(G,OUT),
        ?pp_debug(" Out edge ~w -> ~w (~w)~n", [V1,V2,?SETS:to_list(Label)])
    end,
    Outs).

-endif.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  COUNTERS  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% The counters below live in the process dictionary of the calling process.

%% Resets the temp and bottom index counters to 0.
init_counters() ->
  put({ssapre_temp,temp_count}, 0),
  put({ssapre_index,index_count}, 0).

%% Returns a #bottom{} carrying the next bottom index and a fresh RTL
%% variable, and advances the index counter.
new_bottom() ->
  CounterKey = {ssapre_index, index_count},
  Index = get(CounterKey),
  put(CounterKey, Index+1),
  #bottom{key = Index, var = ?RTL:mk_new_var()}.

%% Returns a #temp{} carrying the next temp index and a fresh RTL variable,
%% and advances the temp counter.
new_temp() ->
  CounterKey = {ssapre_temp, temp_count},
  Index = get(CounterKey),
  put(CounterKey, Index+1),
  #temp{key = Index, var = ?RTL:mk_new_var()}.

%% Resets the redundancy counter to 0.
init_redundancy_count() ->
  put({ssapre_redundancy,redundancy_count}, 0).

%% Increments the redundancy counter by one.
redundancy_add() ->
  CounterKey = {ssapre_redundancy, redundancy_count},
  Count = get(CounterKey),
  put(CounterKey, Count+1).

-ifdef(SSAPRE_DEBUG).
%% Current value of the redundancy counter (debug builds only).
get_redundancy_count() ->
  get({ssapre_redundancy,redundancy_count}).
-endif.
diff --git a/lib/hipe/rtl/hipe_rtl_symbolic.erl b/lib/hipe/rtl/hipe_rtl_symbolic.erl
new file mode 100644
index 0000000000..bc8640dec9
--- /dev/null
+++ b/lib/hipe/rtl/hipe_rtl_symbolic.erl
@@ -0,0 +1,99 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2004-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%%-------------------------------------------------------------------
%% File : hipe_rtl_symbolic.erl
%% Author : Per Gustafsson <pergu@it.uu.se>
%% Description : Expansion of symbolic instructions.
%%
%% Created : 18 May 2004 by Per Gustafsson <pergu@it.uu.se>
%%-------------------------------------------------------------------

-module(hipe_rtl_symbolic).

-export([expand/1]).

-include("hipe_rtl.hrl").
-include("hipe_literals.hrl").
-include("../icode/hipe_icode_primops.hrl").

%% expand(Cfg) -> NewCfg
%%
%% Linearizes the RTL CFG, replaces every symbolic instruction by its
%% expansion (see expand_instr/1) and rebuilds a CFG from the result.
expand(Cfg) ->
  Linear = hipe_rtl_cfg:linearize(Cfg),
  %% expand_instr/1 yields either a single instruction or a list of
  %% instructions, hence the flatten.
  Expanded = lists:flatten([expand_instr(Instr) || Instr <- hipe_rtl:rtl_code(Linear)]),
  hipe_rtl_cfg:init(hipe_rtl:rtl_code_update(Linear, Expanded)).

%% Expands #fixnumop{} and #gctest{} instructions; any other instruction is
%% returned unchanged.
expand_instr(#fixnumop{} = Instr) ->
  expand_fixnumop(Instr);
expand_instr(#gctest{} = Instr) ->
  expand_gctest(Instr);
expand_instr(Instr) ->
  Instr.
%% Expands a symbolic fixnum tag/untag operation into the code produced by
%% hipe_tagscheme for the instruction's destination and source.
expand_fixnumop(Instr) ->
  Type = hipe_rtl:fixnumop_type(Instr),
  Dst = hipe_rtl:fixnumop_dst(Instr),
  Src = hipe_rtl:fixnumop_src(Instr),
  case Type of
    untag ->
      hipe_tagscheme:realuntag_fixnum(Dst, Src);
    tag ->
      hipe_tagscheme:realtag_fixnum(Dst, Src)
  end.

%% Expands a symbolic GC test into:
%%   load heap pointer and heap limit; Tmp = limit - hp;
%%   branch to the GC label when Tmp < bytes needed (taken with
%%   probability 0.01), otherwise to the continuation label;
%%   the GC label calls gc_1 with the tagged word count and continues.
%% The heap need is either a register (sizes are computed at run time) or
%% an immediate (sizes are computed here).
expand_gctest(Instr) ->
  HeapNeed = hipe_rtl:gctest_words(Instr),
  {GetHPInsn, HP, _PutHPInsn} = hipe_rtl_arch:heap_pointer(),
  {GetHLIMITInsn, H_LIMIT} = hipe_rtl_arch:heap_limit(),
  ContLabel = hipe_rtl:mk_new_label(),
  GCLabel = hipe_rtl:mk_new_label(),
  ContLabelName = hipe_rtl:label_name(ContLabel),
  GCLabelName = hipe_rtl:label_name(GCLabel),
  Tmp = hipe_rtl:mk_new_reg(), % diff between two gc-unsafe pointers
  Prologue = [GetHPInsn,
              GetHLIMITInsn,
              hipe_rtl:mk_alu(Tmp, H_LIMIT, 'sub', HP)],
  {SizeCode, GCAmount, HPAmount} =
    case hipe_rtl:is_reg(HeapNeed) of
      true ->
        GA = hipe_rtl:mk_new_reg_gcsafe(),
        HA = hipe_rtl:mk_new_reg_gcsafe(),
        WordShift = hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size()),
        ToBytes = hipe_rtl:mk_alu(HA, HeapNeed, sll, WordShift),
        {[ToBytes|hipe_tagscheme:realtag_fixnum(GA, HeapNeed)], GA, HA};
      false ->
        WordsNeeded = hipe_rtl:imm_value(HeapNeed),
        {[],
         hipe_rtl:mk_imm(hipe_tagscheme:mk_fixnum(WordsNeeded)),
         hipe_rtl:mk_imm(WordsNeeded*hipe_rtl_arch:word_size())}
    end,
  Epilogue = [hipe_rtl:mk_branch(Tmp, 'lt', HPAmount, GCLabelName, ContLabelName, 0.01),
              GCLabel,
              hipe_rtl:mk_call([], 'gc_1', [GCAmount], ContLabelName, [], not_remote),
              ContLabel],
  Prologue ++ SizeCode ++ Epilogue.

diff --git a/lib/hipe/rtl/hipe_rtl_varmap.erl b/lib/hipe/rtl/hipe_rtl_varmap.erl
new file mode 100644
index 0000000000..9bd5e88611
--- /dev/null
+++ b/lib/hipe/rtl/hipe_rtl_varmap.erl
@@ -0,0 +1,161 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Copyright (c) 2001 by Erik Johansson. All Rights Reserved
%% Time-stamp: <2008-04-20 14:55:35 richard>
%% ====================================================================
%% Module : hipe_rtl_varmap
%% Purpose :
%% Notes :
%% History : * 2001-04-10 Erik Johansson (happi@it.uu.se): Created.
%% ====================================================================
%% Exports :
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-module(hipe_rtl_varmap).

-export([init/1,
         ivs2rvs/2,
         icode_var2rtl_var/2,
         icode_label2rtl_label/2]).

%-------------------------------------------------------------------------

-include("../main/hipe.hrl").
-include("../icode/hipe_icode.hrl").

%-------------------------------------------------------------------------

%% @spec init(IcodeRecord::#icode{}) -> {Args, VarMap}
%%
%% @doc Initializes gensym for RTL: the variable counter starts at the
%%      first virtual register and the label counter at 0. The Icode
%%      parameters are then mapped to RTL variables in a fresh variable
%%      map; the RTL parameters and the resulting map are returned.

-spec init(#icode{}) -> {[_], _}. % XXX: fix me please

init(IcodeRecord) ->
  hipe_gensym:init(rtl),
  hipe_gensym:set_var(rtl, hipe_rtl_arch:first_virtual_reg()),
  hipe_gensym:set_label(rtl, 0),
  ivs2rvs(hipe_icode:icode_params(IcodeRecord), new_var_map()).


%%------------------------------------------------------------------------
%%
%% Mapping of labels and variables from Icode to RTL.
%%
%%------------------------------------------------------------------------


%% @spec icode_label2rtl_label(Icode_Label::term(), LabelMap::term()) ->
%%           {RTL_Label, NewLabelMap}
%%
%% @doc Converts an Icode label to an RTL label.  A label that is
%% already in the map is returned together with the unchanged map;
%% otherwise a fresh RTL label is created and recorded in the map.

icode_label2rtl_label(LabelName, Map) ->
  case lookup(LabelName, Map) of
    none ->
      FreshLabel = hipe_rtl:mk_new_label(),
      {FreshLabel, insert(LabelName, FreshLabel, Map)};
    {value, KnownLabel} ->
      {KnownLabel, Map}
  end.


%% @spec ivs2rvs(Icode_Vars::[term()], VarMap::term()) -> {[RTL_Var],NewVarMap}
%%
%% @doc Converts a list of Icode variables to a list of RTL variables.
%% The variables are converted from left to right, threading the
%% variable map through every conversion.

ivs2rvs(IcodeVars, VarMap) ->
  lists:mapfoldl(fun icode_var2rtl_var/2, VarMap, IcodeVars).


%% @spec icode_var2rtl_var(Icode_Var::term(), VarMap::term()) ->
%%           {RTL_Var, NewVarMap}
%%
%% @doc Converts an Icode variable to an RTL variable.  A variable
%% that is already in the map keeps its RTL counterpart; otherwise a
%% new RTL fp register, variable or register is created according to
%% the kind of the Icode variable and recorded in the map.

icode_var2rtl_var(Var, Map) ->
  case lookup(Var, Map) of
    {value, RtlVar} ->
      {RtlVar, Map};
    none ->
      RtlVar =
	case type_of_var(Var) of
	  fvar ->
	    hipe_rtl:mk_new_fpreg();
	  var ->
	    hipe_rtl:mk_new_var();
	  {reg, IsGcSafe} ->
	    %% Only gc-unsafe registers are handled here.
	    case IsGcSafe of
	      %% true -> hipe_rtl:mk_new_reg_gcsafe();
	      false -> hipe_rtl:mk_new_reg()
	    end
	end,
      {RtlVar, insert(Var, RtlVar, Map)}
  end.

%%
%% Simple type test
%%
%% Classifies an Icode operand as fvar, var, {reg, IsGcSafe} or const.
%% The predicates are tried in that order; an operand matching none of
%% them makes the process exit.

type_of_var(X) ->
  case hipe_icode:is_fvar(X) of
    true -> fvar;
    false -> type_of_non_fvar(X)
  end.

%% Continues the classification for operands that are not fp variables.
type_of_non_fvar(X) ->
  case hipe_icode:is_var(X) of
    true -> var;
    false -> type_of_reg_or_const(X)
  end.

%% Continues the classification for operands that are not variables.
type_of_reg_or_const(X) ->
  case hipe_icode:is_reg(X) of
    true ->
      {reg, hipe_icode:reg_is_gcsafe(X)};
    false ->
      %% Sanity check
      case hipe_icode:is_const(X) of
	true -> const;
	false -> exit({"Unknown Icode variable", X})
      end
  end.

%%
%% Helping utilities
%%
%% The variable/label map is a gb_tree from Icode names to RTL names.

%% Returns an empty map.
new_var_map() ->
  gb_trees:empty().

%% Returns {value, Val} when Key is in Tree, and none otherwise.
lookup(Key, Tree) ->
  gb_trees:lookup(Key, Tree).

%% Adds NewKey with value NewVal to Tree.
insert(NewKey, NewVal, Tree) ->
  gb_trees:insert(NewKey, NewVal, Tree).
diff --git a/lib/hipe/rtl/hipe_tagscheme.erl b/lib/hipe/rtl/hipe_tagscheme.erl new file mode 100644 index 0000000000..dc44b803a1 --- /dev/null +++ b/lib/hipe/rtl/hipe_tagscheme.erl @@ -0,0 +1,1209 @@
%% -*- erlang-indent-level: 2 -*-
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
%%========================================================================
%%
%% Filename : hipe_tagscheme.erl
%% Note : This is specific to Erlang 5.* (i.e. R9 to R13).
%%
%% Modifications:
%% 020904: Happi - added support for external pids and ports.
%%
%%========================================================================
%% $Id$
%%========================================================================

-module(hipe_tagscheme).
+ +-export([mk_nil/0, mk_fixnum/1, mk_arityval/1, mk_non_value/0]). +-export([is_fixnum/1]). +-export([tag_tuple/2, tag_cons/2]). +-export([test_is_boxed/4, get_header/2]). +-export([test_nil/4, test_cons/4, test_flonum/4, test_fixnum/4, + test_tuple/4, test_atom/4, test_bignum/4, test_pos_bignum/4, + test_any_pid/4, test_any_port/4, + test_ref/4, test_fun/4, test_fun2/5, test_matchstate/4, + test_binary/4, test_bitstr/4, test_list/4, + test_integer/4, test_number/4, test_constant/4, test_tuple_N/5]). +-export([realtag_fixnum/2, tag_fixnum/2, realuntag_fixnum/2, untag_fixnum/2]). +-export([test_two_fixnums/3, test_fixnums/4, unsafe_fixnum_add/3, + unsafe_fixnum_sub/3, + fixnum_gt/5, fixnum_lt/5, fixnum_ge/5, fixnum_le/5, fixnum_val/1, + fixnum_mul/4, + fixnum_addsub/5, fixnum_andorxor/4, fixnum_not/2, + fixnum_bsr/3, fixnum_bsl/3]). +-export([unsafe_car/2, unsafe_cdr/2, + unsafe_constant_element/3, unsafe_update_element/3, element/6]). +-export([unsafe_closure_element/3]). +-export([mk_fun_header/0, tag_fun/2]). +-export([unsafe_untag_float/2, unsafe_tag_float/2]). +-export([mk_sub_binary/6,mk_sub_binary/7]). +-export([unsafe_mk_big/3, unsafe_load_float/3]). +-export([bignum_sizeneed/1,bignum_sizeneed_code/2, get_one_word_pos_bignum/3]). +-export([test_subbinary/3, test_heap_binary/3]). +-export([create_heap_binary/3, create_refc_binary/3, create_refc_binary/4]). +-export([create_matchstate/6, convert_matchstate/1, compare_matchstate/4]). +-export([get_field_from_term/3, get_field_from_pointer/3, + set_field_from_term/3, set_field_from_pointer/3, + extract_matchbuffer/2, extract_binary_bytes/2]). + +-include("hipe_rtl.hrl"). +-include("hipe_literals.hrl"). + +-ifdef(EFT_NATIVE_ADDRESS). +-export([if_fun_get_arity_and_address/5]). +-endif. + +-undef(TAG_PRIMARY_BOXED). +-undef(TAG_IMMED2_MASK). +-undef(TAG_IMMED2_CATCH). +-undef(TAG_IMMED2_SIZE). + +%%------------------------------------------------------------------------ + +-define(TAG_PRIMARY_SIZE, 2). 
%% Primary tag: the low ?TAG_PRIMARY_SIZE bits of a tagged word.
-define(TAG_PRIMARY_MASK, 16#3).
-define(TAG_PRIMARY_HEADER, 16#0).
-define(TAG_PRIMARY_LIST, 16#1).
-define(TAG_PRIMARY_BOXED, 16#2).
-define(TAG_PRIMARY_IMMED1, 16#3).

%% Immed1 tag: four bits, of which the low two are ?TAG_PRIMARY_IMMED1.
-define(TAG_IMMED1_SIZE, 4).
-define(TAG_IMMED1_MASK, 16#F).
-define(TAG_IMMED1_PID, ((16#0 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_IMMED1)).
-define(TAG_IMMED1_PORT, ((16#1 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_IMMED1)).
-define(TAG_IMMED1_IMMED2,((16#2 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_IMMED1)).
-define(TAG_IMMED1_SMALL, ((16#3 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_IMMED1)).

%% Immed2 tag: six bits, of which the low four are ?TAG_IMMED1_IMMED2.
-define(TAG_IMMED2_SIZE, 6).
-define(TAG_IMMED2_MASK, 16#3F).
-define(TAG_IMMED2_ATOM, ((16#0 bsl ?TAG_IMMED1_SIZE) bor ?TAG_IMMED1_IMMED2)).
-define(TAG_IMMED2_CATCH, ((16#1 bsl ?TAG_IMMED1_SIZE) bor ?TAG_IMMED1_IMMED2)).
-define(TAG_IMMED2_NIL, ((16#3 bsl ?TAG_IMMED1_SIZE) bor ?TAG_IMMED1_IMMED2)).

%% Header tags: a subtag placed above the ?TAG_PRIMARY_HEADER bits.
-define(TAG_HEADER_ARITYVAL,((16#0 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).
-define(TAG_HEADER_BIN_MATCHSTATE, ((16#1 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).
-define(TAG_HEADER_POS_BIG, ((16#2 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).
-define(TAG_HEADER_NEG_BIG, ((16#3 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).
%% The single bit in which the POS_BIG and NEG_BIG header tags differ.
-define(BIG_SIGN_BIT, (16#1 bsl ?TAG_PRIMARY_SIZE)).
-define(TAG_HEADER_REF, ((16#4 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).
-define(TAG_HEADER_FUN, ((16#5 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).
-define(TAG_HEADER_FLOAT, ((16#6 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).
-define(TAG_HEADER_EXPORT, ((16#7 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).
%% The two bits in which the REFC_BIN, HEAP_BIN and SUB_BIN header tags
%% differ; masking them away leaves a value common to all three.
-define(BINARY_XXX_MASK, (16#3 bsl ?TAG_PRIMARY_SIZE)).
-define(TAG_HEADER_REFC_BIN,((16#8 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).
-define(TAG_HEADER_HEAP_BIN,((16#9 bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).
-define(TAG_HEADER_SUB_BIN, ((16#A bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).
-define(TAG_HEADER_EXTERNAL_PID, ((16#C bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).
-define(TAG_HEADER_EXTERNAL_PORT,((16#D bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).
-define(TAG_HEADER_EXTERNAL_REF, ((16#E bsl ?TAG_PRIMARY_SIZE) bor ?TAG_PRIMARY_HEADER)).

%% A header word keeps its tag in the low ?HEADER_ARITY_OFFS bits and
%% its size (arity) in the bits above them.
-define(TAG_HEADER_MASK, 16#3F).
-define(HEADER_ARITY_OFFS, 6).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Returns, as an Erlang integer, the header word for size Size and
%% header tag Tag.
mk_header(Size, Tag) -> Tag + (Size bsl ?HEADER_ARITY_OFFS).

%% Returns the header word (arityval) of a tuple with Arity elements.
mk_arityval(Arity) -> mk_header(Arity, ?TAG_HEADER_ARITYVAL).

%% RTL code that extracts the size field of the header in Header and
%% leaves it in Sz.
size_from_header(Sz, Header) ->
  Shift = hipe_rtl:mk_imm(?HEADER_ARITY_OFFS),
  [hipe_rtl:mk_alu(Sz, Header, srl, Shift)].

%% RTL code that builds, in Header, a header word from the run-time
%% size in Size and the compile-time constant tag Tag.
mk_var_header(Header, Size, Tag) ->
  Shifted = hipe_rtl:mk_new_reg_gcsafe(),
  ShiftInsn =
    hipe_rtl:mk_alu(Shifted, Size, 'sll', hipe_rtl:mk_imm(?HEADER_ARITY_OFFS)),
  TagInsn = hipe_rtl:mk_alu(Header, Shifted, add, hipe_rtl:mk_imm(Tag)),
  [ShiftInsn, TagInsn].

%% Returns the tagged representation of the small integer X.
mk_fixnum(X) -> ?TAG_IMMED1_SMALL + (X bsl ?TAG_IMMED1_SIZE).

-define(NIL, ((-1 bsl ?TAG_IMMED2_SIZE) bor ?TAG_IMMED2_NIL)).
%% Returns the tagged representation of the empty list.
mk_nil() -> ?NIL.
%% mk_atom(X) -> (X bsl ?TAG_IMMED2_SIZE) + ?TAG_IMMED2_ATOM.
%% Returns the word used as THE_NON_VALUE.
mk_non_value() -> ?THE_NON_VALUE.

%% Tells whether the integer N fits in a fixnum, that is, in a signed
%% field that is ?TAG_IMMED1_SIZE bits narrower than a machine word.
-spec is_fixnum(integer()) -> boolean().
is_fixnum(N) when is_integer(N) ->
  Bits = ?bytes_to_bits(hipe_rtl_arch:word_size()) - ?TAG_IMMED1_SIZE,
  Limit = 1 bsl (Bits - 1),
  N =< Limit - 1 andalso N >= -Limit.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-define(HEADER_EXPORT, mk_header(1, ?TAG_HEADER_EXPORT)).
-define(HEADER_FUN, mk_header(?ERL_FUN_SIZE-2, ?TAG_HEADER_FUN)).
-define(HEADER_PROC_BIN, mk_header(?PROC_BIN_WORDSIZE-1, ?TAG_HEADER_REFC_BIN)).
-define(HEADER_SUB_BIN, mk_header(?SUB_BIN_WORDSIZE-2, ?TAG_HEADER_SUB_BIN)).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% RTL instruction that adds the boxed tag to the pointer in X and
%% leaves the tagged pointer in Res.
tag_boxed(Res, X) ->
  BoxedTag = hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED),
  hipe_rtl:mk_alu(Res, X, add, BoxedTag).

%% tag_bignum(Res, X) -> tag_boxed(Res, X).
%% Floats and tuples are boxed objects and are tagged as such.
tag_flonum(Res, X) -> tag_boxed(Res, X).
tag_tuple(Res, X) -> tag_boxed(Res, X).

%% RTL instruction that adds the list tag to the pointer in X and
%% leaves the tagged pointer in Res.
tag_cons(Res, X) ->
  ListTag = hipe_rtl:mk_imm(?TAG_PRIMARY_LIST),
  hipe_rtl:mk_alu(Res, X, add, ListTag).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% Operations to test if an object has a known type T.
%%% Each test takes the object X, the label names to branch to when
%%% the test succeeds (TrueLab) or fails (FalseLab), and the branch
%%% prediction Pred that is passed on to the generated branches.

%% Branches to TrueLab when X is the NIL word.
test_nil(X, TrueLab, FalseLab, Pred) ->
  Nil = hipe_rtl:mk_imm(?NIL),
  hipe_rtl:mk_branch(X, 'eq', Nil, TrueLab, FalseLab, Pred).

%% Masks X with the primary tag bits that are not part of the list tag
%% and branches on the 'eq' condition of that operation.
test_cons(X, TrueLab, FalseLab, Pred) ->
  Scratch = hipe_rtl:mk_new_reg_gcsafe(),
  NonListBits = hipe_rtl:mk_imm(?TAG_PRIMARY_MASK - ?TAG_PRIMARY_LIST),
  hipe_rtl:mk_alub(Scratch, X, 'and', NonListBits, eq,
		   TrueLab, FalseLab, Pred).

%% Masks X with the primary tag bits that are not part of the boxed tag
%% and branches on the 'eq' condition of that operation.
test_is_boxed(X, TrueLab, FalseLab, Pred) ->
  Scratch = hipe_rtl:mk_new_reg_gcsafe(),
  NonBoxedBits = hipe_rtl:mk_imm(?TAG_PRIMARY_MASK - ?TAG_PRIMARY_BOXED),
  hipe_rtl:mk_alub(Scratch, X, 'and', NonBoxedBits, eq,
		   TrueLab, FalseLab, Pred).

%% Loads into Res the word that the boxed pointer X refers to; the
%% negative offset cancels the boxed tag.
get_header(Res, X) ->
  Untag = hipe_rtl:mk_imm(-(?TAG_PRIMARY_BOXED)),
  hipe_rtl:mk_load(Res, X, Untag).

%% Branches to TrueLab when (X and Mask) equals Value, else to FalseLab.
mask_and_compare(X, Mask, Value, TrueLab, FalseLab, Pred) ->
  Masked = hipe_rtl:mk_new_reg_gcsafe(),
  MaskInsn = hipe_rtl:mk_alu(Masked, X, 'and', hipe_rtl:mk_imm(Mask)),
  CmpInsn = hipe_rtl:mk_branch(Masked, eq, hipe_rtl:mk_imm(Value),
			       TrueLab, FalseLab, Pred),
  [MaskInsn, CmpInsn].

%% Tests whether the immed1 tag of X equals Value.
test_immed1(X, Value, TrueLab, FalseLab, Pred) ->
  mask_and_compare(X, ?TAG_IMMED1_MASK, Value, TrueLab, FalseLab, Pred).

%% Tests whether X carries the immed1 pid tag.
test_internal_pid(X, TrueLab, FalseLab, Pred) ->
  test_immed1(X, ?TAG_IMMED1_PID, TrueLab, FalseLab, Pred).

%% Tests whether X is a pid: first the immediate pid tag and, when
%% that fails, the external pid header of a boxed object.
test_any_pid(X, TrueLab, FalseLab, Pred) ->
  ExternalLab = hipe_rtl:mk_new_label(),
  ExternalName = hipe_rtl:label_name(ExternalLab),
  InternalTest = test_internal_pid(X, TrueLab, ExternalName, Pred),
  ExternalTest = test_external_pid(X, TrueLab, FalseLab, Pred),
  [InternalTest, ExternalLab, ExternalTest].

%% Tests whether X is a boxed object whose header tag is the external
%% pid tag.
test_external_pid(X, TrueLab, FalseLab, Pred) ->
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  BoxedLab = hipe_rtl:mk_new_label(),
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, Pred),
  LoadHeader = get_header(Header, X),
  TagTest = mask_and_compare(Header, ?TAG_HEADER_MASK,
			     ?TAG_HEADER_EXTERNAL_PID,
			     TrueLab, FalseLab, Pred),
  [BoxedTest, BoxedLab, LoadHeader, TagTest].

%% Tests whether X carries the immed1 port tag.
test_internal_port(X, TrueLab, FalseLab, Pred) ->
  test_immed1(X, ?TAG_IMMED1_PORT, TrueLab, FalseLab, Pred).

%% Tests whether X is a port: first the immediate port tag and, when
%% that fails, the external port header of a boxed object.
test_any_port(X, TrueLab, FalseLab, Pred) ->
  ExternalLab = hipe_rtl:mk_new_label(),
  ExternalName = hipe_rtl:label_name(ExternalLab),
  InternalTest = test_internal_port(X, TrueLab, ExternalName, Pred),
  ExternalTest = test_external_port(X, TrueLab, FalseLab, Pred),
  [InternalTest, ExternalLab, ExternalTest].

%% Tests whether X is a boxed object whose header tag is the external
%% port tag.
test_external_port(X, TrueLab, FalseLab, Pred) ->
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  BoxedLab = hipe_rtl:mk_new_label(),
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, Pred),
  LoadHeader = get_header(Header, X),
  TagTest = mask_and_compare(Header, ?TAG_HEADER_MASK,
			     ?TAG_HEADER_EXTERNAL_PORT,
			     TrueLab, FalseLab, Pred),
  [BoxedTest, BoxedLab, LoadHeader, TagTest].

%% Tests whether X carries the immed1 small-integer tag.
test_fixnum(X, TrueLab, FalseLab, Pred) ->
  test_immed1(X, ?TAG_IMMED1_SMALL, TrueLab, FalseLab, Pred).

%% Tests whether X carries the immed2 atom tag.
test_atom(X, TrueLab, FalseLab, Pred) ->
  mask_and_compare(X, ?TAG_IMMED2_MASK, ?TAG_IMMED2_ATOM,
		   TrueLab, FalseLab, Pred).

%% Tests whether X is a boxed object whose header tag bits are all
%% clear (the arityval tag is 0), that is, a tuple of any arity.
test_tuple(X, TrueLab, FalseLab, Pred) ->
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  HeaderTag = hipe_rtl:mk_new_reg_gcsafe(),
  BoxedLab = hipe_rtl:mk_new_label(),
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, Pred),
  LoadHeader = get_header(Header, X),
  TagTest = hipe_rtl:mk_alub(HeaderTag, Header, 'and',
			     hipe_rtl:mk_imm(?TAG_HEADER_MASK), eq,
			     TrueLab, FalseLab, Pred),
  [BoxedTest, BoxedLab, LoadHeader, TagTest].

%% Tests whether X is a tuple with exactly N elements by comparing the
%% whole header word with the arityval for N.
test_tuple_N(X, N, TrueLab, FalseLab, Pred) ->
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  BoxedLab = hipe_rtl:mk_new_label(),
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, Pred),
  LoadHeader = get_header(Header, X),
  ArityTest = hipe_rtl:mk_branch(Header, eq, hipe_rtl:mk_imm(mk_arityval(N)),
				 TrueLab, FalseLab, Pred),
  [BoxedTest, BoxedLab, LoadHeader, ArityTest].

%% Tests whether X is a reference: a boxed object whose header tag is
%% either the (internal) ref tag or the external ref tag.
test_ref(X, TrueLab, FalseLab, Pred) ->
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  HeaderTag = hipe_rtl:mk_new_reg_gcsafe(),
  BoxedLab = hipe_rtl:mk_new_label(),
  NotInternalLab = hipe_rtl:mk_new_label(),
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, Pred),
  LoadHeader = get_header(Header, X),
  ExtractTag = hipe_rtl:mk_alu(HeaderTag, Header, 'and',
			       hipe_rtl:mk_imm(?TAG_HEADER_MASK)),
  InternalTest = hipe_rtl:mk_branch(HeaderTag, eq,
				    hipe_rtl:mk_imm(?TAG_HEADER_REF),
				    TrueLab,
				    hipe_rtl:label_name(NotInternalLab),
				    Pred),
  ExternalTest = hipe_rtl:mk_branch(HeaderTag, eq,
				    hipe_rtl:mk_imm(?TAG_HEADER_EXTERNAL_REF),
				    TrueLab, FalseLab, Pred),
  [BoxedTest, BoxedLab, LoadHeader, ExtractTag,
   InternalTest, NotInternalLab, ExternalTest].

-ifdef(EFT_NATIVE_ADDRESS).
%% Tests whether X is a boxed object whose header tag is the fun tag.
test_closure(X, TrueLab, FalseLab, Pred) ->
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  BoxedLab = hipe_rtl:mk_new_label(),
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, Pred),
  LoadHeader = get_header(Header, X),
  TagTest = mask_and_compare(Header, ?TAG_HEADER_MASK, ?TAG_HEADER_FUN,
			     TrueLab, FalseLab, Pred),
  [BoxedTest, BoxedLab, LoadHeader, TagTest].
-endif.

%% Tests whether X is a fun: a boxed object whose header tag is either
%% the fun tag or the export tag.
test_fun(X, TrueLab, FalseLab, Pred) ->
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  HeaderTag = hipe_rtl:mk_new_reg_gcsafe(),
  BoxedLab = hipe_rtl:mk_new_label(),
  NotClosureLab = hipe_rtl:mk_new_label(),
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, Pred),
  LoadHeader = get_header(Header, X),
  ExtractTag = hipe_rtl:mk_alu(HeaderTag, Header, 'and',
			       hipe_rtl:mk_imm(?TAG_HEADER_MASK)),
  ClosureTest = hipe_rtl:mk_branch(HeaderTag, eq,
				   hipe_rtl:mk_imm(?TAG_HEADER_FUN),
				   TrueLab,
				   hipe_rtl:label_name(NotClosureLab),
				   Pred),
  ExportTest = hipe_rtl:mk_branch(HeaderTag, eq,
				  hipe_rtl:mk_imm(?TAG_HEADER_EXPORT),
				  TrueLab, FalseLab, Pred),
  [BoxedTest, BoxedLab, LoadHeader, ExtractTag,
   ClosureTest, NotClosureLab, ExportTest].

%% Tests whether X is a fun of arity Arity by calling
%% erlang:is_function/2 and comparing the result with the atom 'false'.
%% FalseLab is also given to the call as its fail label.
test_fun2(X, Arity, TrueLab, FalseLab, Pred) ->
  Result = hipe_rtl:mk_new_reg_gcsafe(),
  FalseAtom = hipe_rtl:mk_new_reg_gcsafe(),
  ReturnLab = hipe_rtl:mk_new_label(),
  Call = hipe_rtl:mk_call([Result], {erlang,is_function,2}, [X,Arity],
			  hipe_rtl:label_name(ReturnLab), FalseLab,
			  not_remote),
  LoadFalse = hipe_rtl:mk_load_atom(FalseAtom, false),
  Decide = hipe_rtl:mk_branch(Result, ne, FalseAtom, TrueLab, FalseLab, Pred),
  [Call, ReturnLab, LoadFalse, Decide].

%% Returns the header word of a float: the payload is 8 bytes,
%% expressed in words of the target.
flonum_header() ->
  PayloadWords = 8 div hipe_rtl_arch:word_size(),
  mk_header(PayloadWords, ?TAG_HEADER_FLOAT).

%% Tests whether X is a boxed object whose whole header word equals
%% the float header.
test_flonum(X, TrueLab, FalseLab, Pred) ->
  FloatHeader = flonum_header(),
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  BoxedLab = hipe_rtl:mk_new_label(),
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, Pred),
  LoadHeader = get_header(Header, X),
  HeaderTest = hipe_rtl:mk_branch(Header, eq, hipe_rtl:mk_imm(FloatHeader),
				  TrueLab, FalseLab, Pred),
  [BoxedTest, BoxedLab, LoadHeader, HeaderTest].

%% Tests whether X is a bignum of either sign: the sign bit is left
%% out of the mask, so both big header tags compare equal to POS_BIG.
test_bignum(X, TrueLab, FalseLab, Pred) ->
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  BoxedLab = hipe_rtl:mk_new_label(),
  AnySignMask = ?TAG_HEADER_MASK - ?BIG_SIGN_BIT,
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, Pred),
  LoadHeader = get_header(Header, X),
  TagTest = mask_and_compare(Header, AnySignMask, ?TAG_HEADER_POS_BIG,
			     TrueLab, FalseLab, Pred),
  [BoxedTest, BoxedLab, LoadHeader, TagTest].

%% Tests whether X is a positive bignum; the full header tag, sign bit
%% included, must equal POS_BIG.
test_pos_bignum(X, TrueLab, FalseLab, Pred) ->
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  BoxedLab = hipe_rtl:mk_new_label(),
  FullMask = ?TAG_HEADER_MASK,
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, Pred),
  LoadHeader = get_header(Header, X),
  TagTest = mask_and_compare(Header, FullMask, ?TAG_HEADER_POS_BIG,
			     TrueLab, FalseLab, Pred),
  [BoxedTest, BoxedLab, LoadHeader, TagTest].

%% Tests whether X is a boxed object whose header tag is the binary
%% match state tag.
test_matchstate(X, TrueLab, FalseLab, Pred) ->
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  BoxedLab = hipe_rtl:mk_new_label(),
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, Pred),
  LoadHeader = get_header(Header, X),
  TagTest = mask_and_compare(Header, ?TAG_HEADER_MASK,
			     ?TAG_HEADER_BIN_MATCHSTATE,
			     TrueLab, FalseLab, Pred),
  [BoxedTest, BoxedLab, LoadHeader, TagTest].

%% Tests whether X is a bitstring: a boxed object whose header tag,
%% with the bits that tell the binary kinds apart masked away, equals
%% the refc binary tag.
test_bitstr(X, TrueLab, FalseLab, Pred) ->
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  BoxedLab = hipe_rtl:mk_new_label(),
  AnyBinMask = ?TAG_HEADER_MASK - ?BINARY_XXX_MASK,
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, Pred),
  LoadHeader = get_header(Header, X),
  TagTest = mask_and_compare(Header, AnyBinMask, ?TAG_HEADER_REFC_BIN,
			     TrueLab, FalseLab, Pred),
  [BoxedTest, BoxedLab, LoadHeader, TagTest].

%% Tests whether X is a binary.  X must first pass the bitstring test.
%% A bitstring that is not a sub binary is accepted at once, while a
%% sub binary is accepted only when its bitsize field is 0.
test_binary(X, TrueLab, FalseLab, Pred) ->
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  BitSize = hipe_rtl:mk_new_reg_gcsafe(),
  BoxedLab = hipe_rtl:mk_new_label(),
  BitStrLab = hipe_rtl:mk_new_label(),
  SubBinLab = hipe_rtl:mk_new_label(),
  AnyBinMask = ?TAG_HEADER_MASK - ?BINARY_XXX_MASK,
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, Pred),
  LoadHeader = get_header(Header, X),
  BitStrTest = mask_and_compare(Header, AnyBinMask, ?TAG_HEADER_REFC_BIN,
				hipe_rtl:label_name(BitStrLab), FalseLab,
				Pred),
  SubBinTest = mask_and_compare(Header, ?TAG_HEADER_MASK, ?TAG_HEADER_SUB_BIN,
				hipe_rtl:label_name(SubBinLab), TrueLab, 0.5),
  LoadBitSize = get_field_from_term({sub_binary, bitsize}, X, BitSize),
  WholeBytesTest = hipe_rtl:mk_branch(BitSize, 'eq', hipe_rtl:mk_imm(0),
				      TrueLab, FalseLab, Pred),
  [BoxedTest, BoxedLab, LoadHeader,
   BitStrTest, BitStrLab,
   SubBinTest, SubBinLab,
   LoadBitSize, WholeBytesTest].

%% Tests whether X is a list: a cons cell or NIL.
test_list(X, TrueLab, FalseLab, Pred) ->
  NotConsLab = hipe_rtl:mk_new_label(),
  ConsTest = test_cons(X, TrueLab, hipe_rtl:label_name(NotConsLab), 0.5),
  NilTest = test_nil(X, TrueLab, FalseLab, Pred),
  [ConsTest, NotConsLab, NilTest].

%% Tests whether X is an integer: a fixnum or a bignum.
test_integer(X, TrueLab, FalseLab, Pred) ->
  NotFixnumLab = hipe_rtl:mk_new_label(),
  FixnumTest = test_fixnum(X, TrueLab, hipe_rtl:label_name(NotFixnumLab), 0.5),
  BignumTest = test_bignum(X, TrueLab, FalseLab, Pred),
  [FixnumTest, NotFixnumLab, BignumTest].

%% Tests whether X is a number: a fixnum, a bignum of either sign, or
%% a float.
test_number(X, TrueLab, FalseLab, Pred) ->
  NotFixnumLab = hipe_rtl:mk_new_label(),
  BoxedLab = hipe_rtl:mk_new_label(),
  NotBignumLab = hipe_rtl:mk_new_label(),
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  AnySignMask = ?TAG_HEADER_MASK - ?BIG_SIGN_BIT,
  FloatHeader = flonum_header(),
  FixnumTest = test_fixnum(X, TrueLab, hipe_rtl:label_name(NotFixnumLab), 0.5),
  BoxedTest = test_is_boxed(X, hipe_rtl:label_name(BoxedLab), FalseLab, 0.5),
  LoadHeader = get_header(Header, X),
  BignumTest = mask_and_compare(Header, AnySignMask, ?TAG_HEADER_POS_BIG,
				TrueLab, hipe_rtl:label_name(NotBignumLab),
				0.5),
  FloatTest = hipe_rtl:mk_branch(Header, eq, hipe_rtl:mk_imm(FloatHeader),
				 TrueLab, FalseLab, Pred),
  [FixnumTest, NotFixnumLab,
   BoxedTest, BoxedLab,
   LoadHeader,
   BignumTest, NotBignumLab,
   FloatTest].

%% CONS, NIL, and TUPLE are not constants, everything else is
test_constant(X, TrueLab, FalseLab, Pred) ->
  NotConsLab = hipe_rtl:mk_new_label(),
  NotNilLab = hipe_rtl:mk_new_label(),
  %% The component tests branch to FalseLab on success, so they get
  %% the complement of the prediction.
  InvPred = 1-Pred,
  ConsTest = test_cons(X, FalseLab, hipe_rtl:label_name(NotConsLab), InvPred),
  NilTest = test_nil(X, FalseLab, hipe_rtl:label_name(NotNilLab), InvPred),
  TupleTest = test_tuple(X, FalseLab, TrueLab, InvPred),
  [ConsTest, NotConsLab, NilTest, NotNilLab, TupleTest].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Returns a one-element list holding a symbolic 'tag' fixnumop; the
%% code it stands for is what realtag_fixnum/2 generates:
%%   [hipe_rtl:mk_alu(DestVar, SrcReg, sll, hipe_rtl:mk_imm(?TAG_IMMED1_SIZE)),
%%    hipe_rtl:mk_alu(DestVar, DestVar, add, hipe_rtl:mk_imm(?TAG_IMMED1_SMALL))].
tag_fixnum(DestVar, SrcReg) ->
  TagOp = hipe_rtl:mk_fixnumop(DestVar, SrcReg, tag),
  [TagOp].

%% RTL code that tags the untagged integer in SrcReg as a fixnum and
%% leaves it in DestVar: shift in room for the tag, then add the tag.
realtag_fixnum(DestVar, SrcReg) ->
  Shift = hipe_rtl:mk_imm(?TAG_IMMED1_SIZE),
  SmallTag = hipe_rtl:mk_imm(?TAG_IMMED1_SMALL),
  [hipe_rtl:mk_alu(DestVar, SrcReg, 'sll', Shift),
   hipe_rtl:mk_alu(DestVar, DestVar, 'add', SmallTag)].

%% Returns a single symbolic 'untag' fixnumop (not a list); the code it
%% stands for is what realuntag_fixnum/2 generates:
%%   hipe_rtl:mk_alu(DestReg, SrcVar, 'sra', hipe_rtl:mk_imm(?TAG_IMMED1_SIZE)).
untag_fixnum(DestReg, SrcVar) ->
  hipe_rtl:mk_fixnumop(DestReg, SrcVar, untag).

%% RTL instruction that untags the fixnum in SrcVar with an arithmetic
%% right shift and leaves the integer in DestReg.
realuntag_fixnum(DestReg, SrcVar) ->
  Shift = hipe_rtl:mk_imm(?TAG_IMMED1_SIZE),
  hipe_rtl:mk_alu(DestReg, SrcVar, sra, Shift).

%% Returns the integer value of the tagged fixnum word Fixnum.
fixnum_val(Fixnum) ->
  Fixnum bsr ?TAG_IMMED1_SIZE.

%% Tests whether all of Args (two or more operands) are fixnums: the
%% operands are and:ed together and the result is tested once.
test_fixnums(Args, TrueLab, FalseLab, Pred) ->
  {Combined, AndCode} = test_fixnums_1(Args, []),
  FinalTest = test_fixnum(Combined, TrueLab, FalseLab, Pred),
  AndCode ++ [FinalTest].

%% Builds the chain of 'and' instructions for test_fixnums/4.  Returns
%% the register holding the conjunction of all operands together with
%% the instructions in execution order.  Needs at least two operands.
test_fixnums_1([Left, Right], Acc) ->
  Conj = hipe_rtl:mk_new_reg_gcsafe(),
  LastAnd = hipe_rtl:mk_alu(Conj, Left, 'and', Right),
  {Conj, lists:reverse([LastAnd|Acc])};
test_fixnums_1([Left, Right|Rest], Acc) ->
  Conj = hipe_rtl:mk_new_reg_gcsafe(),
  And = hipe_rtl:mk_alu(Conj, Left, 'and', Right),
  test_fixnums_1([Conj|Rest], [And|Acc]).

%% Code that falls through when both Arg1 and Arg2 are fixnums and
%% jumps to FalseLab otherwise.  When Arg2 is an immediate, its tag is
%% checked here: only Arg1 is tested at run time when the tag is the
%% fixnum tag, and an unconditional jump to FalseLab is emitted when it
%% is not.
test_two_fixnums(Arg1, Arg2, FalseLab) ->
  OkLab = hipe_rtl:mk_new_label(),
  OkName = hipe_rtl:label_name(OkLab),
  case hipe_rtl:is_imm(Arg2) of
    false ->
      Both = hipe_rtl:mk_new_reg_gcsafe(),
      [hipe_rtl:mk_alu(Both, Arg1, 'and', Arg2),
       test_fixnum(Both, OkName, FalseLab, 0.99),
       OkLab];
    true ->
      case hipe_rtl:imm_value(Arg2) band ?TAG_IMMED1_MASK of
	?TAG_IMMED1_SMALL ->
	  [test_fixnum(Arg1, OkName, FalseLab, 0.99),
	   OkLab];
	_ ->
	  [hipe_rtl:mk_goto(FalseLab)]
      end
  end.

%% Fixnums are compared in their tagged form with a plain branch on
%% the comparison operator CmpOp.
fixnum_cmp(Arg1, Arg2, TrueLab, FalseLab, Pred, CmpOp) ->
  hipe_rtl:mk_branch(Arg1, CmpOp, Arg2, TrueLab, FalseLab, Pred).

%% Branches to TrueLab when Arg1 > Arg2.
fixnum_gt(Arg1, Arg2, TrueLab, FalseLab, Pred) ->
  fixnum_cmp(Arg1, Arg2, TrueLab, FalseLab, Pred, 'gt').

%% Branches to TrueLab when Arg1 < Arg2.
fixnum_lt(Arg1, Arg2, TrueLab, FalseLab, Pred) ->
  fixnum_cmp(Arg1, Arg2, TrueLab, FalseLab, Pred, 'lt').

%% Branches to TrueLab when Arg1 >= Arg2.
fixnum_ge(Arg1, Arg2, TrueLab, FalseLab, Pred) ->
  fixnum_cmp(Arg1, Arg2, TrueLab, FalseLab, Pred, 'ge').

%% Branches to TrueLab when Arg1 =< Arg2.
fixnum_le(Arg1, Arg2, TrueLab, FalseLab, Pred) ->
  fixnum_cmp(Arg1, Arg2, TrueLab, FalseLab, Pred, 'le').

%% We know the answer will be a fixnum
%% The tag is removed from Arg2 so that the sum carries exactly one tag.
unsafe_fixnum_add(Arg1, Arg2, Res) ->
  Untagged = hipe_rtl:mk_new_reg_gcsafe(),
  SmallTag = hipe_rtl:mk_imm(?TAG_IMMED1_SMALL),
  [hipe_rtl:mk_alu(Untagged, Arg2, 'sub', SmallTag),
   hipe_rtl:mk_alu(Res, Arg1, 'add', Untagged)].

%% We know the answer will be a fixnum
%% The tag is removed from Arg2 so that the difference keeps Arg1's tag.
unsafe_fixnum_sub(Arg1, Arg2, Res) ->
  Untagged = hipe_rtl:mk_new_reg_gcsafe(),
  SmallTag = hipe_rtl:mk_imm(?TAG_IMMED1_SMALL),
  [hipe_rtl:mk_alu(Untagged, Arg2, 'sub', SmallTag),
   hipe_rtl:mk_alu(Res, Arg1, 'sub', Untagged)].

%%% (16X+tag)+((16Y+tag)-tag) = 16X+tag+16Y = 16(X+Y)+tag
%%% (16X+tag)-((16Y+tag)-tag) = 16X+tag-16Y = 16(X-Y)+tag
%%% Adds or subtracts (AluOp) two fixnums, jumping to OtherLab when the
%%% operation overflows.  When Res is also one of the arguments, the
%%% result is computed into a temporary and moved to Res only after the
%%% overflow check, so the argument is intact on the overflow path.
fixnum_addsub(AluOp, Arg1, Arg2, Res, OtherLab) ->
  Untagged = hipe_rtl:mk_new_reg_gcsafe(),
  StripTag = hipe_rtl:mk_alu(Untagged, Arg2, 'sub',
			     hipe_rtl:mk_imm(?TAG_IMMED1_SMALL)),
  OverflowName = hipe_rtl:label_name(OtherLab),
  %% XXX: Consider moving this test to the users of fixnum_addsub.
  case Arg1 =:= Res orelse Arg2 =:= Res of
    false ->
      %% Args differ from res.
      OkLab = hipe_rtl:mk_new_label(),
      [StripTag,
       hipe_rtl:mk_alub(Res, Arg1, AluOp, Untagged, not_overflow,
			hipe_rtl:label_name(OkLab), OverflowName, 0.99),
       OkLab];
    true ->
      %% At least one of the arguments is the same as Res.
      Scratch = hipe_rtl:mk_new_var(), % XXX: shouldn't this var be a reg?
      OkLab = hipe_rtl:mk_new_label(),
      [StripTag,
       hipe_rtl:mk_alub(Scratch, Arg1, AluOp, Untagged, not_overflow,
			hipe_rtl:label_name(OkLab), OverflowName, 0.99),
       OkLab,
       hipe_rtl:mk_move(Res, Scratch)]
  end.

%%% ((16X+tag) div 16) * ((16Y+tag)-tag) + tag = X*16Y+tag = 16(XY)+tag
%%% Multiplies two fixnums, jumping to OtherLab when the multiplication
%%% overflows.
fixnum_mul(Arg1, Arg2, Res, OtherLab) ->
  Product = hipe_rtl:mk_new_reg_gcsafe(),
  Untagged1 = hipe_rtl:mk_new_reg_gcsafe(),
  Shifted2 = hipe_rtl:mk_new_reg_gcsafe(),
  OkLab = hipe_rtl:mk_new_label(),
  SmallTag = hipe_rtl:mk_imm(?TAG_IMMED1_SMALL),
  [untag_fixnum(Untagged1, Arg1),
   hipe_rtl:mk_alu(Shifted2, Arg2, sub, SmallTag),
   hipe_rtl:mk_alub(Product, Untagged1, mul, Shifted2, overflow,
		    hipe_rtl:label_name(OtherLab),
		    hipe_rtl:label_name(OkLab), 0.01),
   OkLab,
   hipe_rtl:mk_alu(Res, Product, add, SmallTag)].

%% Bitwise operation on two fixnums.  An 'xor' of two tagged words
%% clears the tag, which therefore has to be restored; every other
%% AluOp is applied to the tagged words as they are.
fixnum_andorxor('xor', Arg1, Arg2, Res) ->
  Cleared = hipe_rtl:mk_new_reg_gcsafe(),
  [hipe_rtl:mk_alu(Cleared, Arg1, 'xor', Arg2),	% clears tag :-(
   hipe_rtl:mk_alu(Res, Cleared, 'or', hipe_rtl:mk_imm(?TAG_IMMED1_SMALL))];
fixnum_andorxor(AluOp, Arg1, Arg2, Res) ->
  hipe_rtl:mk_alu(Res, Arg1, AluOp, Arg2).

%% Bitwise not of a fixnum: every bit above the tag is flipped while
%% the tag bits are left as they are.
fixnum_not(Arg, Res) ->
  ValueBits = (-1 bsl ?TAG_IMMED1_SIZE),
  hipe_rtl:mk_alu(Res, Arg, 'xor', hipe_rtl:mk_imm(ValueBits)).

%% Arithmetic right shift of the fixnum Arg1 by the fixnum Arg2.  The
%% tagged word is shifted as it is and the tag bits are then set again.
fixnum_bsr(Arg1, Arg2, Res) ->
  Count = hipe_rtl:mk_new_reg_gcsafe(),
  Shifted = hipe_rtl:mk_new_reg_gcsafe(),
  SmallTag = hipe_rtl:mk_imm(?TAG_IMMED1_SMALL),
  [untag_fixnum(Count, Arg2),
   hipe_rtl:mk_alu(Shifted, Arg1, sra, Count),
   hipe_rtl:mk_alu(Res, Shifted, 'or', SmallTag)].

%% If someone knows how to make this better, please do.
%% Left shift of the fixnum Arg1 by the fixnum Arg2: the tag is taken
%% off Arg1, the rest is shifted, and the tag is put back.
fixnum_bsl(Arg1, Arg2, Res) ->
  TagFree = hipe_rtl:mk_new_reg_gcsafe(),
  Count = hipe_rtl:mk_new_reg_gcsafe(),
  Shifted = hipe_rtl:mk_new_reg_gcsafe(),
  SmallTag = hipe_rtl:mk_imm(?TAG_IMMED1_SMALL),
  [untag_fixnum(Count, Arg2),
   hipe_rtl:mk_alu(TagFree, Arg1, sub, SmallTag),
   hipe_rtl:mk_alu(Shifted, TagFree, sll, Count),
   hipe_rtl:mk_alu(Res, Shifted, 'or', SmallTag)].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Loads the head of the cons cell Arg into Dst; the negative offset
%% cancels the list tag.  No type check is made.
unsafe_car(Dst, Arg) ->
  HeadOffset = -(?TAG_PRIMARY_LIST),
  hipe_rtl:mk_load(Dst, Arg, hipe_rtl:mk_imm(HeadOffset)).

%% Loads the tail of the cons cell Arg, one word past the head, into
%% Dst.  No type check is made.
unsafe_cdr(Dst, Arg) ->
  TailOffset = -(?TAG_PRIMARY_LIST) + hipe_rtl_arch:word_size(),
  hipe_rtl:mk_load(Dst, Arg, hipe_rtl:mk_imm(TailOffset)).

%% Loads into Dst the word that lies Index words past the header of
%% Tuple.  No type or range check is made.
unsafe_constant_element(Dst, Index, Tuple) ->	% Index is an immediate
  WordSize = hipe_rtl_arch:word_size(),
  ElemOffset = -(?TAG_PRIMARY_BOXED) + WordSize * hipe_rtl:imm_value(Index),
  hipe_rtl:mk_load(Dst, Tuple, hipe_rtl:mk_imm(ElemOffset)).

%% Stores Value in the word that lies Index words past the header of
%% Tuple.  No type or range check is made.
unsafe_update_element(Tuple, Index, Value) ->	% Index is an immediate
  WordSize = hipe_rtl_arch:word_size(),
  ElemOffset = -(?TAG_PRIMARY_BOXED) + WordSize * hipe_rtl:imm_value(Index),
  hipe_rtl:mk_store(Tuple, hipe_rtl:mk_imm(ElemOffset), Value).

%%% wrong semantics
%% unsafe_variable_element(Dst, Index, Tuple) -> % Index is an unknown fixnum
%%   %% Load word at (Tuple - 2) + ((Index >> 4) << 2).
%%   %% Offset = ((Index >> 4) << 2) - 2.
%%   %% Index = x..x1111 (fixnum tag is 2#1111).
%%   %% (Index >> 2) = 00x..x11 and ((Index >> 4) << 2) = 00x..x00.
%%   %% Therefore, ((Index >> 4) << 2) = (Index >> 2) - 3.
%%   %% So Offset = ((Index >> 4) << 2) - 2 = (Index >> 2) - (3 + 2).
%%   Tmp1 = hipe_rtl:mk_new_reg_gcsafe(),
%%   Tmp2 = hipe_rtl:mk_new_reg_gcsafe(),
%%   Shift = ?TAG_IMMED1_SIZE - 2,
%%   OffAdj = (?TAG_IMMED1_SMALL bsr Shift) + ?TAG_PRIMARY_BOXED,
%%   [hipe_rtl:mk_alu(Tmp1, Index, 'srl', hipe_rtl:mk_imm(Shift)),
%%    hipe_rtl:mk_alu(Tmp2, Tmp1, 'sub', hipe_rtl:mk_imm(OffAdj)),
%%    hipe_rtl:mk_load(Dst, Tuple, Tmp2)].

%% element(Dst, Index, Tuple, FailLabName, TupleInfo, IndexInfo)
%%
%% Generates RTL code that loads element Index of Tuple into Dst and
%% jumps to FailLabName when a check that cannot be omitted fails.
%%
%% TupleInfo says what is known about Tuple and selects the clause:
%%   {tuple, A} - a tuple whose arity is the integer A; the header is
%%                not loaded and A is used as the upper bound
%%   tuple      - a tuple of unknown arity; the arity is read from the
%%                header
%%   unknown    - nothing is known; Tuple is first tested to be boxed
%%                and to have a tuple header
%%
%% IndexInfo says what is known about Index:
%%   valid     - ({tuple, A} clause only) no check at all is generated
%%   fixnums   - Index is a fixnum; only the range check is generated
%%   integer N - (tuple and unknown clauses) the index is the constant
%%               N, which is used as an immediate instead of Index
%%   otherwise - Index is also tested to be a fixnum
%%
%% The range check and the load are generated by gen_element_tail/8.
element(Dst, Index, Tuple, FailLabName, {tuple, A}, IndexInfo) ->
  FixnumOkLab = hipe_rtl:mk_new_label(),
  IndexOkLab = hipe_rtl:mk_new_label(),
  Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple
  UIndex = hipe_rtl:mk_new_reg_gcsafe(),
  Arity = hipe_rtl:mk_imm(A),
  InvIndex = hipe_rtl:mk_new_reg_gcsafe(),
  Offset = hipe_rtl:mk_new_reg_gcsafe(),
  case IndexInfo of
    valid ->
      %% This is no branch, 1 load and 3 alus = 4 instr
      [untag_fixnum(UIndex, Index),
       hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
       hipe_rtl:mk_alu(Offset, UIndex, 'sll',
		       hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size())),
       hipe_rtl:mk_load(Dst, Ptr, Offset)];
    fixnums ->
      %% This is 1 branch, 1 load and 4 alus = 6 instr
      [untag_fixnum(UIndex, Index),
       hipe_rtl:mk_alu(Ptr, Tuple, 'sub',hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))|
       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex,
			FailLabName, IndexOkLab)];
    _ ->
      %% This is 3 branches, 1 load and 5 alus = 9 instr
      [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab),
		   FailLabName, 0.99),
       FixnumOkLab,
       untag_fixnum(UIndex, Index),
       hipe_rtl:mk_alu(Ptr, Tuple, 'sub',hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED))|
       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex,
			FailLabName, IndexOkLab)]
  end;
element(Dst, Index, Tuple, FailLabName, tuple, IndexInfo) ->
  FixnumOkLab = hipe_rtl:mk_new_label(),
  IndexOkLab = hipe_rtl:mk_new_label(),
  Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  UIndex = hipe_rtl:mk_new_reg_gcsafe(),
  Arity = hipe_rtl:mk_new_reg_gcsafe(),
  InvIndex = hipe_rtl:mk_new_reg_gcsafe(),
  Offset = hipe_rtl:mk_new_reg_gcsafe(),
  case IndexInfo of
    fixnums ->
      %% This is 1 branch, 2 loads and 5 alus = 8 instr
      [hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
       hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
       untag_fixnum(UIndex, Index),
       hipe_rtl:mk_alu(Arity,Header,'srl',hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))|
       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex,
			FailLabName, IndexOkLab)];
    Num when is_integer(Num) ->
      %% This is 1 branch, 2 loads and 4 alus = 7 instr
      %%
      %% The arity is not known here, so it has to be read from the
      %% header, and the constant Num is the index.  (This branch used
      %% to pass Num as the arity and the never assigned UIndex
      %% register as the index.)
      [hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
       hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
       hipe_rtl:mk_alu(Arity,Header,'srl',hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))|
       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset,
			hipe_rtl:mk_imm(Num), FailLabName, IndexOkLab)];
    _ ->
      %% This is 2 branches, 2 loads and 6 alus = 10 instr
      [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab), FailLabName, 0.99),
       FixnumOkLab,
       hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
       hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
       untag_fixnum(UIndex, Index),
       hipe_rtl:mk_alu(Arity,Header,'srl',hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))|
       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset, UIndex,
			FailLabName, IndexOkLab)]
  end;
element(Dst, Index, Tuple, FailLabName, unknown, IndexInfo) ->
  FixnumOkLab = hipe_rtl:mk_new_label(),
  BoxedOkLab = hipe_rtl:mk_new_label(),
  TupleOkLab = hipe_rtl:mk_new_label(),
  IndexOkLab = hipe_rtl:mk_new_label(),
  Ptr = hipe_rtl:mk_new_reg(), % offset from Tuple
  Header = hipe_rtl:mk_new_reg_gcsafe(),
  Tmp = hipe_rtl:mk_new_reg_gcsafe(),
  UIndex = hipe_rtl:mk_new_reg_gcsafe(),
  Arity = hipe_rtl:mk_new_reg_gcsafe(),
  InvIndex = hipe_rtl:mk_new_reg_gcsafe(),
  Offset = hipe_rtl:mk_new_reg_gcsafe(),
  case IndexInfo of
    fixnums ->
      %% This is 3 branches, 2 loads and 5 alus = 10 instr
      [test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab),
		     FailLabName, 0.99),
       BoxedOkLab,
       hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
       hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
       hipe_rtl:mk_alub(Tmp, Header, 'and',
			hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
			hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
       TupleOkLab,
       untag_fixnum(UIndex, Index),
       hipe_rtl:mk_alu(Arity, Header, 'srl',
		       hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))|
       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset,
			UIndex, FailLabName, IndexOkLab)];
    Num when is_integer(Num) ->
      %% This is 3 branches, 2 loads and 4 alus = 9 instr
      [test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab),
		     FailLabName, 0.99),
       BoxedOkLab,
       hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
       hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
       hipe_rtl:mk_alub(Tmp, Header, 'and',
			hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
			hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
       TupleOkLab,
       hipe_rtl:mk_alu(Arity, Header, 'srl',
		       hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))|
       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset,
			hipe_rtl:mk_imm(Num), FailLabName, IndexOkLab)];
    _ ->
      %% This is 4 branches, 2 loads, and 6 alus = 12 instr :(
      [test_fixnum(Index, hipe_rtl:label_name(FixnumOkLab),
		   FailLabName, 0.99),
       FixnumOkLab,
       test_is_boxed(Tuple, hipe_rtl:label_name(BoxedOkLab),
		     FailLabName, 0.99),
       BoxedOkLab,
       hipe_rtl:mk_alu(Ptr, Tuple, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
       hipe_rtl:mk_load(Header, Ptr, hipe_rtl:mk_imm(0)),
       hipe_rtl:mk_alub(Tmp, Header, 'and',
			hipe_rtl:mk_imm(?TAG_HEADER_MASK), 'eq',
			hipe_rtl:label_name(TupleOkLab), FailLabName, 0.99),
       TupleOkLab,
       untag_fixnum(UIndex, Index),
       hipe_rtl:mk_alu(Arity, Header, 'srl',
		       hipe_rtl:mk_imm(?HEADER_ARITY_OFFS))|
       gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset,
			UIndex, FailLabName, IndexOkLab)]
  end.

%% Common tail of the element/6 code sequences: the range check on the
%% untagged index UIndex followed by the load from the untagged tuple
%% pointer Ptr.  InvIndex and Offset are scratch registers, and
%% IndexOkLab is the label placed after the range check.
gen_element_tail(Dst, Ptr, InvIndex, Arity, Offset,
		 UIndex, FailLabName, IndexOkLab) ->
  %% now check that 1 <= UIndex <= Arity
  %% if UIndex < 1, then (Arity - UIndex) >= Arity
  %% if UIndex > Arity, then (Arity - UIndex) < 0, which is >=u Arity
  %% otherwise, 0 <= (Arity - UIndex) < Arity
  Invert = hipe_rtl:mk_alu(InvIndex, Arity, sub, UIndex),
  RangeCheck = hipe_rtl:mk_branch(InvIndex, geu, Arity, FailLabName,
				  hipe_rtl:label_name(IndexOkLab), 0.01),
  WordShift = hipe_rtl:mk_imm(hipe_rtl_arch:log2_word_size()),
  Scale = hipe_rtl:mk_alu(Offset, UIndex, sll, WordShift),
  Load = hipe_rtl:mk_load(Dst, Ptr, Offset),
  [Invert, RangeCheck, IndexOkLab, Scale, Load].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Loads the free variable with the 1-based number Index from the
%% environment of Closure into Dst.  No type or range check is made.
unsafe_closure_element(Dst, Index, Closure) ->	% Index is an immediate
  %% Index from 1 to N hence -1
  EnvSlot = hipe_rtl:imm_value(Index) - 1,
  ByteOffset = -(?TAG_PRIMARY_BOXED)                  %% Untag
    + ?EFT_ENV                                        %% Field offset
    + (hipe_rtl_arch:word_size() * EnvSlot),
  hipe_rtl:mk_load(Dst, Closure, hipe_rtl:mk_imm(ByteOffset)).

%% Returns the header word of a fun as an RTL immediate.
mk_fun_header() ->
  FunHeader = ?HEADER_FUN,
  hipe_rtl:mk_imm(FunHeader).

%% Funs are boxed objects and are tagged as such.
tag_fun(Res, X) ->
  tag_boxed(Res, X).

%% untag_fun(Res, X) ->
%%   hipe_rtl:mk_alu(Res, X, 'sub', hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)).

-ifdef(EFT_NATIVE_ADDRESS).
%% Tests that FunP is a closure, jumping to BadFunLab when it is not,
%% and then loads the arity and native address fields of the fun into
%% ArityReg and AddressReg.
if_fun_get_arity_and_address(ArityReg, AddressReg, FunP, BadFunLab, Pred) ->
  %% EmuAddressPtrReg = hipe_rtl:mk_new_reg(),
  %% FEPtrReg = hipe_rtl:mk_new_reg(),
  %% ArityReg = hipe_rtl:mk_new_reg(),
  %% NumFreeReg = hipe_rtl:mk_new_reg(),
  %% RealArityReg = hipe_rtl:mk_new_reg(),
  IsFunLab = hipe_rtl:mk_new_label(),
  %% TrueLab1 = hipe_rtl:mk_new_label(),
  ClosureTest = test_closure(FunP, hipe_rtl:label_name(IsFunLab),
			     BadFunLab, Pred),
  %% Funp->arity contains the arity
  ArityOffset = hipe_rtl:mk_imm(-(?TAG_PRIMARY_BOXED)+?EFT_ARITY),
  AddressOffset = hipe_rtl:mk_imm(-(?TAG_PRIMARY_BOXED)+?EFT_NATIVE_ADDRESS),
  FieldLoads =
    [IsFunLab,
     hipe_rtl:mk_load(ArityReg, FunP, ArityOffset),
     hipe_rtl:mk_load(AddressReg, FunP, AddressOffset)],
  ClosureTest ++ FieldLoads.
-endif.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Binary Code
%%

%% create_heap_binary/3
%%   Emits code that allocates a heap binary at the heap pointer, leaves
%%   the tagged result in Dst and the address of the data area in Base.
%%   Size is the byte size: a compile-time integer in the first clause,
%%   an RTL operand in the second.
create_heap_binary(Base, Size, Dst) when is_integer(Size) ->
  {LoadHP, HP, StoreHP} = hipe_rtl_arch:heap_pointer(),
  BytesPerWord = hipe_rtl_arch:word_size(),
  %% Data rounded up to whole words, plus two further words.
  TotalWords = (Size + 3*BytesPerWord - 1) div BytesPerWord,
  TotalBytes = TotalWords * BytesPerWord,
  ThingWord = hipe_rtl:mk_imm(mk_header(TotalWords - 1,
                                        ?TAG_HEADER_HEAP_BIN)),
  FillIn =
    [LoadHP,
     tag_boxed(Dst, HP),
     set_field_from_pointer({heap_bin, thing_word}, HP, ThingWord),
     set_field_from_pointer({heap_bin, binsize}, HP, hipe_rtl:mk_imm(Size))],
  Advance =
    [hipe_rtl:mk_alu(Base, HP, add, hipe_rtl:mk_imm(?HEAP_BIN_DATA)),
     hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(TotalBytes)),
     StoreHP],
  FillIn ++ Advance;

create_heap_binary(Base, Size, Dst) ->
  {LoadHP, HP, StoreHP} = hipe_rtl_arch:heap_pointer(),
  BytesPerWord = hipe_rtl_arch:word_size(),
  WordShift = hipe_rtl_arch:log2_word_size(),
  %% Temporaries are allocated in a fixed order; do not reorder.
  DataWords = hipe_rtl:mk_new_reg_gcsafe(),
  Padded = hipe_rtl:mk_new_reg_gcsafe(),
  ArityWords = hipe_rtl:mk_new_reg_gcsafe(),
  ThingWord = hipe_rtl:mk_new_reg_gcsafe(),
  RawEnd = hipe_rtl:mk_new_reg(),       % offset from HP
  SlackEnd = hipe_rtl:mk_new_reg(),     % offset from HP
  %% Number of data words (rounded up), plus one, is what goes into
  %% the header.
  ComputeHeader =
    [LoadHP,
     hipe_rtl:mk_alu(Padded, Size, add, hipe_rtl:mk_imm(BytesPerWord-1)),
     hipe_rtl:mk_alu(DataWords, Padded, sra, hipe_rtl:mk_imm(WordShift)),
     hipe_rtl:mk_alu(ArityWords, DataWords, add, hipe_rtl:mk_imm(1)),
     hipe_rtl:mk_alu(Base, HP, add, hipe_rtl:mk_imm(?HEAP_BIN_DATA)),
     mk_var_header(ThingWord, ArityWords, ?TAG_HEADER_HEAP_BIN)],
  FillIn =
    [set_field_from_pointer({heap_bin, thing_word}, HP, ThingWord),
     set_field_from_pointer({heap_bin, binsize}, HP, Size),
     tag_boxed(Dst, HP)],
  %% New HP = (HP + Size + 3*WordSize - 1) masked with -WordSize.
  Advance =
    [hipe_rtl:mk_alu(RawEnd, HP, add, Size),
     hipe_rtl:mk_alu(SlackEnd, RawEnd, add,
                     hipe_rtl:mk_imm(3*BytesPerWord-1)),
     hipe_rtl:mk_alu(HP, SlackEnd, 'and', hipe_rtl:mk_imm(-BytesPerWord)),
     StoreHP],
  ComputeHeader ++ FillIn ++ Advance.

%% create_refc_binary/3
%%   Same as create_refc_binary/4 with the flags field set to 0.
create_refc_binary(Base, Size, Dst) ->
  NoFlags = hipe_rtl:mk_imm(0),
  create_refc_binary(Base, Size, NoFlags, Dst).
%% create_refc_binary/4
%%   Emits code that builds a proc_bin at the heap pointer and leaves the
%%   tagged result in Dst.  Base is the address of the binary's bytes;
%%   the 'val' field is derived from it by subtracting
%%   ?BINARY_ORIG_BYTES.  Flags is stored in the 'flags' field.
create_refc_binary(Base, Size, Flags, Dst) ->
  {LoadHP, HP, StoreHP} = hipe_rtl_arch:heap_pointer(),
  ThingWord = hipe_rtl:mk_imm(?HEADER_PROC_BIN),
  BytesPerWord = hipe_rtl_arch:word_size(),
  BinPtr = hipe_rtl:mk_new_reg(),       % offset from Base
  HeaderPart =
    [LoadHP,
     tag_boxed(Dst, HP),
     set_field_from_pointer({proc_bin, thing_word}, HP, ThingWord),
     set_field_from_pointer({proc_bin, binsize}, HP, Size),
     heap_arch_spec(HP)],
  PayloadPart =
    [hipe_rtl:mk_alu(BinPtr, Base, sub, hipe_rtl:mk_imm(?BINARY_ORIG_BYTES)),
     set_field_from_pointer({proc_bin, val}, HP, BinPtr),
     set_field_from_pointer({proc_bin, bytes}, HP, Base),
     set_field_from_pointer({proc_bin, flags}, HP, Flags),
     hipe_rtl:mk_alu(HP, HP, add,
                     hipe_rtl:mk_imm(?PROC_BIN_WORDSIZE*BytesPerWord)),
     StoreHP],
  HeaderPart ++ PayloadPart.

%% heap_arch_spec/1
%%   Emits code that loads the PCB word at ?P_OFF_HEAP_MSO, stores it in
%%   the 'next' field of the proc_bin at HP, and then stores HP back into
%%   that PCB word.
heap_arch_spec(HP) ->
  OldHead = hipe_rtl:mk_new_reg(),      % MSO state
  [hipe_rtl_arch:pcb_load(OldHead, ?P_OFF_HEAP_MSO),
   set_field_from_pointer({proc_bin, next}, HP, OldHead),
   hipe_rtl_arch:pcb_store(?P_OFF_HEAP_MSO, HP)].

%% test_heap_binary/3
%%   Branches to TrueLblName when the header tag of Binary equals
%%   ?TAG_HEADER_HEAP_BIN, otherwise to FalseLblName.
test_heap_binary(Binary, TrueLblName, FalseLblName) ->
  HeaderReg = hipe_rtl:mk_new_reg_gcsafe(),
  TagReg = hipe_rtl:mk_new_reg_gcsafe(),
  [get_header(HeaderReg, Binary),
   hipe_rtl:mk_alu(TagReg, HeaderReg, 'and',
                   hipe_rtl:mk_imm(?TAG_HEADER_MASK)),
   hipe_rtl:mk_branch(TagReg, eq, hipe_rtl:mk_imm(?TAG_HEADER_HEAP_BIN),
                      TrueLblName, FalseLblName)].

%% mk_sub_binary/6
%%   Same as mk_sub_binary/7 with the is_writable field set to 0.
mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs, Orig) ->
  NotWritable = hipe_rtl:mk_imm(0),
  mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs,
                NotWritable, Orig).

%% mk_sub_binary/7
%%   Emits code that allocates a sub-binary at the heap pointer, fills in
%%   all of its fields and leaves the tagged result in Dst.
mk_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs,
              Writable, Orig) ->
  {LoadHP, HP, StoreHP} = hipe_rtl_arch:heap_pointer(),
  BytesPerWord = hipe_rtl_arch:word_size(),
  Fields =
    build_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs,
                     Writable, Orig),
  [LoadHP,
   tag_boxed(Dst, HP),
   Fields,
   hipe_rtl:mk_alu(HP, HP, add,
                   hipe_rtl:mk_imm(?SUB_BIN_WORDSIZE*BytesPerWord)),
   StoreHP].
%% build_sub_binary/7
%%   Emits the stores that initialise every field of a sub-binary.  Dst
%%   must already hold the tagged pointer to the object being written.
build_sub_binary(Dst, ByteSize, ByteOffs, BitSize, BitOffs,
                 Writable, Orig) ->
  ThingWord = hipe_rtl:mk_imm(?HEADER_SUB_BIN),
  SizeStores =
    [set_field_from_term({sub_binary, thing_word}, Dst, ThingWord),
     set_field_from_term({sub_binary, binsize}, Dst, ByteSize),
     set_field_from_term({sub_binary, offset}, Dst, ByteOffs),
     set_field_from_term({sub_binary, bitsize}, Dst, BitSize)],
  RestStores =
    [set_field_from_term({sub_binary, bitoffset}, Dst, BitOffs),
     set_field_from_term({sub_binary, is_writable}, Dst, Writable),
     set_field_from_term({sub_binary, orig}, Dst, Orig)],
  SizeStores ++ RestStores.

%% test_subbinary/3
%%   Branches to TrueLblName when the header tag of Binary equals
%%   ?TAG_HEADER_SUB_BIN, otherwise to FalseLblName.
test_subbinary(Binary, TrueLblName, FalseLblName) ->
  HeaderReg = hipe_rtl:mk_new_reg_gcsafe(),
  TagReg = hipe_rtl:mk_new_reg_gcsafe(),
  [get_header(HeaderReg, Binary),
   hipe_rtl:mk_alu(TagReg, HeaderReg, 'and',
                   hipe_rtl:mk_imm(?TAG_HEADER_MASK)),
   hipe_rtl:mk_branch(TagReg, eq, hipe_rtl:mk_imm(?TAG_HEADER_SUB_BIN),
                      TrueLblName, FalseLblName)].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Float Code

%% unsafe_load_float/3
%%   Loads the two 32-bit halves of the boxed float Src into DstLo and
%%   DstHi.  Which half sits at the lower address depends on the target's
%%   endianness.  No type check is emitted.
unsafe_load_float(DstLo, DstHi, Src) ->
  BytesPerWord = hipe_rtl_arch:word_size(),
  FirstHalf = -(?TAG_PRIMARY_BOXED) + BytesPerWord,
  %% The halves are always 4 bytes apart; this is not the word size.
  SecondHalf = FirstHalf + 4,
  {AtFirst, AtSecond} =
    case hipe_rtl_arch:endianess() of
      little -> {DstLo, DstHi};
      big -> {DstHi, DstLo}
    end,
  [hipe_rtl:mk_load(AtFirst, Src, hipe_rtl:mk_imm(FirstHalf),
                    int32, unsigned),
   hipe_rtl:mk_load(AtSecond, Src, hipe_rtl:mk_imm(SecondHalf),
                    int32, unsigned)].

%% unsafe_untag_float/2
%%   Floating-point load of the payload of the boxed float Src into Dst;
%%   the payload starts one word after the header.
unsafe_untag_float(Dst, Src) ->
  PayloadOffset = hipe_rtl_arch:word_size() - ?TAG_PRIMARY_BOXED,
  FLoad = hipe_rtl:mk_fload(Dst, Src, hipe_rtl:mk_imm(PayloadOffset)),
  [FLoad].
%% unsafe_tag_float/2
%%   Emits code that boxes the float register Src: a flonum header and
%%   the float are written at the heap pointer, Dst receives the tagged
%%   pointer, and the heap pointer is advanced by one word plus 8 bytes.
unsafe_tag_float(Dst, Src) ->
  {LoadHP, HP, StoreHP} = hipe_rtl_arch:heap_pointer(),
  FloatHeader = hipe_rtl:mk_imm(flonum_header()),
  BytesPerWord = hipe_rtl_arch:word_size(),
  WriteObject =
    [LoadHP,
     hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(0), FloatHeader),
     hipe_rtl:mk_fstore(HP, hipe_rtl:mk_imm(BytesPerWord), Src),
     tag_flonum(Dst, HP)],
  Bump =
    [hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(BytesPerWord+8)),
     StoreHP],
  WriteObject ++ Bump.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% BigNum Code

%% unsafe_mk_big/3
%%   Emits code that builds a one-word bignum holding Src and leaves the
%%   tagged result in Dst.  With Signedness = unsigned the positive
%%   header is always used; with signed the header is chosen at run time
%%   by comparing Src with 0.  Src itself is stored unchanged.
unsafe_mk_big(Dst, Src, Signedness) ->
  BytesPerWord = hipe_rtl_arch:word_size(),
  {LoadHP, HP, StoreHP} = hipe_rtl_arch:heap_pointer(),
  PosHeader = hipe_rtl:mk_imm(mk_header(1, ?TAG_HEADER_POS_BIG)),
  NegHeader = hipe_rtl:mk_imm(mk_header(1, ?TAG_HEADER_NEG_BIG)),
  %% The three labels are allocated unconditionally, also in the
  %% unsigned case where they end up unused.
  IsPosLab = hipe_rtl:mk_new_label(),
  IsNegLab = hipe_rtl:mk_new_label(),
  DoneLab = hipe_rtl:mk_new_label(),
  HeaderCode =
    case Signedness of
      unsigned ->
        [hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(0*BytesPerWord), PosHeader)];
      signed ->
        [hipe_rtl:mk_branch(Src, ge, hipe_rtl:mk_imm(0),
                            hipe_rtl:label_name(IsPosLab),
                            hipe_rtl:label_name(IsNegLab)),
         IsPosLab,
         hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(0*BytesPerWord), PosHeader),
         hipe_rtl:mk_goto(hipe_rtl:label_name(DoneLab)),
         IsNegLab,
         hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(0*BytesPerWord), NegHeader),
         DoneLab]
    end,
  DigitCode =
    [hipe_rtl:mk_store(HP, hipe_rtl:mk_imm(1*BytesPerWord), Src),
     tag_boxed(Dst, HP),
     hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(2*BytesPerWord)),
     StoreHP],
  [LoadHP | HeaderCode ++ DigitCode].
%% get_one_word_pos_bignum/3
%%   Emits code that branches to Fail unless the header of the boxed term
%%   Size is exactly that of a one-word positive bignum; on success the
%%   word following the header is loaded into USize.
get_one_word_pos_bignum(USize, Size, Fail) ->
  HeaderReg = hipe_rtl:mk_new_reg(),
  OkLab = hipe_rtl:mk_new_label(),
  OkLabName = hipe_rtl:label_name(OkLab),
  BytesPerWord = hipe_rtl_arch:word_size(),
  Expected = hipe_rtl:mk_imm(mk_header(1, ?TAG_HEADER_POS_BIG)),
  [get_header(HeaderReg, Size),
   hipe_rtl:mk_branch(HeaderReg, eq, Expected, OkLabName, Fail),
   OkLab,
   hipe_rtl:mk_load(USize, Size,
                    hipe_rtl:mk_imm(1*BytesPerWord + -?TAG_PRIMARY_BOXED))].

-spec bignum_sizeneed(non_neg_integer()) -> non_neg_integer().

%% bignum_sizeneed/1
%%   Compile-time heap need, in words, for an integer of Size bits:
%%   0 when 1 bsl Size is a fixnum, otherwise the number of words needed
%%   for Size bits plus one.
bignum_sizeneed(Size) ->
  BitsPerWord = hipe_rtl_arch:word_size() * 8,
  case is_fixnum(1 bsl Size) of
    false ->
      DataWords = (Size + (BitsPerWord-1)) div BitsPerWord,
      DataWords + 1;
    true ->
      0
  end.

%% bignum_sizeneed_code/2
%%   Run-time counterpart of bignum_sizeneed/1.  Returns {ResReg, Code}:
%%   Code jumps to FixNumLblName when SizeReg is at most
%%   WordSizeBits - ?TAG_IMMED1_SIZE - 1, and otherwise computes the
%%   word count (rounded up) plus one into ResReg.
bignum_sizeneed_code(SizeReg,FixNumLblName) ->
  BitsPerWord = hipe_rtl_arch:word_size() * 8,
  BitsShift = hipe_rtl_arch:log2_word_size() + 3,
  FixnumBits = BitsPerWord - ?TAG_IMMED1_SIZE - 1,
  ResReg = hipe_rtl:mk_new_reg_gcsafe(),
  Padded = hipe_rtl:mk_new_reg_gcsafe(),
  NeedBigLab = hipe_rtl:mk_new_label(),
  Dispatch =
    [hipe_rtl:mk_branch(SizeReg, le, hipe_rtl:mk_imm(FixnumBits),
                        FixNumLblName, hipe_rtl:label_name(NeedBigLab)),
     NeedBigLab],
  Compute =
    [hipe_rtl:mk_alu(Padded, SizeReg, add, hipe_rtl:mk_imm(BitsPerWord-1)),
     hipe_rtl:mk_alu(ResReg, Padded, srl, hipe_rtl:mk_imm(BitsShift)),
     hipe_rtl:mk_alu(ResReg, ResReg, add, hipe_rtl:mk_imm(1))],
  {ResReg, Dispatch ++ Compute}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% MatchState Code

%% create_matchstate/6
%%   Emits code that allocates a matchstate with Max+1 save-offset slots
%%   at the heap pointer, initialises its match buffer from Orig, Base,
%%   BinSize and Offset, copies Offset into save slot 0, and leaves the
%%   tagged result in Ms.
create_matchstate(Max, BinSize, Base, Offset, Orig, Ms) ->
  BytesPerWord = hipe_rtl_arch:word_size(),
  {LoadHP, HP, StoreHP} = hipe_rtl_arch:heap_pointer(),
  TotalBytes = (Max+1)*BytesPerWord + ?MS_SAVEOFFSET,
  ArityWords = (TotalBytes div BytesPerWord) - 1,
  ThingWord =
    hipe_rtl:mk_imm(mk_header(ArityWords, ?TAG_HEADER_BIN_MATCHSTATE)),
  Allocate =
    [LoadHP,
     hipe_rtl:mk_alu(Ms, HP, add, hipe_rtl:mk_imm(?TAG_PRIMARY_BOXED)),
     set_field_from_term({matchstate,thing_word}, Ms, ThingWord)],
  FillBuffer =
    [set_field_from_term({matchstate,{matchbuffer,orig}}, Ms, Orig),
     set_field_from_term({matchstate,{matchbuffer,base}}, Ms, Base),
     set_field_from_term({matchstate,{matchbuffer,binsize}}, Ms, BinSize),
     set_field_from_term({matchstate,{matchbuffer,offset}}, Ms, Offset),
     set_field_from_term({matchstate,{saveoffset, 0}}, Ms, Offset)],
  Bump =
    [hipe_rtl:mk_alu(HP, HP, add, hipe_rtl:mk_imm(TotalBytes)),
     StoreHP],
  Allocate ++ FillBuffer ++ Bump.

%% convert_matchstate/1
%%   Emits code that overwrites the matchstate Ms, in place, with a
%%   sub-binary of the original binary.  The sub-binary starts at the bit
%%   offset held in save slot 0 and extends to the end of the binary.
%%   A positive-bignum header is then stored at word offset
%%   ?SUB_BIN_WORDSIZE, sized from the old header's word count
%%   (presumably so that the leftover words of the old matchstate remain
%%   a well-formed heap object -- confirm against the GC).
convert_matchstate(Ms) ->
  BytesPerWord = hipe_rtl_arch:word_size(),
  %% Temporaries are allocated in a fixed order; do not reorder.
  OldHeader = hipe_rtl:mk_new_reg_gcsafe(),
  BitsLeft = hipe_rtl:mk_new_reg_gcsafe(),
  StartBit = hipe_rtl:mk_new_reg_gcsafe(),
  Orig = hipe_rtl:mk_new_reg_gcsafe(),
  BinSize = hipe_rtl:mk_new_reg_gcsafe(),
  ByteSize = hipe_rtl:mk_new_reg_gcsafe(),
  BitSize = hipe_rtl:mk_new_reg_gcsafe(),
  ByteOffset = hipe_rtl:mk_new_reg_gcsafe(),
  BitOffset = hipe_rtl:mk_new_reg_gcsafe(),
  OldWords = hipe_rtl:mk_new_reg_gcsafe(),
  FillerWords = hipe_rtl:mk_new_reg_gcsafe(),
  FillerHeader = hipe_rtl:mk_new_reg_gcsafe(),
  ReadState =
    [get_field_from_term({matchstate, {matchbuffer, orig}}, Ms, Orig),
     get_field_from_term({matchstate, {matchbuffer, binsize}}, Ms, BinSize),
     get_field_from_term({matchstate, {saveoffset, 0}}, Ms, StartBit),
     get_field_from_term({matchstate, thing_word}, Ms, OldHeader)],
  %% Split the remaining size and the start offset into bytes and bits.
  SplitBits =
    [hipe_rtl:mk_alu(BitsLeft, BinSize, sub, StartBit),
     hipe_rtl:mk_alu(BitSize, BitsLeft, 'and', hipe_rtl:mk_imm(7)),
     hipe_rtl:mk_alu(BitOffset, StartBit, 'and', hipe_rtl:mk_imm(7)),
     hipe_rtl:mk_alu(ByteSize, BitsLeft, srl, hipe_rtl:mk_imm(3)),
     hipe_rtl:mk_alu(ByteOffset, StartBit, srl, hipe_rtl:mk_imm(3))],
  Overwrite =
    [build_sub_binary(Ms, ByteSize, ByteOffset, BitSize, BitOffset,
                      hipe_rtl:mk_imm(0), Orig),
     size_from_header(OldWords, OldHeader),
     hipe_rtl:mk_alu(FillerWords, OldWords, sub,
                     hipe_rtl:mk_imm(?SUB_BIN_WORDSIZE-1)),
     mk_var_header(FillerHeader, FillerWords, ?TAG_HEADER_POS_BIG),
     hipe_rtl:mk_store(Ms,
                       hipe_rtl:mk_imm(?SUB_BIN_WORDSIZE*BytesPerWord
                                       -?TAG_PRIMARY_BOXED),
                       FillerHeader)],
  ReadState ++ SplitBits ++ Overwrite.

%% compare_matchstate/4
%%   Emits code that loads the header word of Ms and branches to
%%   LargeEnough when it is >= the header of a matchstate with Max+1 save
%%   slots, and to TooSmall otherwise.
compare_matchstate(Max, Ms, LargeEnough, TooSmall) ->
  BytesPerWord = hipe_rtl_arch:word_size(),
  NeededBytes = (Max+1)*BytesPerWord + ?MS_SAVEOFFSET,
  NeededWords = (NeededBytes div BytesPerWord) - 1,
  NeededHeader =
    hipe_rtl:mk_imm(mk_header(NeededWords, ?TAG_HEADER_BIN_MATCHSTATE)),
  ActualHeader = hipe_rtl:mk_new_reg_gcsafe(),
  [hipe_rtl:mk_load(ActualHeader, Ms, hipe_rtl:mk_imm(-?TAG_PRIMARY_BOXED)),
   hipe_rtl:mk_branch(ActualHeader, ge, NeededHeader, LargeEnough, TooSmall)].
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Struct manipulation code

%% get_field_offset/1
%%   Maps a {Struct, Field} descriptor to the byte offset of that field
%%   from the (untagged) start of the struct.  Nested descriptors such as
%%   {matchstate, {matchbuffer, F}} add the offset of the inner struct,
%%   and indexed descriptors such as {saveoffset, N} or {data, N} add N
%%   times the element size.

%% -- matchstate --
get_field_offset({matchstate, thing_word}) -> ?MS_THING_WORD;
get_field_offset({matchstate, matchbuffer}) -> ?MS_MATCHBUFFER;
get_field_offset({matchstate, {matchbuffer, _} = Inner}) ->
  ?MS_MATCHBUFFER + get_field_offset(Inner);
get_field_offset({matchstate, {saveoffset, N}} = Slot) ->
  ?MS_SAVEOFFSET + N*get_field_size1(Slot);
%% -- sub_binary --
get_field_offset({sub_binary, thing_word}) -> ?SUB_BIN_THING_WORD;
get_field_offset({sub_binary, binsize}) -> ?SUB_BIN_BINSIZE;
get_field_offset({sub_binary, bitsize}) -> ?SUB_BIN_BITSIZE;
get_field_offset({sub_binary, offset}) -> ?SUB_BIN_OFFS;
get_field_offset({sub_binary, bitoffset}) -> ?SUB_BIN_BITOFFS;
get_field_offset({sub_binary, is_writable}) -> ?SUB_BIN_WRITABLE;
get_field_offset({sub_binary, orig}) -> ?SUB_BIN_ORIG;
%% -- proc_bin --
get_field_offset({proc_bin, thing_word}) -> ?PROC_BIN_THING_WORD;
get_field_offset({proc_bin, binsize}) -> ?PROC_BIN_BINSIZE;
get_field_offset({proc_bin, next}) -> ?PROC_BIN_NEXT;
get_field_offset({proc_bin, val}) -> ?PROC_BIN_VAL;
get_field_offset({proc_bin, bytes}) -> ?PROC_BIN_BYTES;
get_field_offset({proc_bin, flags}) -> ?PROC_BIN_FLAGS;
%% -- binary --
get_field_offset({binary, orig_bytes}) -> ?BINARY_ORIG_BYTES;
get_field_offset({binary, orig_size}) -> ?BINARY_ORIG_SIZE;
%% -- heap_bin --
get_field_offset({heap_bin, thing_word}) -> ?HEAP_BIN_THING_WORD;
get_field_offset({heap_bin, binsize}) -> ?HEAP_BIN_SIZE;
get_field_offset({heap_bin, {data, N}} = Slot) ->
  ?HEAP_BIN_DATA+N*get_field_size1(Slot);
%% -- matchbuffer --
get_field_offset({matchbuffer, offset}) -> ?MB_OFFSET;
get_field_offset({matchbuffer, orig}) -> ?MB_ORIG;
get_field_offset({matchbuffer, base}) -> ?MB_BASE;
get_field_offset({matchbuffer, binsize}) -> ?MB_SIZE.

%% get_field_size/1
%%   The size of a field as the atom that size_to_atom/1 returns for the
%%   field's byte size.
get_field_size(Field) ->
  ByteCount = get_field_size1(Field),
  size_to_atom(ByteCount).
%% size_to_atom/1
%%   Translates a field size in bytes to a size atom: 'word' for the
%%   target's word size (tested first, so a 4-byte field on a target with
%%   4-byte words is 'word'), 'int32' for 4 and 'byte' for 1.  Any other
%%   size is unsupported and raises case_clause.
size_to_atom(Bytes) ->
  NativeWord = hipe_rtl_arch:word_size(),
  case Bytes of
    NativeWord -> word;
    4 -> int32;
    %% 2 -> int16;  so far there are no 2 byte fields
    1 -> byte
  end.

%% get_field_size1/1
%%   Maps a {Struct, Field} descriptor to the size of that field in
%%   bytes.  Nested {matchstate, {matchbuffer, F}} descriptors defer to
%%   the inner struct; indexed descriptors return the element size.

%% -- matchstate --
get_field_size1({matchstate, thing_word}) -> ?MS_THING_WORD_SIZE;
get_field_size1({matchstate, {matchbuffer, _} = Inner}) ->
  get_field_size1(Inner);
get_field_size1({matchstate, {saveoffset, _N}}) -> ?MS_SAVEOFFSET_SIZE;
%% -- sub_binary --
get_field_size1({sub_binary, thing_word}) -> ?SUB_BIN_THING_WORD_SIZE;
get_field_size1({sub_binary, binsize}) -> ?SUB_BIN_BINSIZE_SIZE;
get_field_size1({sub_binary, bitsize}) -> ?SUB_BIN_BITSIZE_SIZE;
get_field_size1({sub_binary, offset}) -> ?SUB_BIN_OFFS_SIZE;
get_field_size1({sub_binary, bitoffset}) -> ?SUB_BIN_BITOFFS_SIZE;
get_field_size1({sub_binary, is_writable}) -> ?SUB_BIN_WRITABLE_SIZE;
get_field_size1({sub_binary, orig}) -> ?SUB_BIN_ORIG_SIZE;
%% -- proc_bin --
get_field_size1({proc_bin, thing_word}) -> ?PROC_BIN_THING_WORD_SIZE;
get_field_size1({proc_bin, binsize}) -> ?PROC_BIN_BINSIZE_SIZE;
get_field_size1({proc_bin, next}) -> ?PROC_BIN_NEXT_SIZE;
get_field_size1({proc_bin, val}) -> ?PROC_BIN_VAL_SIZE;
get_field_size1({proc_bin, bytes}) -> ?PROC_BIN_BYTES_SIZE;
get_field_size1({proc_bin, flags}) -> ?PROC_BIN_FLAGS_SIZE;
%% -- binary --
get_field_size1({binary, orig_bytes}) -> ?BINARY_ORIG_BYTES_SIZE;
get_field_size1({binary, orig_size}) -> ?BINARY_ORIG_SIZE_SIZE;
%% -- heap_bin --
get_field_size1({heap_bin, thing_word}) -> ?HEAP_BIN_THING_WORD_SIZE;
get_field_size1({heap_bin, binsize}) -> ?HEAP_BIN_SIZE_SIZE;
get_field_size1({heap_bin, {data, _}}) -> ?HEAP_BIN_DATA_SIZE;
%% -- matchbuffer --
get_field_size1({matchbuffer, offset}) -> ?MB_OFFSET_SIZE;
get_field_size1({matchbuffer, orig}) -> ?MB_ORIG_SIZE;
get_field_size1({matchbuffer, base}) -> ?MB_BASE_SIZE;
get_field_size1({matchbuffer, binsize}) -> ?MB_SIZE_SIZE.
%% get_field_from_term/3
%%   Unsigned load of field Struct from the tagged boxed Term into Dst.
%%   The boxed tag is folded into the displacement.
get_field_from_term(Struct, Term, Dst) ->
  Disp = get_field_offset(Struct) - ?TAG_PRIMARY_BOXED,
  hipe_rtl:mk_load(Dst, Term, hipe_rtl:mk_imm(Disp),
                   get_field_size(Struct), unsigned).

%% set_field_from_term/3
%%   Store of Value into field Struct of the tagged boxed Term.
%%   The boxed tag is folded into the displacement.
set_field_from_term(Struct, Term, Value) ->
  Disp = get_field_offset(Struct) - ?TAG_PRIMARY_BOXED,
  hipe_rtl:mk_store(Term, hipe_rtl:mk_imm(Disp), Value,
                    get_field_size(Struct)).

%% get_field_from_pointer/3
%%   Unsigned load of field Struct into Dst; Term is used as an untagged
%%   pointer, so no tag adjustment is made.
get_field_from_pointer(Struct, Term, Dst) ->
  Disp = get_field_offset(Struct),
  hipe_rtl:mk_load(Dst, Term, hipe_rtl:mk_imm(Disp),
                   get_field_size(Struct), unsigned).

%% set_field_from_pointer/3
%%   Store of Value into field Struct; Term is used as an untagged
%%   pointer, so no tag adjustment is made.
set_field_from_pointer(Struct, Term, Value) ->
  Disp = get_field_offset(Struct),
  hipe_rtl:mk_store(Term, hipe_rtl:mk_imm(Disp), Value,
                    get_field_size(Struct)).

%% extract_matchbuffer/2
%%   Sets Mb to the address of the match buffer embedded in the tagged
%%   matchstate Ms.
extract_matchbuffer(Mb, Ms) ->
  BufferDisp = get_field_offset({matchstate, matchbuffer})
    - ?TAG_PRIMARY_BOXED,
  hipe_rtl:mk_alu(Mb, Ms, add, hipe_rtl:mk_imm(BufferDisp)).

%% extract_binary_bytes/2
%%   Sets Base to Binary plus the offset of the orig_bytes field; no tag
%%   adjustment is made.
extract_binary_bytes(Binary, Base) ->
  BytesDisp = get_field_offset({binary, orig_bytes}),
  hipe_rtl:mk_alu(Base, Binary, add, hipe_rtl:mk_imm(BytesDisp)).